Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Added an installation bash script; fixed 2 minor errors that prevented web_demo.py from running #32

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
148 changes: 148 additions & 0 deletions client.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,148 @@
import json
import time
import sys
import os
from gradio_client import Client, handle_file

# Replace with the actual server URL if different
# (both can be overridden on the command line with --ip / --port).
ip = "127.0.0.1"
port = "8080"

# Define the user prompt (caption)
# Default instruction sent with every image; override with --prompt.
user_prompt = "Thoroughly and carefully describe this image."

# Image paths to process (filled from --directory and/or trailing positional arguments).
files = []
# JSON file the {image_path: caption} results are written to; override with --output / -o.
output_file = "output.json"

# Hyperparameters
# Sampling hyperparameters forwarded to the /predict endpoint (each has a matching flag).
temperature = 0.6
top_k = 50  # NOTE(review): parsed but not forwarded — the top_k kwarg is commented out in the /predict call below
top_p = 0.9
max_tokens = 100

# Index in the sorted file list to resume from (--start); lets a partial run be continued.
startAt = 0

# --- Command-line argument parsing -------------------------------------------
# Flags come as "--name value" pairs. argumentStart tracks how many argv slots
# the recognized flags consumed, so the trailing positional file arguments can
# be collected later starting at that index (assumes flags precede positionals,
# as in the original behavior).
argumentStart = 1
if len(sys.argv) > 1:
    # Every supported flag takes exactly one value.
    _value_flags = ("--ip", "--directory", "--start", "--port", "--prompt",
                    "--temperature", "--top_k", "--top_p", "--max_tokens",
                    "--output", "-o")
    i = 1
    while i < len(sys.argv):
        arg = sys.argv[i]
        if arg in _value_flags and i + 1 >= len(sys.argv):
            # A trailing flag with no value used to crash with IndexError;
            # fail with a clear message instead.
            print(f"Error: {arg} requires a value.")
            sys.exit(1)
        if arg == "--ip":
            ip = sys.argv[i + 1]
            argumentStart += 2
            i += 2  # skip the value so it is never re-parsed as a flag
        elif arg == "--directory":
            directory = sys.argv[i + 1]
            argumentStart += 2
            # Populate files with image (.jpg, .png, .jpeg) contents of the
            # directory; .txt files are accepted too (they swap the prompt).
            if os.path.isdir(directory):
                directoryList = os.listdir(directory)
                directoryList.sort()
                for file in directoryList:
                    if file.lower().endswith(('.jpg', '.png', '.jpeg', '.txt')):
                        files.append(os.path.join(directory, file))
            else:
                print(f"Error: Directory '{directory}' does not exist.")
                sys.exit(1)
            i += 2
        elif arg == "--start":
            startAt = int(sys.argv[i + 1])
            argumentStart += 2
            i += 2
        elif arg == "--port":
            port = sys.argv[i + 1]
            argumentStart += 2
            i += 2
        elif arg == "--prompt":
            user_prompt = sys.argv[i + 1]
            argumentStart += 2
            i += 2
        elif arg == "--temperature":
            temperature = float(sys.argv[i + 1])
            argumentStart += 2
            i += 2
        elif arg == "--top_k":
            top_k = int(sys.argv[i + 1])
            argumentStart += 2
            i += 2
        elif arg == "--top_p":
            top_p = float(sys.argv[i + 1])
            argumentStart += 2
            i += 2
        elif arg == "--max_tokens":
            max_tokens = int(sys.argv[i + 1])
            argumentStart += 2
            i += 2
        elif arg in ("--output", "-o"):
            output_file = sys.argv[i + 1]
            argumentStart += 2
            i += 2
        else:
            # Not a recognized flag: treat as a positional argument and move on.
            i += 1

# Connect to the Gradio app serving the model.
client = Client(f"http://{ip}:{port}")

# Captions keyed by image path; the prompt used is stored alongside them.
results = {"prompt": user_prompt}

# Any remaining positional arguments are additional input files.
files.extend(sys.argv[argumentStart:])

# Process in a deterministic, sorted order.
files.sort()

# Caption every file, optionally resuming from a specific index (--start).
for i in range(startAt, len(files)):
    # Grab the next input path.
    image_path = files[i]

    # Record the start time so throughput and a rough ETA can be reported.
    start = time.time()

    # Make the query to the VLM server.
    try:
        imageFile = None
        this_user_prompt = user_prompt
        if image_path.endswith('.txt'):
            # A .txt entry carries a replacement prompt instead of an image;
            # no image is uploaded for this item (input_images will hold None).
            with open(image_path, 'r') as txt_file:
                this_user_prompt = txt_file.read().strip()
        else:
            imageFile = handle_file(image_path)

        # Reset the server-side conversation state before each item so
        # earlier images do not leak into this caption.
        result = client.predict(api_name="/reset_state")

        # Send the image file and the prompt to the Gradio app for processing.
        result = client.predict(
            input_images=[imageFile],  # Provide the file path directly
            input_text=this_user_prompt,  # Adapted prompt parameter
            api_name="/transfer_input"
        )

        result = client.predict(
            chatbot=[],
            temperature=temperature,
            #top_k=top_k,
            top_p=top_p,
            max_length_tokens=max_tokens,  # Adapted max_tokens parameter
            repetition_penalty=1.1,
            max_context_length_tokens=4096,
            #model_select_dropdown="deepseek-ai/deepseek-vl2-tiny",
            api_name="/predict"
        )

    except Exception as e:
        # Keep whatever succeeded so far: rename the output so a partial
        # result cannot be mistaken for a complete run, then stop.
        print(f"Failed to complete job at index {i}: {e}")
        output_file = f"partial_until_{i}_{output_file}"
        break

    # Calculate elapsed time, throughput, and remaining-time estimate.
    seconds = time.time() - start
    remaining = (len(files) - i) * seconds
    hz = 1 / (seconds + 0.0001)  # epsilon avoids division by zero

    # Extract the model's reply from the /predict output.
    # NOTE(review): assumes result[0] is the chat history and [0][1] is the
    # first turn's response — confirm against web_demo.py's predict outputs.
    response = result[0][0][1]

    # Print progress on screen.
    print(f"Processing {1 + i}/{len(files)} | {hz:.2f} Hz / remaining {remaining / 60:.2f} minutes")
    print(f"Image: {image_path}\nResponse: {response}")

    # Store each path as the key pointing to each description.
    results[image_path] = response

# Save results (possibly partial) to JSON.
print(f"\n\n\nStoring results in JSON file {output_file}")
with open(output_file, "w") as outfile:
    json.dump(results, outfile, indent=4)

68 changes: 68 additions & 0 deletions scripts/linux_setup.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
#!/bin/bash

# Setup script for the DeepSeek-VL2 web demo: creates (or reuses) a Python
# virtual environment at the repository root, installs the required system
# and Python dependencies, and prints the command to launch web_demo.py.

# Resolve the directory containing this script, then work from the repo root
# (one level up) so the script can be invoked from any working directory.
DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
cd "$DIR"
cd ..


if [ -d venv/ ]
then
 echo "Found a virtual environment"
 source venv/bin/activate
else
 echo "Creating a virtual environment"
 #Simple dependency checker that will apt-get stuff if something is missing
 # sudo apt-get install python3-venv python3-pip
 SYSTEM_DEPENDENCIES="python3-venv python3-pip zip libhdf5-dev"

 # Check each required Debian/Ubuntu package with dpkg-query; if any one is
 # missing, prompt to install the whole dependency list at once.
 for REQUIRED_PKG in $SYSTEM_DEPENDENCIES
 do
    PKG_OK=$(dpkg-query -W --showformat='${Status}\n' $REQUIRED_PKG|grep "install ok installed")
    echo "Checking for $REQUIRED_PKG: $PKG_OK"
    if [ "" = "$PKG_OK" ]; then

      echo "No $REQUIRED_PKG. Setting up $REQUIRED_PKG."

      #If this is uncommented then only packages that are missing will get prompted..
      #sudo apt-get --yes install $REQUIRED_PKG

      #if this is uncommented then if one package is missing then all missing packages are immediately installed..
      # NOTE: deliberately without --yes so the user is prompted before install.
      sudo apt-get install $SYSTEM_DEPENDENCIES
      break
    fi
 done
 #------------------------------------------------------------------------------
 # Create and activate a fresh virtual environment.
 python3 -m venv venv
 source venv/bin/activate
fi


#git clone https://github.com/deepseek-ai/DeepSeek-VL2
#cd DeepSeek-VL2
#python3 -m venv venv
#source venv/bin/activate


#Make sure pip is up to date
python3 -m pip install --upgrade pip

# Install the repository itself (editable) plus its gradio extras.
python3 -m pip install -e .
python3 -m pip install -e .[gradio]

# Extra runtime dependencies: flash-attn needs --no-build-isolation so it
# builds against the torch already installed in this environment.
python3 -m pip install joblib wheel
python3 -m pip install flash-attn --no-build-isolation
python3 -m pip install xformers
python3 -m pip install --upgrade gradio

#You can now run using :
#CUDA_VISIBLE_DEVICES=2 python3 web_demo.py --model_name "deepseek-ai/deepseek-vl2-tiny" --port 8080

# Print copy-pasteable launch instructions for the user.
echo "From now on you can run the web demo using: "
DEMO_DIR=`pwd`
echo "cd $DEMO_DIR"
echo "source venv/bin/activate"
echo "python3 web_demo.py --model_name \"deepseek-ai/deepseek-vl2-tiny\" --port 8080"


exit 0

7 changes: 4 additions & 3 deletions web_demo.py
Original file line number Diff line number Diff line change
Expand Up @@ -118,8 +118,8 @@
[
[
"images/multi_image_1.jpeg",
"images/mi_2.jpeg",
"images/mi_3.jpeg"
"images/multi_image_2.jpeg",
"images/multi_image_3.jpeg"
],
"能帮我用这几个食材做一道菜吗?",
]
Expand Down Expand Up @@ -663,7 +663,8 @@ def format_examples(examples_list):
demo.title = "DeepSeek-VL2 Chatbot"

reload_javascript()
demo.queue(concurrency_count=CONCURRENT_COUNT, max_size=MAX_EVENTS).launch(
demo.queue(#concurrency_count=CONCURRENT_COUNT, #<- for some reason this emits an error!
max_size=MAX_EVENTS).launch(
# share=False,
share=True,
favicon_path="deepseek_vl2/serve/assets/favicon.ico",
Expand Down