
Commit a92a6a5

Kye authored and committed
[FEAT] Flow.run() img=None for conditional image inputs, BaseMultiModalModel, and multi-modal swarms of manufacturing agents
1 parent f895497 commit a92a6a5

File tree

10 files changed, +189 additions, -28 deletions


example.py

Lines changed: 12 additions & 0 deletions

@@ -1,9 +1,21 @@
+import os
+
+from dotenv import load_dotenv
+
+# Import the OpenAIChat model and the Flow struct
 from swarms.models import OpenAIChat
 from swarms.structs import Flow
 
+# Load the environment variables
+load_dotenv()
+
+# Get the API key from the environment
+api_key = os.environ.get("OPENAI_API_KEY")
+
 # Initialize the language model
 llm = OpenAIChat(
     temperature=0.5,
+    openai_api_key=api_key,
 )

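For context, the commit title also says that Flow.run() now takes img=None so image input is conditional. A minimal sketch of how the rest of example.py could drive the Flow under that change, assuming the Flow constructor shown elsewhere in this commit; the task string and max_loops value are illustrative, not part of the diff:

# Hypothetical continuation of example.py (llm is the OpenAIChat instance above).
flow = Flow(llm=llm, max_loops=1)

# Per the commit message, img defaults to None, so a text-only task
# can call run() without supplying an image.
out = flow.run("Generate a 10,000 word blog on health and wellness.", img=None)
print(out)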
multi_modal_auto_agent.py

Lines changed: 4 additions & 0 deletions

@@ -1,5 +1,8 @@
 from swarms.structs import Flow
 from swarms.models.gpt4_vision_api import GPT4VisionAPI
+from swarms.prompts.multi_modal_autonomous_instruction_prompt import (
+    MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
+)
 
 
 llm = GPT4VisionAPI()
@@ -10,6 +13,7 @@
 ## Initialize the workflow
 flow = Flow(
     llm=llm,
     sop=MULTI_MODAL_AUTO_AGENT_SYSTEM_PROMPT_1,
     max_loops="auto",
 )

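With the SOP wired into the Flow, the agent is presumably driven the same way as before. A short usage sketch, assuming Flow.run(task, img) forwards both arguments to GPT4VisionAPI; the task string and image path below are examples only:

# Hypothetical usage of the workflow defined above.
task = "What is in this image? List any safety hazards you can see."
img = "images/factory_floor.jpg"  # assumed local image path

out = flow.run(task, img)
print(out)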
playground/demos/idea_2_img/main.py

Lines changed: 7 additions & 0 deletions

@@ -0,0 +1,7 @@
+"""
+Idea 2 img
+
+task -> gpt4 text -> dalle3 img -> gpt4vision img + text analyze img -> dalle3 img -> loop
+
+"""
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
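The docstring describes a refinement loop: GPT-4 drafts a prompt, DALL-E 3 renders it, GPT-4 Vision critiques the render, and the critique feeds the next prompt. Only the GPT4VisionAPI import lands in this hunk; the sketch below fills in the loop under the assumption that some image generator is supplied by the caller (the generate_image callable is hypothetical, not part of swarms):

from swarms.models.gpt4_vision_api import GPT4VisionAPI

vision = GPT4VisionAPI()

def idea_to_image_loop(task, generate_image, rounds=3):
    """Iteratively refine an image: generate, critique with GPT-4 Vision, regenerate."""
    prompt = task
    img = None
    for _ in range(rounds):
        img = generate_image(prompt)  # hypothetical DALL-E 3 call returning an image path/URL
        critique = vision.run(
            "Critique this image and suggest how the next prompt should change.",
            img,
        )
        prompt = f"{task}\nApply this feedback: {critique}"
    return img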
Lines changed: 15 additions & 0 deletions

@@ -0,0 +1,15 @@
+"""
+Swarm of multi modal autonomous agents for manufacturing!
+---------------------------------------------------------
+Health Security agent: monitors the health of working conditions. Input: image of the factory. Output: health safety index, 0.0 - 1.0 (1.0 is the highest).
+Quality Control agent: monitors the quality of the product. Input: image of the product. Output: quality index, 0.0 - 1.0 (1.0 is the highest).
+Productivity agent: monitors the productivity of the factory. Input: image of the factory. Output: productivity index, 0.0 - 1.0 (1.0 is the highest).
+Safety agent: monitors the safety of the factory. Input: image of the factory. Output: safety index, 0.0 - 1.0 (1.0 is the highest).
+Security agent: monitors the security of the factory. Input: image of the factory. Output: security index, 0.0 - 1.0 (1.0 is the highest).
+Sustainability agent: monitors the sustainability of the factory. Input: image of the factory. Output: sustainability index, 0.0 - 1.0 (1.0 is the highest).
+Efficiency agent: monitors the efficiency of the factory. Input: image of the factory. Output: efficiency index, 0.0 - 1.0 (1.0 is the highest).
+
+
+Flow:
+health security agent -> quality control agent -> productivity agent -> safety agent -> security agent -> sustainability agent -> efficiency agent
+"""

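Only the module docstring of this file is shown in the commit view, but the described pipeline is a straight chain of vision agents scoring the same factory image. A minimal sketch of that chain, assuming each agent is a Flow wrapping GPT4VisionAPI as in multi_modal_auto_agent.py; the prompts and image path are illustrative:

from swarms.structs import Flow
from swarms.models.gpt4_vision_api import GPT4VisionAPI

llm = GPT4VisionAPI()
factory_img = "factory.jpg"  # assumed input image

# One prompt per agent, in the order given by the docstring's Flow section.
prompts = [
    "Rate the health and safety of the working conditions from 0.0 to 1.0.",
    "Rate the quality of the product from 0.0 to 1.0.",
    "Rate the productivity of the factory from 0.0 to 1.0.",
    "Rate the safety of the factory from 0.0 to 1.0.",
    "Rate the security of the factory from 0.0 to 1.0.",
    "Rate the sustainability of the factory from 0.0 to 1.0.",
    "Rate the efficiency of the factory from 0.0 to 1.0.",
]

findings = ""
for prompt in prompts:
    agent = Flow(llm=llm, max_loops=1)
    # Each agent sees the image plus the findings accumulated so far.
    findings = agent.run(f"{prompt}\nPrior findings: {findings}", factory_img)
    print(findings)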
swarms/models/base_multimodal_model.py

Lines changed: 27 additions & 19 deletions

@@ -1,3 +1,4 @@
+from abc import abstractmethod
 import asyncio
 import base64
 import concurrent.futures
@@ -7,8 +8,8 @@
 from typing import List, Optional, Tuple
 
 import requests
-from ABC import abstractmethod
 from PIL import Image
+from termcolor import colored
 
 
 class BaseMultiModalModel:
@@ -37,7 +38,6 @@ def __init__(
         self.retries = retries
         self.chat_history = []
 
-
     @abstractmethod
     def __call__(self, text: str, img: str):
         """Run the model"""
@@ -61,17 +61,17 @@ def get_img_from_web(self, img: str):
         except requests.RequestException as error:
             print(f"Error fetching image from {img} and error: {error}")
             return None
-
+
     def encode_img(self, img: str):
         """Encode the image to base64"""
         with open(img, "rb") as image_file:
             return base64.b64encode(image_file.read()).decode("utf-8")
-
+
     def get_img(self, img: str):
         """Get the image from the path"""
         image_pil = Image.open(img)
         return image_pil
-
+
     def clear_chat_history(self):
         """Clear the chat history"""
         self.chat_history = []
@@ -87,11 +87,11 @@ def run_many(
         Args:
             tasks (List[str]): List of tasks
             imgs (List[str]): List of image paths
-
+
         Returns:
             List[str]: List of responses
-
-
+
+
         """
         # Instantiate the thread pool executor
         with ThreadPoolExecutor(max_workers=self.max_workers) as executor:
@@ -101,7 +101,6 @@ def run_many(
             for result in results:
                 print(result)
 
-
    def run_batch(self, tasks_images: List[Tuple[str, str]]) -> List[str]:
         """Process a batch of tasks and images"""
         with concurrent.futures.ThreadPoolExecutor() as executor:
@@ -133,11 +132,11 @@ async def run_batch_async_with_retries(
             for task, img in tasks_images
         ]
         return await asyncio.gather(*futures)
-
+
     def unique_chat_history(self):
         """Get the unique chat history"""
         return list(set(self.chat_history))
-
+
     def run_with_retries(self, task: str, img: str):
         """Run the model with retries"""
         for i in range(self.retries):
@@ -146,7 +145,7 @@ def run_with_retries(self, task: str, img: str):
             except Exception as error:
                 print(f"Error with the request {error}")
                 continue
-
+
     def run_batch_with_retries(self, tasks_images: List[Tuple[str, str]]):
         """Run the model with retries"""
         for i in range(self.retries):
@@ -188,28 +187,37 @@ def get_generation_time(self) -> float:
         if self.start_time and self.end_time:
             return self.end_time - self.start_time
         return 0
-
+
     def get_chat_history(self):
         """Get the chat history"""
         return self.chat_history
-
+
     def get_unique_chat_history(self):
         """Get the unique chat history"""
         return list(set(self.chat_history))
-
+
     def get_chat_history_length(self):
         """Get the chat history length"""
         return len(self.chat_history)
-
+
     def get_unique_chat_history_length(self):
         """Get the unique chat history length"""
         return len(list(set(self.chat_history)))
-
+
     def get_chat_history_tokens(self):
         """Get the chat history tokens"""
         return self._num_tokens()
-
+
     def print_beautiful(self, content: str, color: str = "cyan"):
         """Print Beautifully with termcolor"""
         content = colored(content, color)
-        print(content)
+        print(content)
+
+    def stream(self, content: str):
+        """Stream the output
+
+        Args:
+            content (str): _description_
+        """
+        for chunk in content:
+            print(chunk)

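This file fixes the abstractmethod import (the old `from ABC import abstractmethod` would raise ImportError, since the standard module is lowercase abc), pulls in termcolor for print_beautiful, and adds a simple character-by-character stream() helper. A minimal subclass sketch showing how the base class is meant to be used; EchoVisionModel is illustrative only and assumes the constructor's defaults are sufficient:

from swarms.models.base_multimodal_model import BaseMultiModalModel


class EchoVisionModel(BaseMultiModalModel):
    """Toy model that just echoes its inputs, for exercising the base helpers."""

    def __call__(self, task: str, img: str) -> str:
        return f"task={task!r}, img={img!r}"

    def run(self, task: str, img: str) -> str:
        response = self(task, img)
        self.chat_history.append(response)
        return response


model = EchoVisionModel()
print(model.run("Describe the image", "factory.jpg"))
model.print_beautiful(model.get_chat_history()[-1])  # colored output via termcolor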
swarms/models/gpt4_vision_api.py

Lines changed: 106 additions & 2 deletions

@@ -1,6 +1,7 @@
-import logging
+import logging
 import asyncio
 import base64
+from typing import Optional
 import concurrent.futures
 from termcolor import colored
 import json
@@ -12,6 +13,13 @@
 import requests
 from dotenv import load_dotenv
 
+
+try:
+    import cv2
+except ImportError:
+    print("OpenCV not installed. Please install OpenCV to use this model.")
+    raise ImportError
+
 # Load environment variables
 load_dotenv()
 openai_api_key = os.getenv("OPENAI_API_KEY")
@@ -59,7 +67,8 @@ def __init__(
         max_workers: int = 10,
         max_tokens: str = 300,
         openai_proxy: str = "https://api.openai.com/v1/chat/completions",
-        beautify: bool = False
+        beautify: bool = False,
+        streaming_enabled: Optional[bool] = False,
     ):
         super().__init__()
         self.openai_api_key = openai_api_key
@@ -69,6 +78,7 @@
         self.max_tokens = max_tokens
         self.openai_proxy = openai_proxy
         self.beautify = beautify
+        self.streaming_enabled = streaming_enabled
 
         if self.logging_enabled:
             logging.basicConfig(level=logging.DEBUG)
@@ -123,14 +133,101 @@ def run(self, task: str, img: str):
             out = response.json()
             content = out["choices"][0]["message"]["content"]
 
+            if self.streaming_enabled:
+                content = self.stream_response(content)
+            else:
+                pass
+
             if self.beautify:
                 content = colored(content, "cyan")
+                print(content)
             else:
                 print(content)
+
         except Exception as error:
             print(f"Error with the request: {error}")
             raise error
 
+    def video_prompt(self, frames):
+        """
+        SystemPrompt is a class that generates a prompt for the user to respond to.
+        The prompt is generated based on the current state of the system.
+
+        Parameters
+        ----------
+        frames : list
+            A list of base64 frames
+
+        Returns
+        -------
+        PROMPT : str
+            The system prompt
+
+        Examples
+        --------
+
+        >>> from swarms.models import GPT4VisionAPI
+        >>> llm = GPT4VisionAPI()
+        >>> video = "video.mp4"
+        >>> base64_frames = llm.process_video(video)
+        >>> prompt = llm.video_prompt(base64_frames)
+        >>> print(prompt)
+
+        """
+        PROMPT = f"""
+        These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
+
+        {frames}
+        """
+        return PROMPT
+
+    def stream_response(self, content: str):
+        """Stream the response of the output
+
+        Args:
+            content (str): _description_
+        """
+        for chunk in content:
+            print(chunk)
+
+    def process_video(self, video: str):
+        """
+        Process a video into a list of base64 frames
+
+        Parameters
+        ----------
+        video : str
+            The path to the video file
+
+        Returns
+        -------
+        base64_frames : list
+            A list of base64 frames
+
+        Examples
+        --------
+        >>> from swarms.models import GPT4VisionAPI
+        >>> llm = GPT4VisionAPI()
+        >>> video = "video.mp4"
+        >>> base64_frames = llm.process_video(video)
+
+        """
+        video = cv2.VideoCapture(video)
+
+        base64_frames = []
+        while video.isOpened():
+            success, frame = video.read()
+            if not success:
+                break
+            _, buffer = cv2.imencode(".jpg", frame)
+            base64_frames.append(base64.b64encode(buffer).decode("utf-8"))
+
+        video.release()
+        print(len(base64_frames), "frames read.")
+
+        for img in base64_frames:
+            base64.b64decode(img.encode("utf-8"))
+
     def __call__(self, task: str, img: str):
         """Run the model."""
         try:
@@ -168,10 +265,17 @@ def __call__(self, task: str, img: str):
             out = response.json()
             content = out["choices"][0]["message"]["content"]
 
+            if self.streaming_enabled:
+                content = self.stream_response(content)
+            else:
+                pass
+
             if self.beautify:
                 content = colored(content, "cyan")
+                print(content)
             else:
                 print(content)
+
         except Exception as error:
             print(f"Error with the request: {error}")
             raise error

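The new video helpers are meant to be chained: process_video() samples a video into base64-encoded JPEG frames and video_prompt() wraps a set of frames into an upload-description prompt. A usage sketch, assuming process_video() is amended to return the base64_frames list it builds (as its docstring says it should; the version in this diff prints the frame count but has no return statement). The video path and the frame subsampling step are illustrative:

from swarms.models.gpt4_vision_api import GPT4VisionAPI

llm = GPT4VisionAPI(streaming_enabled=False, beautify=True)

frames = llm.process_video("video.mp4")   # assumed to return the list of base64 frames
prompt = llm.video_prompt(frames[0::30])  # subsample frames to keep the prompt small
print(prompt)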
swarms/models/kosmos_two.py

Lines changed: 1 addition & 1 deletion

@@ -24,7 +24,7 @@ class Kosmos:
     ----------
     model_name : str
         Path to the pretrained model
-
+
     Examples
     --------
     >>> kosmos = Kosmos()

swarms/models/whisperx_model.py

Lines changed: 3 additions & 1 deletion

@@ -99,7 +99,9 @@ def transcribe_youtube_video(self):
             print("The key 'segments' is not found in the result.")
 
     def transcribe(self, audio_file):
-        model = whisperx_model.load_model("large-v2", self.device, self.compute_type)
+        model = whisperx_model.load_model(
+            "large-v2", self.device, self.compute_type
+        )
         audio = whisperx_model.load_audio(audio_file)
         result = model.transcribe(audio, batch_size=self.batch_size)
