Skip to content

Commit

Permalink
Merge branch 'main' of github.com:landing-ai/vision-agent into feat/f…
Browse files Browse the repository at this point in the history
…ine-tune-predict
  • Loading branch information
Dayof committed Aug 21, 2024
2 parents 21d5ee8 + 31af305 commit d6d4b78
Show file tree
Hide file tree
Showing 11 changed files with 608 additions and 425 deletions.
2 changes: 1 addition & 1 deletion examples/custom_tools/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ call out which tool you want to use. For example:
```python
import vision_agent as va

agent = va.agent.VisionAgent(verbosity=2)
agent = va.agent.VisionAgentCoder(verbosity=2)
agent(
"Can you use the 'template_match_' tool to find the location of pid_template.png in pid.png?",
media="pid.png",
Expand Down
2 changes: 2 additions & 0 deletions examples/custom_tools/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
torch
torchvision
2 changes: 1 addition & 1 deletion examples/custom_tools/run_custom_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ def template_match(target_image: np.ndarray, template_image: np.ndarray) -> dict


if __name__ == "__main__":
agent = va.agent.VisionAgent(verbosity=2)
agent = va.agent.VisionAgentCoder(verbosity=2)
result = agent.chat_with_workflow(
[
{
Expand Down
705 changes: 352 additions & 353 deletions poetry.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api"

[tool.poetry]
name = "vision-agent"
version = "0.2.103"
version = "0.2.109"
description = "Toolset for Vision Agent"
authors = ["Landing AI <[email protected]>"]
readme = "README.md"
Expand Down Expand Up @@ -35,8 +35,8 @@ nbformat = "^5.10.4"
rich = "^13.7.1"
langsmith = "^0.1.58"
ipykernel = "^6.29.4"
e2b = "^0.17.1"
e2b-code-interpreter = "0.0.11a27"
e2b = "^0.17.2a50"
e2b-code-interpreter = "0.0.11a37"
tenacity = "^8.3.0"
pillow-heif = "^0.16.0"
pytube = "15.0.0"
Expand Down
16 changes: 13 additions & 3 deletions tests/unit/fixtures.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,21 @@ def langsmith_wrap_oepnai_mock(request, openai_llm_mock):
@pytest.fixture
def openai_lmm_mock(request):
    # Patched OpenAI client whose chat.completions.create returns either a
    # full mocked completion, or (when called with stream=True) a generator
    # of word-sized delta chunks terminated by a None sentinel.
    # `request.param` supplies the mocked completion text via indirect
    # parametrization.
    content = request.param

    def mock_generate(*args, **kwargs):
        if kwargs.get("stream", False):

            def generator():
                # One chunk per whitespace-separated word, plus a final None
                # chunk that marks end-of-stream for the consumer.
                for chunk in content.split(" ") + [None]:
                    yield MagicMock(choices=[MagicMock(delta=MagicMock(content=chunk))])

            return generator()
        else:
            return MagicMock(choices=[MagicMock(message=MagicMock(content=content))])

    # Note the path here is adjusted to where OpenAI is used, not where it's defined
    with patch("vision_agent.lmm.lmm.OpenAI") as mock:
        # Setup a mock response structure that matches what your code expects
        mock_instance = mock.return_value
        mock_instance.chat.completions.create.return_value = MagicMock(
            choices=[MagicMock(message=MagicMock(content=content))]
        )
        # NOTE(review): this second assignment overrides the one above — the
        # rendered diff appears to interleave a removed line with the added
        # one. mock_generate() is also invoked eagerly here, so the caller's
        # eventual stream kwarg is not forwarded; confirm against the actual
        # commit (side_effect=mock_generate would forward kwargs).
        mock_instance.chat.completions.create.return_value = mock_generate()
        yield mock_instance
62 changes: 62 additions & 0 deletions tests/unit/test_lmm.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,24 @@ def test_generate_with_mock(openai_lmm_mock): # noqa: F811
)


@pytest.mark.parametrize(
    "openai_lmm_mock", ["mocked response"], indirect=["openai_lmm_mock"]
)
def test_generate_with_mock_stream(openai_lmm_mock):  # noqa: F811
    """Streaming generate() yields one chunk per word plus a None sentinel,
    and the request message carries the image as an image_url part."""
    temp_image = create_temp_image()
    lmm = OpenAILMM()
    response = lmm.generate("test prompt", media=[temp_image], stream=True)
    expected_response = ["mocked", "response", None]
    # Materialize the stream and compare the whole sequence: the previous
    # enumerate-based loop passed vacuously when the stream yielded nothing
    # and never verified that *all* expected chunks were produced.
    assert list(response) == expected_response
    assert (
        "image_url"
        in openai_lmm_mock.chat.completions.create.call_args.kwargs["messages"][0][
            "content"
        ][1]
    )


@pytest.mark.parametrize(
"openai_lmm_mock", ["mocked response"], indirect=["openai_lmm_mock"]
)
Expand All @@ -49,6 +67,23 @@ def test_chat_with_mock(openai_lmm_mock): # noqa: F811
)


@pytest.mark.parametrize(
    "openai_lmm_mock", ["mocked response"], indirect=["openai_lmm_mock"]
)
def test_chat_with_mock_stream(openai_lmm_mock):  # noqa: F811
    """Streaming chat() yields one chunk per word plus a None sentinel,
    and forwards the user prompt as the first text content part."""
    lmm = OpenAILMM()
    response = lmm.chat([{"role": "user", "content": "test prompt"}], stream=True)
    expected_response = ["mocked", "response", None]
    # Compare the fully materialized stream so a short or empty stream fails
    # loudly instead of the loop passing vacuously.
    assert list(response) == expected_response
    assert (
        openai_lmm_mock.chat.completions.create.call_args.kwargs["messages"][0][
            "content"
        ][0]["text"]
        == "test prompt"
    )


@pytest.mark.parametrize(
"openai_lmm_mock", ["mocked response"], indirect=["openai_lmm_mock"]
)
Expand All @@ -73,6 +108,33 @@ def test_call_with_mock(openai_lmm_mock): # noqa: F811
)


@pytest.mark.parametrize(
    "openai_lmm_mock", ["mocked response"], indirect=["openai_lmm_mock"]
)
def test_call_with_mock_stream(openai_lmm_mock):  # noqa: F811
    """__call__ with stream=True behaves identically for a bare prompt string
    and for an explicit chat-message list."""
    expected_response = ["mocked", "response", None]
    lmm = OpenAILMM()

    # Bare string prompt.
    response = lmm("test prompt", stream=True)
    # Full-sequence comparison: the previous enumerate loop passed vacuously
    # on an empty stream and never checked the chunk count.
    assert list(response) == expected_response
    assert (
        openai_lmm_mock.chat.completions.create.call_args.kwargs["messages"][0][
            "content"
        ][0]["text"]
        == "test prompt"
    )

    # Chat-message list prompt — same expected stream and request shape.
    response = lmm([{"role": "user", "content": "test prompt"}], stream=True)
    assert list(response) == expected_response
    assert (
        openai_lmm_mock.chat.completions.create.call_args.kwargs["messages"][0][
            "content"
        ][0]["text"]
        == "test prompt"
    )


@pytest.mark.parametrize(
"openai_lmm_mock",
['{"Parameters": {"prompt": "cat"}}'],
Expand Down
2 changes: 1 addition & 1 deletion vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def run_conversation(orch: LMM, chat: List[Message]) -> Dict[str, Any]:
dir=WORKSPACE,
conversation=conversation,
)
return extract_json(orch([{"role": "user", "content": prompt}]))
return extract_json(orch([{"role": "user", "content": prompt}], stream=False)) # type: ignore


def run_code_action(code: str, code_interpreter: CodeInterpreter) -> str:
Expand Down
17 changes: 9 additions & 8 deletions vision_agent/agent/vision_agent_coder.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def write_plans(
context = USER_REQ.format(user_request=user_request)
prompt = PLAN.format(context=context, tool_desc=tool_desc, feedback=working_memory)
chat[-1]["content"] = prompt
return extract_json(model.chat(chat))
return extract_json(model(chat, stream=False)) # type: ignore


def pick_plan(
Expand Down Expand Up @@ -160,7 +160,7 @@ def pick_plan(
docstring=tool_info, plans=plan_str, previous_attempts="", media=media
)

code = extract_code(model(prompt))
code = extract_code(model(prompt, stream=False)) # type: ignore
log_progress(
{
"type": "log",
Expand Down Expand Up @@ -211,7 +211,7 @@ def pick_plan(
"code": DefaultImports.prepend_imports(code),
}
)
code = extract_code(model(prompt))
code = extract_code(model(prompt, stream=False)) # type: ignore
tool_output = code_interpreter.exec_isolation(
DefaultImports.prepend_imports(code)
)
Expand Down Expand Up @@ -251,7 +251,7 @@ def pick_plan(
tool_output=tool_output_str[:20_000],
)
chat[-1]["content"] = prompt
best_plan = extract_json(model(chat))
best_plan = extract_json(model(chat, stream=False)) # type: ignore

if verbosity >= 1:
_LOGGER.info(f"Best plan:\n{best_plan}")
Expand Down Expand Up @@ -286,7 +286,7 @@ def write_code(
feedback=feedback,
)
chat[-1]["content"] = prompt
return extract_code(coder(chat))
return extract_code(coder(chat, stream=False)) # type: ignore


def write_test(
Expand All @@ -310,7 +310,7 @@ def write_test(
media=media,
)
chat[-1]["content"] = prompt
return extract_code(tester(chat))
return extract_code(tester(chat, stream=False)) # type: ignore


def write_and_test_code(
Expand Down Expand Up @@ -439,13 +439,14 @@ def debug_code(
while not success and count < 3:
try:
fixed_code_and_test = extract_json(
debugger(
debugger( # type: ignore
FIX_BUG.format(
code=code,
tests=test,
result="\n".join(result.text().splitlines()[-50:]),
feedback=format_memory(working_memory + new_working_memory),
)
),
stream=False,
)
)
success = True
Expand Down
Loading

0 comments on commit d6d4b78

Please sign in to comment.