From 2373b424317dc704df5b5c8c7b788398a84720d0 Mon Sep 17 00:00:00 2001 From: Asia <2736300+humpydonkey@users.noreply.github.com> Date: Wed, 12 Jun 2024 21:51:58 -0700 Subject: [PATCH] Integrate langsmith for better observability (#131) * Integrate langsmith for better observability --- poetry.lock | 10 +- pyproject.toml | 4 +- vision_agent/agent/vision_agent.py | 158 +++++++++++++++++------------ 3 files changed, 102 insertions(+), 70 deletions(-) diff --git a/poetry.lock b/poetry.lock index ec393bc7..0a5128e4 100644 --- a/poetry.lock +++ b/poetry.lock @@ -596,17 +596,17 @@ websockets = ">=11.0.3" [[package]] name = "e2b-code-interpreter" -version = "0.0.7" +version = "0.0.9" description = "E2B Code Interpreter - Stateful code execution" optional = false python-versions = "<4.0,>=3.8" files = [ - {file = "e2b_code_interpreter-0.0.7-py3-none-any.whl", hash = "sha256:3a490e80698640b02cf8340e8451e684cc7725617c6f2f97572e8deb37ba61c6"}, - {file = "e2b_code_interpreter-0.0.7.tar.gz", hash = "sha256:2078cdecb436ef865ba9327e89a809c4ce93a4560a9b629295f49b529f49699d"}, + {file = "e2b_code_interpreter-0.0.9-py3-none-any.whl", hash = "sha256:62f08f799d7d93164b11938272cca27dba702dfe9b5c1af886d4bca0f63fa98d"}, + {file = "e2b_code_interpreter-0.0.9.tar.gz", hash = "sha256:f3aa400f97037e6c43a985471731f8b65673fa42ec170414614a2ac28da8350b"}, ] [package.dependencies] -e2b = ">=0.17.0" +e2b = ">=0.17.1" pydantic = ">1,<3" websocket-client = ">=1.7.0,<2.0.0" @@ -3567,4 +3567,4 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", [metadata] lock-version = "2.0" python-versions = ">=3.9,<4.0" -content-hash = "f83b2f518eb15325260c63eb90a84e54d70c85b047994e281659409bad3ef49d" +content-hash = "556f19356f845564c3f9099d7339b969e84aa37e1f20924837f3e3071e5b3bc2" diff --git a/pyproject.toml b/pyproject.toml index c0f9ab3d..23c267fa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -34,8 +34,8 @@ nbformat = "^5.10.4" rich = "^13.7.1" langsmith = "^0.1.58" ipykernel = "^6.29.4" -e2b = "^0.17.0" -e2b-code-interpreter = "^0.0.7" +e2b = "^0.17.1" +e2b-code-interpreter = "^0.0.9" tenacity = "^8.3.0" [tool.poetry.group.dev.dependencies] diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py index 4b6b5857..fda7ac96 100644 --- a/vision_agent/agent/vision_agent.py +++ b/vision_agent/agent/vision_agent.py @@ -8,6 +8,7 @@ from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast from PIL import Image +from langsmith import traceable from rich.console import Console from rich.style import Style from rich.syntax import Syntax @@ -130,6 +131,7 @@ def extract_image( return new_media +@traceable def write_plan( chat: List[Message], tool_desc: str, @@ -147,6 +149,7 @@ def write_plan( return extract_json(model.chat(chat))["plan"] # type: ignore +@traceable def write_code( coder: LMM, chat: List[Message], @@ -167,6 +170,7 @@ def write_code( return extract_code(coder(chat)) +@traceable def write_test( tester: LMM, chat: List[Message], @@ -191,6 +195,7 @@ def write_test( return extract_code(tester(chat)) +@traceable def reflect( chat: List[Message], plan: str, @@ -266,70 +271,19 @@ def write_and_test_code( count = 0 new_working_memory: List[Dict[str, str]] = [] while not result.success and count < max_retries: - log_progress( - { - "type": "code", - "status": "started", - } - ) - fixed_code_and_test = extract_json( - debugger( - FIX_BUG.format( - code=code, - tests=test, - result="\n".join(result.text().splitlines()[-50:]), - feedback=format_memory(working_memory + new_working_memory), - ) - ) - ) - old_code = code - old_test = test - - if fixed_code_and_test["code"].strip() != "": - code = extract_code(fixed_code_and_test["code"]) - if fixed_code_and_test["test"].strip() != "": - test = extract_code(fixed_code_and_test["test"]) - - new_working_memory.append( - { - "code": f"{code}\n{test}", - "feedback": fixed_code_and_test["reflections"], - "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"), - } - ) - log_progress( - { - "type": "code", - "status": "running", - "payload": { - "code": DefaultImports.prepend_imports(code), - "test": test, - }, - } - ) - - result = code_interpreter.exec_isolation( - f"{DefaultImports.to_code_string()}\n{code}\n{test}" - ) - log_progress( - { - "type": "code", - "status": "completed" if result.success else "failed", - "payload": { - "code": DefaultImports.prepend_imports(code), - "test": test, - "result": result.to_json(), - }, - } - ) if verbosity == 2: - _LOGGER.info( - f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}" - ) - _print_code("Code and test after attempted fix:", code, test) - _LOGGER.info( - f"Code execution result after attempted fix: {result.text(include_logs=True)}" - ) + _LOGGER.info(f"Start debugging attempt {count + 1}") + code, test, result = debug_code( + working_memory, + debugger, + code_interpreter, + code, + test, + result, + new_working_memory, + log_progress, + verbosity, + ) count += 1 if verbosity >= 1: @@ -344,6 +298,83 @@ def write_and_test_code( } +@traceable +def debug_code( + working_memory: List[Dict[str, str]], + debugger: LMM, + code_interpreter: CodeInterpreter, + code: str, + test: str, + result: Execution, + new_working_memory: List[Dict[str, str]], + log_progress: Callable[[Dict[str, Any]], None], + verbosity: int = 0, +) -> tuple[str, str, Execution]: + log_progress( + { + "type": "code", + "status": "started", + } + ) + fixed_code_and_test = extract_json( + debugger( + FIX_BUG.format( + code=code, + tests=test, + result="\n".join(result.text().splitlines()[-50:]), + feedback=format_memory(working_memory + new_working_memory), + ) + ) + ) + old_code = code + old_test = test + + if fixed_code_and_test["code"].strip() != "": + code = extract_code(fixed_code_and_test["code"]) + if fixed_code_and_test["test"].strip() != "": + test = extract_code(fixed_code_and_test["test"]) + + new_working_memory.append( + { + "code": f"{code}\n{test}", + "feedback": fixed_code_and_test["reflections"], + "edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"), + } + ) + log_progress( + { + "type": "code", + "status": "running", + "payload": { + "code": DefaultImports.prepend_imports(code), + "test": test, + }, + } + ) + + result = code_interpreter.exec_isolation( + f"{DefaultImports.to_code_string()}\n{code}\n{test}" + ) + log_progress( + { + "type": "code", + "status": "completed" if result.success else "failed", + "payload": { + "code": DefaultImports.prepend_imports(code), + "test": test, + "result": result.to_json(), + }, + } + ) + if verbosity == 2: + _print_code("Code and test after attempted fix:", code, test) + _LOGGER.info( + f"Reflection: {fixed_code_and_test['reflections']}\nCode execution result after attempted fix: {result.text(include_logs=True)}" + ) + + return code, test, result + + def _print_code(title: str, code: str, test: Optional[str] = None) -> None: _CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True)) _CONSOLE.print("=" * 30 + " Code " + "=" * 30) @@ -481,6 +512,7 @@ def __call__( results.pop("working_memory") return results # type: ignore + @traceable def chat_with_workflow( self, chat: List[Message],