Skip to content

Commit

Permalink
Integrate langsmith for better observability (#131)
Browse files Browse the repository at this point in the history
* Integrate langsmith for better observability
  • Loading branch information
humpydonkey authored Jun 13, 2024
1 parent a5d78f6 commit 2373b42
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 70 deletions.
10 changes: 5 additions & 5 deletions poetry.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ nbformat = "^5.10.4"
rich = "^13.7.1"
langsmith = "^0.1.58"
ipykernel = "^6.29.4"
e2b = "^0.17.0"
e2b-code-interpreter = "^0.0.7"
e2b = "^0.17.1"
e2b-code-interpreter = "^0.0.9"
tenacity = "^8.3.0"

[tool.poetry.group.dev.dependencies]
Expand Down
158 changes: 95 additions & 63 deletions vision_agent/agent/vision_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Any, Callable, Dict, List, Optional, Sequence, Union, cast

from PIL import Image
from langsmith import traceable
from rich.console import Console
from rich.style import Style
from rich.syntax import Syntax
Expand Down Expand Up @@ -130,6 +131,7 @@ def extract_image(
return new_media


@traceable
def write_plan(
chat: List[Message],
tool_desc: str,
Expand All @@ -147,6 +149,7 @@ def write_plan(
return extract_json(model.chat(chat))["plan"] # type: ignore


@traceable
def write_code(
coder: LMM,
chat: List[Message],
Expand All @@ -167,6 +170,7 @@ def write_code(
return extract_code(coder(chat))


@traceable
def write_test(
tester: LMM,
chat: List[Message],
Expand All @@ -191,6 +195,7 @@ def write_test(
return extract_code(tester(chat))


@traceable
def reflect(
chat: List[Message],
plan: str,
Expand Down Expand Up @@ -266,70 +271,19 @@ def write_and_test_code(
count = 0
new_working_memory: List[Dict[str, str]] = []
while not result.success and count < max_retries:
log_progress(
{
"type": "code",
"status": "started",
}
)
fixed_code_and_test = extract_json(
debugger(
FIX_BUG.format(
code=code,
tests=test,
result="\n".join(result.text().splitlines()[-50:]),
feedback=format_memory(working_memory + new_working_memory),
)
)
)
old_code = code
old_test = test

if fixed_code_and_test["code"].strip() != "":
code = extract_code(fixed_code_and_test["code"])
if fixed_code_and_test["test"].strip() != "":
test = extract_code(fixed_code_and_test["test"])

new_working_memory.append(
{
"code": f"{code}\n{test}",
"feedback": fixed_code_and_test["reflections"],
"edits": get_diff(f"{old_code}\n{old_test}", f"{code}\n{test}"),
}
)
log_progress(
{
"type": "code",
"status": "running",
"payload": {
"code": DefaultImports.prepend_imports(code),
"test": test,
},
}
)

result = code_interpreter.exec_isolation(
f"{DefaultImports.to_code_string()}\n{code}\n{test}"
)
log_progress(
{
"type": "code",
"status": "completed" if result.success else "failed",
"payload": {
"code": DefaultImports.prepend_imports(code),
"test": test,
"result": result.to_json(),
},
}
)
if verbosity == 2:
_LOGGER.info(
f"Debug attempt {count + 1}, reflection: {fixed_code_and_test['reflections']}"
)
_print_code("Code and test after attempted fix:", code, test)
_LOGGER.info(
f"Code execution result after attempted fix: {result.text(include_logs=True)}"
)
_LOGGER.info(f"Start debugging attempt {count + 1}")
code, test, result = debug_code(
working_memory,
debugger,
code_interpreter,
code,
test,
result,
new_working_memory,
log_progress,
verbosity,
)
count += 1

if verbosity >= 1:
Expand All @@ -344,6 +298,83 @@ def write_and_test_code(
}


@traceable
def debug_code(
    working_memory: List[Dict[str, str]],
    debugger: LMM,
    code_interpreter: CodeInterpreter,
    code: str,
    test: str,
    result: Execution,
    new_working_memory: List[Dict[str, str]],
    log_progress: Callable[[Dict[str, Any]], None],
    verbosity: int = 0,
) -> tuple[str, str, Execution]:
    """Run a single debugging round over a failing code/test pair.

    Asks the debugger LMM for a fix (given the last execution output and
    accumulated feedback), applies any non-empty code/test replacement it
    suggests, re-executes the combined code and test in an isolated
    interpreter, and appends a record of the attempt to *new_working_memory*.

    Returns the (possibly updated) code, test, and the fresh Execution result.
    """
    log_progress({"type": "code", "status": "started"})

    # Snapshot the pre-fix version so the edit diff can be recorded below.
    prev_code, prev_test = code, test

    # Only the last 50 lines of the execution output are fed back, to keep
    # the debugging prompt bounded.
    debug_prompt = FIX_BUG.format(
        code=code,
        tests=test,
        result="\n".join(result.text().splitlines()[-50:]),
        feedback=format_memory(working_memory + new_working_memory),
    )
    suggestion = extract_json(debugger(debug_prompt))

    # Apply only non-empty replacements; an empty field means "keep as-is".
    if suggestion["code"].strip() != "":
        code = extract_code(suggestion["code"])
    if suggestion["test"].strip() != "":
        test = extract_code(suggestion["test"])

    new_working_memory.append(
        {
            "code": f"{code}\n{test}",
            "feedback": suggestion["reflections"],
            "edits": get_diff(f"{prev_code}\n{prev_test}", f"{code}\n{test}"),
        }
    )

    log_progress(
        {
            "type": "code",
            "status": "running",
            "payload": {
                "code": DefaultImports.prepend_imports(code),
                "test": test,
            },
        }
    )
    result = code_interpreter.exec_isolation(
        f"{DefaultImports.to_code_string()}\n{code}\n{test}"
    )
    log_progress(
        {
            "type": "code",
            "status": "completed" if result.success else "failed",
            "payload": {
                "code": DefaultImports.prepend_imports(code),
                "test": test,
                "result": result.to_json(),
            },
        }
    )

    if verbosity == 2:
        _print_code("Code and test after attempted fix:", code, test)
        _LOGGER.info(
            f"Reflection: {suggestion['reflections']}\nCode execution result after attempted fix: {result.text(include_logs=True)}"
        )

    return code, test, result


def _print_code(title: str, code: str, test: Optional[str] = None) -> None:
_CONSOLE.print(title, style=Style(bgcolor="dark_orange3", bold=True))
_CONSOLE.print("=" * 30 + " Code " + "=" * 30)
Expand Down Expand Up @@ -481,6 +512,7 @@ def __call__(
results.pop("working_memory")
return results # type: ignore

@traceable
def chat_with_workflow(
self,
chat: List[Message],
Expand Down

0 comments on commit 2373b42

Please sign in to comment.