Skip to content

Commit

Permalink
AgentCoder-like Example for Coding Solutions (#71)
Browse files Browse the repository at this point in the history
* fix conflict

* use tester as debug

* added comments

* format fix

* fix type errors

* fix trailing spaces

* fix newline

* fix flake8

* add ignores for windows

* undo window ignores

* do not run for windows

* format fix

* fix for windows

* test no type check

* fix type errors
  • Loading branch information
dillonalaird authored May 1, 2024
1 parent 1fee1fd commit 32263ff
Show file tree
Hide file tree
Showing 7 changed files with 802 additions and 20 deletions.
1 change: 1 addition & 0 deletions vision_agent/agent/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
from .agent import Agent
from .agent_coder import AgentCoder
from .easytool import EasyTool
from .reflexion import Reflexion
from .vision_agent import VisionAgent
170 changes: 170 additions & 0 deletions vision_agent/agent/agent_coder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import json
import logging
import os
import sys
from pathlib import Path
from typing import Dict, List, Optional, Union

from vision_agent.agent import Agent
from vision_agent.llm import LLM, OpenAILLM
from vision_agent.lmm import LMM, OpenAILMM
from vision_agent.tools.tools_v2 import TOOLS_DOCSTRING, UTILITIES_DOCSTRING

from .agent_coder_prompts import DEBUG, FIX_BUG, PROGRAM, TEST, VISUAL_TEST
from .execution import IMPORT_HELPER, check_correctness

# Configure root logging to write to stdout at import time and create this
# module's logger; AgentCoder raises the logger to INFO when verbose=True.
logging.basicConfig(stream=sys.stdout)
_LOGGER = logging.getLogger(__name__)


def write_tests(question: str, code: str, model: LLM) -> str:
    """Ask ``model`` to write test code for ``code``.

    Args:
        question: The user's task description, inserted into the ``TEST`` prompt.
        code: The code under test, inserted into the ``TEST`` prompt.
        model: Callable LLM that maps a prompt string to a completion string.

    Returns:
        The model's completion after it has been passed through
        ``preprocess_data``.
    """
    test_prompt = TEST.format(question=question, code=code)
    return preprocess_data(model(test_prompt))


def preprocess_data(code: str) -> str:
    """Extract the contents of the first ```python fenced block in ``code``.

    Args:
        code: Raw text, typically an LLM completion, that may wrap its code in
            a Markdown ```python fence.

    Returns:
        The text between the first ```python marker and the next ``` marker.
        If there is no ```python marker the input is returned unchanged. If the
        fence is opened but never closed, everything after the opening marker
        is returned.
    """
    opening = "```python"
    start = code.find(opening)
    if start == -1:
        return code

    body = code[start + len(opening):]
    end = body.find("```")
    # str.find returns -1 when the closing fence is missing (e.g. a truncated
    # completion); slicing with -1 would silently drop the last character.
    return body if end == -1 else body[:end]


def parse_file_name(s: str) -> str:
    """Return the ``.png`` tokens found in ``s``, concatenated together.

    ``s`` is split on single space characters; every piece ending in ``.png``
    is kept and the kept pieces are joined with no separator. An empty string
    is returned when no piece matches.
    """
    # Only png outputs are produced, so that is the only suffix looked for.
    png_tokens = filter(lambda token: token.endswith(".png"), s.split(" "))
    return "".join(png_tokens)


def write_program(question: str, feedback: str, model: LLM) -> str:
    """Ask ``model`` to write a program that answers ``question``.

    Args:
        question: The user's task description.
        feedback: Feedback text inserted into the prompt's feedback section.
        model: Callable LLM that maps a prompt string to a completion string.

    Returns:
        The model's completion after it has been passed through
        ``preprocess_data``.
    """
    fields = dict(docstring=TOOLS_DOCSTRING, question=question, feedback=feedback)
    response = model(PROGRAM.format(**fields))
    return preprocess_data(response)


def write_debug(question: str, code: str, feedback: str, model: LLM) -> str:
    """Ask ``model`` to write code that runs ``code`` and prints its output.

    Args:
        question: The user's task description.
        code: The program to be exercised, inserted into the ``DEBUG`` prompt.
        feedback: Feedback text inserted into the prompt's feedback section.
        model: Callable LLM that maps a prompt string to a completion string.

    Returns:
        The model's completion after it has been passed through
        ``preprocess_data``.
    """
    fields = dict(
        docstring=UTILITIES_DOCSTRING,
        code=code,
        question=question,
        feedback=feedback,
    )
    response = model(DEBUG.format(**fields))
    return preprocess_data(response)


def execute_tests(code: str, tests: str) -> Dict[str, Union[str, bool]]:
    """Run ``code`` followed by ``tests`` through ``check_correctness``.

    The executed source is ``IMPORT_HELPER``, ``code`` and ``tests`` joined by
    newlines, in that order.

    Returns:
        Whatever ``check_correctness`` returns; callers in this module read its
        ``"passed"`` and ``"result"`` keys.
    """
    program = f"{IMPORT_HELPER}\n{code}\n{tests}"
    # NOTE(review): 20.0 is presumably a timeout in seconds -- confirm against
    # check_correctness.
    outcome = check_correctness(program, 20.0)
    return outcome


def run_visual_tests(
    question: str, code: str, viz_file: str, feedback: str, model: LMM
) -> Dict[str, Union[str, bool]]:
    """Ask a multimodal ``model`` to review the visualization in ``viz_file``.

    Args:
        question: The user's task description.
        code: The program whose output is being inspected.
        viz_file: Path of the image handed to the model alongside the prompt.
        feedback: Feedback text inserted into the prompt's feedback section.
        model: Callable LMM taking a prompt and an ``images`` list.

    Returns:
        The model's completion parsed as JSON. The ``VISUAL_TEST`` prompt asks
        for an object with ``"finished"`` and ``"feedback"`` keys.

    Raises:
        json.JSONDecodeError: If the completion is not valid JSON.
    """
    fields = dict(
        docstring=TOOLS_DOCSTRING,
        code=code,
        question=question,
        feedback=feedback,
    )
    reply = model(VISUAL_TEST.format(**fields), images=[viz_file])
    # The result's shape is dictated by the prompt, not by anything the type
    # checker can see.
    return json.loads(reply)  # type: ignore


def fix_bugs(code: str, tests: str, result: str, feedback: str, model: LLM) -> str:
    """Ask ``model`` to repair ``code`` given the error produced by ``tests``.

    Args:
        code: The failing program.
        tests: The code that was run against ``code``.
        result: The error output produced by that run.
        feedback: Earlier feedback on the code, included in the prompt.
        model: Callable LLM that maps a prompt string to a completion string.

    Returns:
        The model's completion after it has been passed through
        ``preprocess_data``.
    """
    # The keyword names must match the FIX_BUG placeholders ({code}, {tests},
    # {result}, {feedback}); any other names make str.format raise KeyError.
    prompt = FIX_BUG.format(code=code, tests=tests, result=result, feedback=feedback)
    completion = model(prompt)
    return preprocess_data(completion)


class AgentCoder(Agent):
    """AgentCoder is based on the AgentCoder paper https://arxiv.org/abs/2312.13010
    and its open source code https://github.com/huangd1999/AgentCoder with some key
    differences. AgentCoder comprises 3 components: a coder agent, a tester agent,
    and an executor. The tester agent writes code to test the code written by the coder
    agent, but in our case, because we are solving a vision task, it's difficult to write
    testing code. We instead have the tester agent write code to visualize the output
    of the code written by the coder agent. If the code fails, we pass it back to the
    coder agent to fix the bug; if it succeeds we pass it to a visual tester agent, which
    is an LMM model like GPT4V, to visually inspect the output and make sure it looks
    good."""

    def __init__(
        self,
        coder_agent: Optional[LLM] = None,
        tester_agent: Optional[LLM] = None,
        visual_tester_agent: Optional[LMM] = None,
        verbose: bool = False,
        max_turns: int = 3,
    ) -> None:
        """Set up the three agents, falling back to OpenAI models when omitted.

        Args:
            coder_agent: LLM used to write and repair the program. Defaults to
                ``OpenAILLM(temperature=0.1)``.
            tester_agent: LLM used to write the code that exercises the
                program. Defaults to ``OpenAILLM(temperature=0.1)``.
            visual_tester_agent: LMM intended to inspect visual output.
                Defaults to ``OpenAILMM(temperature=0.1, json_mode=True)``.
            verbose: When true, raise this module's logger to INFO.
            max_turns: Maximum number of write/run/fix rounds ``chat`` performs
                before returning the last program it has.
        """
        self.coder_agent = (
            OpenAILLM(temperature=0.1) if coder_agent is None else coder_agent
        )
        self.tester_agent = (
            OpenAILLM(temperature=0.1) if tester_agent is None else tester_agent
        )
        self.visual_tester_agent = (
            OpenAILMM(temperature=0.1, json_mode=True)
            if visual_tester_agent is None
            else visual_tester_agent
        )
        self.max_turns = max_turns
        if verbose:
            _LOGGER.setLevel(logging.INFO)

    def __call__(
        self,
        input: Union[List[Dict[str, str]], str],
        image: Optional[Union[str, Path]] = None,
    ) -> str:
        """Run ``chat``, wrapping a plain string as a single user message."""
        if isinstance(input, str):
            input = [{"role": "user", "content": input}]
        return self.chat(input, image)

    def chat(
        self,
        input: List[Dict[str, str]],
        image: Optional[Union[str, Path]] = None,
    ) -> str:
        """Generate a program for the first message in ``input``.

        Each turn writes a program, writes code that exercises it, and runs
        both. The first program whose run passes is returned; otherwise the
        coder agent is asked to fix it and the next turn starts.

        Args:
            input: Chat messages; only ``input[0]["content"]`` is used.
            image: Optional image path whose absolute form is appended to the
                question.

        Returns:
            ``IMPORT_HELPER`` followed by a newline and the program: the first
            one that passed, or the last repaired one once ``max_turns`` is
            exhausted.
        """
        question = input[0]["content"]
        if image:
            question += f" Input file path: {os.path.abspath(image)}"

        code = ""
        feedback = ""
        for _ in range(self.max_turns):
            code = write_program(question, feedback, self.coder_agent)
            _LOGGER.info("code:\n%s", code)
            debug = write_debug(question, code, feedback, self.tester_agent)
            _LOGGER.info("debug:\n%s", debug)
            results = execute_tests(code, debug)
            _LOGGER.info(
                "execution results: passed: %s\n%s",
                results["passed"],
                results["result"],
            )

            if results["passed"]:
                # TODO: Sometimes it prints nothing, so we need to handle that case
                # TODO: The visual agent reflection does not work very well, needs more testing
                # viz_test_results = run_visual_tests(
                #     question, code, parse_file_name(results["result"].strip()), feedback, self.visual_tester_agent
                # )
                # _LOGGER.info(f"visual test results:\n{viz_test_results}")
                # if viz_test_results["finished"]:
                #     return f"{IMPORT_HELPER}\n{code}"
                # feedback += f"\n{viz_test_results['feedback']}"
                return f"{IMPORT_HELPER}\n{code}"

            # NOTE(review): unless this is the final turn, the repaired code is
            # overwritten by write_program on the next iteration, and feedback
            # is never updated while the visual test above stays disabled.
            code = fix_bugs(
                code, debug, results["result"].strip(), feedback, self.coder_agent  # type: ignore
            )
            _LOGGER.info("fixed code:\n%s", code)

        return f"{IMPORT_HELPER}\n{code}"

    def log_progress(self, description: str) -> None:
        """Log ``description`` at INFO level on this module's logger."""
        _LOGGER.info(description)
135 changes: 135 additions & 0 deletions vision_agent/agent/agent_coder_prompts.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
# Prompt template asking the model to implement `execute(image_path)`.
# str.format placeholders: {docstring}, {question}, {feedback}.
PROGRAM = """
**Role**: You are a software programmer.
**Task**: As a programmer, you are required to complete the function. Use a Chain-of-Thought approach to break down the problem, create pseudocode, and then write the code in Python language. Ensure that your code is efficient, readable, and well-commented. Return the requested information from the function you create.
**Documentation**:
This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task, you do not need to worry about defining them or importing them and can assume they are available to you.
{docstring}
**Input Code Snippet**:
```python
def execute(image_path: str):
# Your code here
```
**User Instructions**:
{question}
**Previous Feedback**:
{feedback}
**Instructions**:
1. **Understand and Clarify**: Make sure you understand the task.
2. **Algorithm/Method Selection**: Decide on the most efficient way.
3. **Pseudocode Creation**: Write down the steps you will follow in pseudocode.
4. **Code Generation**: Translate your pseudocode into executable Python code.
"""

# Prompt template asking the model to call `execute` and print its output or
# the name of a file holding a visualization of it.
# str.format placeholders: {docstring}, {code}, {question}, {feedback}.
DEBUG = """
**Role**: You are a software programmer.
**Task**: Your task is to run the `execute` function and either print the output or print a file name containing visualized output for another agent to examine. The other agent will then use your output, either the printed return value of the function or the visualized output as a file, to determine if `execute` is functioning correctly.
**Documentation**
This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task, you do not need to worry about defining them or importing them and can assume they are available to you.
{docstring}
**Input Code Snippet**:
```python
### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
{code}
```
**User Instructions**:
{question}
**Previous Feedback**:
{feedback}
**Instructions**:
1. **Understand and Clarify**: Make sure you understand the task.
2. **Code Execution**: Run the `execute` function with the given input from the user instructions.
3. **Output Generation**: Print the output or save it as a file for visualization utilizing the functions you have access to.
"""

# Prompt template asking a vision model to judge the visualized output of
# `execute` and reply with a JSON object holding "finished" and "feedback".
# str.format placeholders: {docstring}, {code}, {question}, {feedback}; the
# doubled braces in the last line are escapes that render as literal { }.
VISUAL_TEST = """
**Role**: You are a machine vision expert.
**Task**: Your task is to visually inspect the output of the `execute` function and determine if the visualization of the function output looks correct given the user's instructions. If not, you can provide suggestions to improve the `execute` function to imporve it.
**Documentation**:
This is the documentation for the functions you have access to. You may call any of these functions to help you complete the task, you do not need to worry about defining them or importing them and can assume they are available to you.
{docstring}
**Input Code Snippet**:
This is the code that
```python
{code}
```
**User Instructions**:
{question}
**Previous Feedback**:
{feedback}
**Instructions**:
1. **Visual Inspection**: Examine the visual output of the `execute` function.
2. **Evaluation**: Determine if the visualization is correct based on the user's instructions.
3. **Feedback**: Provide feedback on the visualization and suggest improvements if necessary.
4. **Clear Concrete Instructions**: Provide clear concrete instructions to improve the results. You can only make coding suggestions based on the either the input code snippet or the documented code provided. For example, do not say the threshold needs to be adjust, instead provide an exact value for adjusting the threshold.
Provide output in JSON format {{"finished": boolean, "feedback": "your feedback"}} where "finished" is True if the output is correct and False if not and "feedback" is your feedback.
"""

# Prompt template asking the model to repair code that failed when run.
# str.format placeholders: {code}, {tests}, {result}, {feedback}; callers must
# supply exactly these keyword names.
FIX_BUG = """
Please re-complete the code to fix the error message. Here is the previous version:
```python
{code}
```
When we run this code:
```python
{tests}
```
It raises this error:
```python
{result}
```
This is previous feedback provided on the code:
{feedback}
Please fix the bug by follow the error information and only return python code. You do not need return the test cases. The re-completion code should in triple backticks format(i.e., in ```python ```).
"""

# Prompt template asking the model to write basic, edge and large-scale test
# cases for the `execute` function.
# str.format placeholders: {question}, {code}.
TEST = """
**Role**: As a tester, your task is to create comprehensive test cases for the incomplete `execute` function. These test cases should encompass Basic, Edge, and Large Scale scenarios to ensure the code's robustness, reliability, and scalability.
**User Instructions**:
{question}
**Input Code Snippet**:
```python
### Please decided how would you want to generate test cases. Based on incomplete code or completed version.
{code}
```
**1. Basic Test Cases**:
- **Objective**: To verify the fundamental functionality of the `execute` function under normal conditions.
**2. Edge Test Cases**:
- **Objective**: To evaluate the function's behavior under extreme or unusual conditions.
**3. Large Scale Test Cases**:
- **Objective**: To assess the function’s performance and scalability with large data samples.
**Instructions**:
- Implement a comprehensive set of test cases following the guidelines above.
- Ensure each test case is well-documented with comments explaining the scenario it covers.
- Pay special attention to edge cases as they often reveal hidden bugs.
- For large-scale tests, focus on the function's efficiency and performance under heavy loads.
"""
Loading

0 comments on commit 32263ff

Please sign in to comment.