Fix docs (#117)

* fixed docs * update lock file
landing-ai · Jun 6, 2024 · db7d1ca · db7d1ca
1 parent 3dae514
commit db7d1ca
Show file tree

Hide file tree

Showing 13 changed files with 522 additions and 523 deletions.
diff --git a/docs/api/agent.md b/docs/api/agent.md
@@ -1,16 +1,14 @@
-::: vision_agent.agent
+::: vision_agent.agent.agent.Agent
 
-::: vision_agent.agent.agent
+::: vision_agent.agent.vision_agent.VisionAgent
 
-::: vision_agent.agent.vision_agent
+::: vision_agent.agent.agent_coder.AgentCoder
 
-::: vision_agent.agent.agent_coder
+::: vision_agent.agent.data_interpreter.DataInterpreter
 
-::: vision_agent.agent.data_interpreter
+::: vision_agent.agent.easytool_v2.EasyToolV2
 
-::: vision_agent.agent.easytool_v2
+::: vision_agent.agent.easytool.EasyTool
 
-::: vision_agent.agent.easytool
-
-::: vision_agent.agent.reflexion
+::: vision_agent.agent.reflexion.Reflexion
 
diff --git a/docs/api/image_utils.md b/docs/api/image_utils.md
diff --git a/docs/api/llm.md b/docs/api/llm.md
@@ -1,3 +1 @@
-::: vision_agent.llm
-
-::: vision_agent.llm.llm
+::: vision_agent.llm.OpenAILLM
diff --git a/docs/api/lmm.md b/docs/api/lmm.md
@@ -1,3 +1 @@
-::: vision_agent.lmm
-
-::: vision_agent.lmm.lmm
+::: vision_agent.lmm.OpenAILMM
diff --git a/docs/easy_tool_v2.md b/docs/easy_tool_v2.md
@@ -91,7 +91,7 @@ agent(
 ```
 Here, `reference_mask.png` and `reference_image.png` in `reference_data` could be any
 image with its corresponding mask that is the object you want to detect in `image.jpg`.
-You can find a demo app to generate masks for DINOv [here](examples/mask_app/).
+You can find a demo app to generate masks for DINOv [here](https://github.com/landing-ai/vision-agent/tree/main/examples/mask_app).
 
 ### Tools
 There are a variety of tools for the model or the user to use. Some are executed locally
@@ -134,7 +134,7 @@ class NumItems(Tool):
 ```
 This will register it with the list of tools Easy Tool V2 has access to. It will be able
 to pick it based on the tool description and use it based on the usage provided. You can
-find an example that creates a custom tool for template matching [here](examples/custom_tools/).
+find an example that creates a custom tool for template matching [here](https://github.com/landing-ai/vision-agent/tree/main/examples/custom_tools).
 
 #### Tool List
 | Tool | Description |

diff --git a/docs/index.md b/docs/index.md
@@ -136,7 +136,7 @@ def custom_tool(image_path: str) -> str:
 
 You need to ensure you call `@va.tools.register_tool` with any imports it might use and
 ensure the documentation is in the same format above with description, `Parameters:`,
-`Returns:`, and `Example\n-------`. You can find an example use case [here](examples/custom_tools/).
+`Returns:`, and `Example\n-------`. You can find an example use case [here](https://github.com/landing-ai/vision-agent/tree/main/examples/custom_tools).
 
 ### Azure Setup
 If you want to use Azure OpenAI models, you can set the environment variable:

diff --git a/mkdocs.yml b/mkdocs.yml
@@ -37,5 +37,5 @@ nav:
  - vision_agent.tools: api/tools.md
  - vision_agent.llm: api/llm.md
  - vision_agent.lmm: api/lmm.md
- - vision_agent.image_utils: api/image_utils.md
- - Old documentation: old.md
+ - vision_agent.utils: api/utils.md
+ - EasyToolV2: easy_tool_v2.md
diff --git a/poetry.lock b/poetry.lock
diff --git a/vision_agent/agent/easytool_v2.py b/vision_agent/agent/easytool_v2.py
@@ -428,12 +428,12 @@ def visualize_result(all_tool_results: List[Dict]) -> Sequence[Union[str, Path]]
 
 
 class EasyToolV2(Agent):
- r"""EasyToolV2 is an agent framework that utilizes tools as well as self
- reflection to accomplish tasks, in particular vision tasks. EasyToolV2 is based
- off of EasyTool https://arxiv.org/abs/2401.06201 and Reflexion
- https://arxiv.org/abs/2303.11366 where it will attempt to complete a task and then
- reflect on whether or not it was able to accomplish the task based off of the plan
- and final results, if not it will redo the task with this newly added reflection.
+ """EasyToolV2 is an agent framework that utilizes tools as well as self reflection
+ to accomplish tasks, in particular vision tasks. EasyToolV2 is based off of EasyTool
+ https://arxiv.org/abs/2401.06201 and Reflexion https://arxiv.org/abs/2303.11366
+ where it will attempt to complete a task and then reflect on whether or not it was
+ able to accomplish the task based off of the plan and final results. If not, it
+ will redo the task with this newly added reflection.
 
  Example
  -------
@@ -461,7 +461,10 @@ def __init__(
  reflect_model: the model to use for self reflection.
  max_retries: maximum number of retries to attempt to complete the task.
  verbose: whether to print more logs.
- report_progress_callback: a callback to report the progress of the agent. This is useful for streaming logs in a web application where multiple EasyToolV2 instances are running in parallel. This callback ensures that the progress are not mixed up.
+ report_progress_callback: a callback to report the progress of the agent.
+ This is useful for streaming logs in a web application where multiple
+ EasyToolV2 instances are running in parallel. This callback ensures
+ that the progress is not mixed up.
  """
  self.task_model = (
  OpenAILLM(model_name="gpt-4-turbo", json_mode=True, temperature=0.0)
@@ -495,9 +498,10 @@ def __call__(
  """Invoke the vision agent.
 
  Parameters:
- chat: A conversation in the format of
- [{"role": "user", "content": "describe your task here..."}].
- image: The input image referenced in the chat parameter.
+ input: A conversation in the format of
+ [{"role": "user", "content": "describe your task here..."}] or a string
+ containing just the content.
+ media: The input media referenced in the input parameter.
  reference_data: A dictionary containing the reference image, mask or bounding
  box in the format of:
  {"image": "image.jpg", "mask": "mask.jpg", "bbox": [0.1, 0.2, 0.1, 0.2]}
@@ -549,7 +553,7 @@ def chat_with_workflow(
  Parameters:
  chat: A conversation in the format of
  [{"role": "user", "content": "describe your task here..."}].
- image: The input image referenced in the chat parameter.
+ media: The input media referenced in the chat parameter.
  reference_data: A dictionary containing the reference image, mask or bounding
  box in the format of:
  {"image": "image.jpg", "mask": "mask.jpg", "bbox": [0.1, 0.2, 0.1, 0.2]}
@@ -558,9 +562,8 @@ def chat_with_workflow(
  self_reflection: boolean to enable and disable self reflection.
 
  Returns:
- A tuple where the first item is the final answer and the second item is a
- list of all the tool results. The last item in the tool results also
- contains the visualized output.
+ Tuple[str, List[Dict]]: A tuple where the first item is the final answer
+ and the second item is a list of all the tool results.
  """
  if len(chat) == 0:
  raise ValueError("Input cannot be empty.")

diff --git a/vision_agent/agent/reflexion.py b/vision_agent/agent/reflexion.py
@@ -144,7 +144,7 @@ def __call__(
 
  Parameters:
 input: a prompt that describes the task or a conversation in the format of [{"role": "user", "content": "describe your task here..."}].
- image: the input image referenced in the prompt parameter.
+ media: the input media referenced in the input parameter.
 
  Returns:
  A text response.

diff --git a/vision_agent/agent/vision_agent.py b/vision_agent/agent/vision_agent.py
@@ -442,10 +442,10 @@ def __call__(
  """Chat with Vision Agent and return intermediate information regarding the task.
 
  Parameters:
- chat (List[Dict[str, str]]): A conversation in the format of
- [{"role": "user", "content": "describe your task here..."}].
+ input (Union[List[Dict[str, str]], str]): A conversation in the format of
+ [{"role": "user", "content": "describe your task here..."}] or a string
+ of just the contents.
  media (Optional[Union[str, Path]]): The media file to be used in the task.
- self_reflection (bool): Whether to reflect on the task and debug the code.
 
  Returns:
  str: The code output by the Vision Agent.
@@ -471,7 +471,8 @@ def chat_with_workflow(
  [{"role": "user", "content": "describe your task here..."}].
  media (Optional[Union[str, Path]]): The media file to be used in the task.
  self_reflection (bool): Whether to reflect on the task and debug the code.
- show_visualization (bool): If True, it opens a new window locally to show the image(s) created by visualization code (if there is any).
+ display_visualization (bool): If True, it opens a new window locally to
+ show the image(s) created by visualization code (if there is any).
 
  Returns:
  Dict[str, Any]: A dictionary containing the code, test, test result, plan,