landing-ai · shankar-vision-eng · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024 · Feb 28, 2024
diff --git a/examples/img/ct_scan1.jpg b/examples/img/ct_scan1.jpg
diff --git a/examples/img/ct_scan2.jpg b/examples/img/ct_scan2.jpg
diff --git a/examples/img/doc1.jpg b/examples/img/doc1.jpg
diff --git a/examples/img/doc2.jpg b/examples/img/doc2.jpg
diff --git a/examples/img/doc3.jpg b/examples/img/doc3.jpg
diff --git a/examples/va_example.ipynb b/examples/va_example.ipynb
diff --git a/vision_agent/data/data.py b/vision_agent/data/data.py
@@ -2,7 +2,7 @@
 
 import uuid
 from pathlib import Path
-from typing import Dict, List, Optional, Union, cast
+from typing import Dict, List, Optional, Union, cast, Callable
 
 import faiss
 import numpy as np
@@ -44,18 +44,25 @@ def add_lmm(self, lmm: LMM) -> Self:
  self.lmm = lmm
  return self
 
- def add_column(self, name: str, prompt: str) -> Self:
+ def add_column(
+ self, name: str, prompt: str, func: Optional[Callable[[str], str]] = None
+ ) -> Self:
  r"""Adds a new column to the DataFrame containing the generated metadata from the LMM.
 
  Args:
  name (str): The name of the column to be added.
  prompt (str): The prompt to be used to generate the metadata.
+ func (Optional[Callable[[str], str]]): A Python function applied to the output of `lmm.generate` before it is stored in the column. Defaults to None.
  """
  if self.lmm is None:
  raise ValueError("LMM not set yet")
 
  self.df[name] = self.df["image_paths"].progress_apply( # type: ignore
- lambda x: self.lmm.generate(prompt, image=x)
+ lambda x: (
+ func(self.lmm.generate(prompt, image=x))
+ if func
+ else self.lmm.generate(prompt, image=x)
+ )
  )
  return self
 

diff --git a/vision_agent/lmm/lmm.py b/vision_agent/lmm/lmm.py
@@ -31,10 +31,18 @@ class LLaVALMM(LMM):
  def __init__(self, name: str):
  self.name = name
 
- def generate(self, prompt: str, image: Optional[Union[str, Path]] = None) -> str:
+ def generate(
+ self,
+ prompt: str,
+ image: Optional[Union[str, Path]] = None,
+ temperature: float = 0.1,
+ max_new_tokens: int = 1500,
+ ) -> str:
  data = {"prompt": prompt}
  if image:
  data["image"] = encode_image(image)
+ data["temperature"] = temperature # type: ignore
+ data["max_new_tokens"] = max_new_tokens # type: ignore
  res = requests.post(
  _LLAVA_ENDPOINT,
  headers={"Content-Type": "application/json"},