Skip to content

Commit 5444ff4

Browse files
authored
Merge pull request gyorilab#416 from kkaris/llm-annotate
LLM concept annotation
2 parents 2f777c4 + b0c13ec commit 5444ff4

File tree

9 files changed

+787
-69
lines changed

9 files changed

+787
-69
lines changed

docs/source/sources.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,10 @@ SymPy ODE extraction (:py:mod:`mira.sources.sympy_ode`)
6161
:members:
6262
:show-inheritance:
6363

64+
.. automodule:: mira.sources.sympy_ode.llm_util
65+
:members:
66+
:show-inheritance:
67+
6468
Bilayer extraction (:py:mod:`mira.sources.bilayer`)
6569
---------------------------------------------------
6670
.. automodule:: mira.sources.bilayer

mira/openai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
try:
22
import openai
3-
from .client import OpenAIClient
3+
from .client import OpenAIClient, ImageFmts, ALLOWED_FORMATS
44
except ImportError as ierr:
55
if 'openai' in str(ierr):
66
raise ImportError(

mira/openai/client.py

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,56 @@
55

66

77
ImageFmts = Literal["jpeg", "jpg", "png", "webp", "gif"]
8+
ALLOWED_FORMATS = ["jpeg", "jpg", "png", "webp", "gif"]
89

910

1011
class OpenAIClient:
1112

1213
def __init__(self, api_key: str = None):
1314
self.client = OpenAI(api_key=api_key)
1415

16+
def run_chat_completion(
    self,
    message: str,
    model: str = "gpt-4o-mini",
    max_tokens: int = 2048,
):
    """Run the OpenAI chat completion

    Parameters
    ----------
    message :
        The prompt to send for chat completion
    model :
        The model to use. The default is the gpt-4o-mini model.
    max_tokens :
        The maximum number of tokens to generate for chat completion. One
        token is roughly one word in plain text, however it can be more per
        word in some cases. The default is 2048.

    Returns
    -------
    :
        The first entry of the ``choices`` list in the OpenAI response.
        Note that this is the choice object itself, not a plain string.
    """
    # The prompt is sent as a single user message with one text part.
    response = self.client.chat.completions.create(
        model=model,
        messages=[
            {
                "role": "user",
                "content": [
                    {
                        "type": "text",
                        "text": message,
                    }
                ],
            }
        ],
        max_tokens=max_tokens,
    )
    # Only the first choice is handed back to the caller.
    return response.choices[0]
57+
1558
def run_chat_completion_with_image(
1659
self,
1760
message: str,
@@ -43,6 +86,11 @@ def run_chat_completion_with_image(
4386
:
4487
The response from OpenAI as a string.
4588
"""
89+
# Reject unsupported image formats before the completion request is made.
if image_format not in ALLOWED_FORMATS:
    raise ValueError(
        # The trailing space keeps the two sentences apart when the adjacent
        # f-strings are concatenated.
        f"Image format {image_format} not supported. "
        f"Supported formats are {ALLOWED_FORMATS}"
    )
4694
response = self.client.chat.completions.create(
4795
model=model,
4896
messages=[

mira/sources/sympy_ode/constants.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
from string import Template
2+
3+
4+
ODE_IMAGE_PROMPT = """Transform these equations into a sympy representation based on the example style below
5+
6+
```python
7+
# Define time variable
8+
t = sympy.symbols("t")
9+
10+
# Define the time-dependent variables
11+
S, E, I, R = sympy.symbols("S E I R", cls=sympy.Function)
12+
13+
# Define the parameters
14+
b, g, r = sympy.symbols("b g r")
15+
16+
odes = [
17+
sympy.Eq(S(t).diff(t), - b * S(t) * I(t)),
18+
sympy.Eq(E(t).diff(t), b * S(t) * I(t) - r * E(t)),
19+
sympy.Eq(I(t).diff(t), r * E(t) - g * I(t)),
20+
sympy.Eq(R(t).diff(t), g * I(t))
21+
]
22+
```
23+
24+
Instead of using unicode characters, spell out symbols in lowercase, like theta, omega, etc.
25+
Also, provide the code snippet only and no explanation."""
26+
27+
ODE_CONCEPTS_PROMPT_TEMPLATE = Template("""
28+
I want to annotate epidemiology models with attributes that describe the identity and context of each compartment.
29+
30+
An example is the set of ODE equations below, and the corresponding context data:
31+
32+
odes = [
33+
sp.Eq(S_l(t).diff(t), pi_h * (1 - rho) - nu * lambda_h * S_l(t) - mu_h * S_l(t)),
34+
sp.Eq(S_h(t).diff(t), pi_h * rho - lambda_h * S_h(t) - mu_h * S_h(t)),
35+
sp.Eq(E_h(t).diff(t), nu * S_l(t) * lambda_h + S_h(t) * lambda_h - (sigma_h + mu_h) * E_h(t)),
36+
sp.Eq(P(t).diff(t), sigma_h * E_h(t) - (omega + mu_h) * P(t)),
37+
sp.Eq(I1(t).diff(t), omega * P(t) - (theta + k1 + tau1 + mu_h) * I1(t)),
38+
sp.Eq(I2(t).diff(t), theta * I1(t) - (k2 + delta_i + tau2 + mu_h) * I2(t)),
39+
sp.Eq(H(t).diff(t), k1 * I1(t) + k2 * I2(t) - (delta_h + tau3 + mu_h) * H(t)),
40+
sp.Eq(R_h(t).diff(t), tau1 * I1(t) + tau2 * I2(t) + tau3 * H(t) - mu_h * R_h(t)),
41+
sp.Eq(S_r(t).diff(t), pi_r - lambda_r * S_r(t) - mu_r * S_r(t)),
42+
sp.Eq(E_r(t).diff(t), lambda_r * S_r(t) - (sigma_r + mu_r) * E_r(t)),
43+
sp.Eq(I_r(t).diff(t), sigma_r * E_r(t) - (delta_r + tau_r + mu_r) * I_r(t)),
44+
sp.Eq(R_r(t).diff(t), tau_r * I_r(t) - mu_r * R_r(t)),
45+
]
46+
47+
concept_data = {
48+
'S_l': {'identifiers': {'ido': '0000514'},
49+
'context': {'severity': 'low', 'species': 'ncbitaxon:9606'}},
50+
'S_h': {'identifiers': {'ido': '0000514'},
51+
'context': {'severity': 'high', 'species': 'ncbitaxon:9606'}},
52+
'E_h': {'identifiers': {'apollosv': '00000154'},
53+
'context': {'species': 'ncbitaxon:9606'}},
54+
'P': {'identifiers': {'ido': '0000511'},
55+
'context': {'stage': 'prodromal', 'species': 'ncbitaxon:9606'}},
56+
'I1': {'identifiers': {'ido': '0000511'},
57+
'context': {'stage': 'mild', 'species': 'ncbitaxon:9606'}},
58+
'I2': {'identifiers': {'ido': '0000511'},
59+
'context': {'stage': 'severe', 'species': 'ncbitaxon:9606'}},
60+
'H': {'identifiers': {'ido': '0000511'},
61+
'context': {'hospitalization': 'ncit:C25179', 'species': 'ncbitaxon:9606'}},
62+
'R_h': {'identifiers': {'ido': '0000592'},
63+
'context': {'species': 'ncbitaxon:9606'}},
64+
'S_r': {'identifiers': {'ido': '0000514'},
65+
'context': {'species': 'ncbitaxon:9989'}},
66+
'E_r': {'identifiers': {'apollosv': '00000154'},
67+
'context': {'species': 'ncbitaxon:9989'}},
68+
'I_r': {'identifiers': {'ido': '0000511'},
69+
'context': {'species': 'ncbitaxon:9989'}},
70+
'R_r': {'identifiers': {'ido': '0000592'},
71+
'context': {'species': 'ncbitaxon:9989'}},
72+
}
73+
74+
Now look at the following equations and give me the corresponding concept data:
75+
76+
$ode_insert
77+
78+
Below, there are many more examples of how we annotate various commonly occurring compartments:
79+
80+
{'Ailing': {'identifiers': {'ido': '0000511'},
81+
'context': {'disease_severity': 'ncit:C25269', 'diagnosis': 'ncit:C113725'}},
82+
'asymptomatic': {'identifiers': {'ido': '0000511'},
83+
'context': {'disease_severity': 'ncit:C3833'}},
84+
'Asymptomatic': {'identifiers': {'ido': '0000511'},
85+
'context': {'disease_severity': 'ncit:C3833'}},
86+
'Confirmed': {'identifiers': {'ido': '0000511'},
87+
'context': {'diagnosis': 'ncit:C15220'}},
88+
'Confirmed_Infected': {'identifiers': {'ido': '0000511'},
89+
'context': {'diagnosis': 'ncit:C15220'}},
90+
'dead_corona_nontested': {'identifiers': {'ncit': 'C28554'},
91+
'context': {'diagnosis': 'ncit:C113725', 'cause_of_death': 'ncit:C171133'}},
92+
'dead_corona_tested': {'identifiers': {'ncit': 'C28554'},
93+
'context': {'diagnosis': 'ncit:C15220', 'cause_of_death': 'ncit:C171133'}},
94+
'dead_noncorona': {'identifiers': {'ncit': 'C28554'},
95+
'context': {'cause_of_death': 'ncit:C17649'}},
96+
'deceased': {'identifiers': {'ncit': 'C28554'}, 'context': {}},
97+
'Deceased': {'identifiers': {'ncit': 'C28554'}, 'context': {}},
98+
'Deceased_Counties_neighbouring_counties_with_airports': {'identifiers': {'ncit': 'C28554'},
99+
'context': {'county_property': 'neighbouring_counties_with_airports'}},
100+
'Deceased_Counties_with_airports': {'identifiers': {'ncit': 'C28554'},
101+
'context': {'county_property': 'with_airports'}},
102+
'Deceased_Counties_with_highways': {'identifiers': {'ncit': 'C28554'},
103+
'context': {'county_property': 'with_highways'}},
104+
'Deceased_Low_risk_counties': {'identifiers': {'ncit': 'C28554'},
105+
'context': {'county_property': 'low_risk'}},
106+
'detected': {'identifiers': {'ido': '0000511'},
107+
'context': {'diagnosis': 'ncit:C15220'}},
108+
'Diagnosed': {'identifiers': {'ido': '0000511'},
109+
'context': {'diagnosis': 'ncit:C15220'}},
110+
'Discharged_Counties_neighbouring_counties_with_airports': {'identifiers': {'ido': '0000592'},
111+
'context': {'hospitalization': 'ncit:C154475',
112+
'county_property': 'neighbouring_counties_with_airports'}},
113+
'Discharged_Counties_with_airports': {'identifiers': {'ido': '0000592'},
114+
'context': {'hospitalization': 'ncit:C154475',
115+
'county_property': 'with_airports'}},
116+
'Discharged_Counties_with_highways': {'identifiers': {'ido': '0000592'},
117+
'context': {'hospitalization': 'ncit:C154475',
118+
'county_property': 'with_highways'}},
119+
'Discharged_Low_risk_counties': {'identifiers': {'ido': '0000592'},
120+
'context': {'hospitalization': 'ncit:C154475',
121+
'county_property': 'low_risk'}},
122+
'exposed': {'identifiers': {'apollosv': '00000154'}, 'context': {}},
123+
'Exposed': {'identifiers': {'apollosv': '00000154'}, 'context': {}},
124+
'Exposed_quarantined': {'identifiers': {'apollosv': '00000154'},
125+
'context': {'quarantined': 'ncit:C71902'}},
126+
'Extinct': {'identifiers': {'ncit': 'C28554'}, 'context': {}},
127+
'Fatalities': {'identifiers': {'ncit': 'C28554'}, 'context': {}},
128+
'Healed': {'identifiers': {'ido': '0000592'}, 'context': {}},
129+
'Hospitalised': {'identifiers': {'ido': '0000511'},
130+
'context': {'hospitalization': 'ncit:C25179'}},
131+
'Hospitalised_Counties_neighbouring_counties_with_airports': {'identifiers': {'ido': '0000511'},
132+
'context': {'hospitalization': 'ncit:C25179',
133+
'county_property': 'neighbouring_counties_with_airports',
134+
'icu': 'ncit:C68851'}},
135+
'Hospitalised_Counties_with_airports': {'identifiers': {'ido': '0000511'},
136+
'context': {'hospitalization': 'ncit:C25179',
137+
'county_property': 'with_airports',
138+
'icu': 'ncit:C68851'}},
139+
'Hospitalised_Counties_with_highways': {'identifiers': {'ido': '0000511'},
140+
'context': {'hospitalization': 'ncit:C25179',
141+
'county_property': 'with_highways',
142+
'icu': 'ncit:C68851'}},
143+
'Hospitalised_Low_risk_counties': {'identifiers': {'ido': '0000511'},
144+
'context': {'hospitalization': 'ncit:C25179',
145+
'county_property': 'low_risk',
146+
'icu': 'ncit:C68851'}},
147+
'Hospitalized': {'identifiers': {'ido': '0000511'},
148+
'context': {'hospitalization': 'ncit:C25179',
149+
'disease_severity': 'ncit:C25269'}},
150+
'ICU_Counties_neighbouring_counties_with_airports': {'identifiers': {'ido': '0000511'},
151+
'context': {'hospitalization': 'ncit:C25179',
152+
'icu': 'ncit:C53511',
153+
'county_property': 'neighbouring_counties_with_airports'}},
154+
'ICU_Counties_with_airports': {'identifiers': {'ido': '0000511'},
155+
'context': {'hospitalization': 'ncit:C25179',
156+
'icu': 'ncit:C53511',
157+
'county_property': 'with_airports'}},
158+
'ICU_Counties_with_highways': {'identifiers': {'ido': '0000511'},
159+
'context': {'hospitalization': 'ncit:C25179',
160+
'icu': 'ncit:C53511',
161+
'county_property': 'with_highways'}},
162+
'ICU_Low_risk_counties': {'identifiers': {'ido': '0000511'},
163+
'context': {'hospitalization': 'ncit:C25179',
164+
'icu': 'ncit:C53511',
165+
'county_property': 'low_risk'}},
166+
'Infected': {'identifiers': {'ido': '0000511'}, 'context': {}},
167+
'Infected_Asymptomatic': {'identifiers': {'ido': '0000511'},
168+
'context': {'disease_severity': 'ncit:C3833'}},
169+
'Infected_Counties_neighbouring_counties_with_airports': {'identifiers': {'ido': '0000511'},
170+
'context': {'county_property': 'neighbouring_counties_with_airports'}},
171+
'Infected_Counties_with_airports': {'identifiers': {'ido': '0000511'},
172+
'context': {'county_property': 'with_airports'}},
173+
'Infected_Counties_with_highways': {'identifiers': {'ido': '0000511'},
174+
'context': {'county_property': 'with_highways'}},
175+
'Infected_Low_risk_counties': {'identifiers': {'ido': '0000511'},
176+
'context': {'county_property': 'low_risk'}},
177+
'infected_nontested': {'identifiers': {'ido': '0000511'},
178+
'context': {'diagnosed': 'ncit:C113725'}},
179+
'Infected_quarantined': {'identifiers': {'ido': '0000511'},
180+
'context': {'quarantined': 'ncit:C71902'}},
181+
'Infected_reported': {'identifiers': {'ido': '0000511'},
182+
'context': {'diagnosis': 'ncit:C15220'}},
183+
'Infected_strong_immune_system': {'identifiers': {'ido': '0000511'},
184+
'context': {'immune_system': 'ncit:C62223'}},
185+
'Infected_Symptomatic': {'identifiers': {'ido': '0000511'},
186+
'context': {'disease_severity': 'ncit:C25269'}},
187+
'infected_tested': {'identifiers': {'ido': '0000511'},
188+
'context': {'diagnosis': 'ncit:C15220'}},
189+
'Infected_unreported': {'identifiers': {'ido': '0000511'},
190+
'context': {'diagnosed': 'ncit:C113725'}},
191+
'Infected_weak_immune_system': {'identifiers': {'ido': '0000511'},
192+
'context': {'immune_system': 'ncit:C62224'}},
193+
'Infectious': {'identifiers': {'ido': '0000511'},
194+
'context': {'transmissibility': 'ncit:C25376'}},
195+
'Pathogen': {'identifiers': {'ncit': 'C80324'}, 'context': {}},
196+
'Quarantined': {'identifiers': {'ido': '0000511'},
197+
'context': {'quarantined': 'ncit:C71902'}},
198+
'Quarantined_Infected': {'identifiers': {'ido': '0000511'},
199+
'context': {'quarantined': 'ncit:C71902'}},
200+
'Recognized': {'identifiers': {'ido': '0000511'},
201+
'context': {'diagnosis': 'ncit:C15220'}},
202+
'recovered': {'identifiers': {'ido': '0000592'}, 'context': {}},
203+
'Recovered': {'identifiers': {'ido': '0000592'}, 'context': {}},
204+
'Recovered_Counties_neighbouring_counties_with_airports': {'identifiers': {'ido': '0000592'},
205+
'context': {'county_property': 'neighbouring_counties_with_airports'}},
206+
'Recovered_Counties_with_airports': {'identifiers': {'ido': '0000592'},
207+
'context': {'county_property': 'with_airports'}},
208+
'Recovered_Counties_with_highways': {'identifiers': {'ido': '0000592'},
209+
'context': {'county_property': 'with_highways'}},
210+
'Recovered_Low_risk_counties': {'identifiers': {'ido': '0000592'},
211+
'context': {'county_property': 'low_risk'}},
212+
'recovered_nontested': {'identifiers': {'ido': '0000592'},
213+
'context': {'diagnosis': 'ncit:C113725'}},
214+
'recovered_tested': {'identifiers': {'ido': '0000592'},
215+
'context': {'diagnosis': 'ncit:C15220'}},
216+
'Removed': {'identifiers': {'ido': '0000592'}, 'context': {}},
217+
'Super_spreaders': {'identifiers': {'ido': '0000511'},
218+
'context': {'transmissibility': 'ncit:C49508'}},
219+
'Susceptible': {'identifiers': {'ido': '0000514'}, 'context': {}},
220+
'susceptible': {'identifiers': {'ido': '0000514'}, 'context': {}},
221+
'Susceptible_confined': {'identifiers': {'ido': '0000514'},
222+
'context': {'quarantined': 'ncit:C71902'}},
223+
'Susceptible_Counties_neighbouring_counties_with_airports': {'identifiers': {'ido': '0000514'},
224+
'context': {'county_property': 'neighbouring_counties_with_airports'}},
225+
'Susceptible_Counties_with_airports': {'identifiers': {'ido': '0000514'},
226+
'context': {'county_property': 'with_airports'}},
227+
'Susceptible_Counties_with_highways': {'identifiers': {'ido': '0000514'},
228+
'context': {'county_property': 'with_highways'}},
229+
'Susceptible_isolated': {'identifiers': {'ido': '0000514'},
230+
'context': {'quarantined': 'ncit:C71902'}},
231+
'Susceptible_Low_risk_counties': {'identifiers': {'ido': '0000514'},
232+
'context': {'county_property': 'low_risk'}},
233+
'Susceptible_quarantined': {'identifiers': {'ido': '0000514'},
234+
'context': {'quarantined': 'ncit:C71902'}},
235+
'Susceptible_unconfined': {'identifiers': {'ido': '0000514'},
236+
'context': {'quarantined': 'ncit:C68851'}},
237+
'symptomatic': {'identifiers': {'ido': '0000511'},
238+
'context': {'disease_severity': 'ncit:C25269'}},
239+
'symptoms_nontested': {'identifiers': {'ido': '0000511'},
240+
'context': {'disease_severity': 'ncit:C25269', 'diagnosed': 'ncit:C113725'}},
241+
'symptoms_tested': {'identifiers': {'ido': '0000511'},
242+
'context': {'disease_severity': 'ncit:C25269', 'diagnosis': 'ncit:C15220'}},
243+
'Threatened': {'identifiers': {'ido': '0000511'},
244+
'context': {'disease_severity': 'ncit:C25467'}},
245+
'Total_population': {'identifiers': {'ido': '0000509'}, 'context': {}},
246+
'uninfected_nontested': {'identifiers': {'ido': '0000514'},
247+
'context': {'diagnosis': 'ncit:C113725'}},
248+
'uninfected_tested': {'identifiers': {'ido': '0000514'},
249+
'context': {'diagnosis': 'ncit:C15220'}},
250+
'Unquarantined_Infected': {'identifiers': {'ido': '0000511'},
251+
'context': {'quarantined': 'ncit:C68851'}}}
252+
253+
Please only respond with the code snippet defining the concept data.
254+
""")

0 commit comments

Comments
 (0)