Skip to content

Commit

Permalink
WIP
Browse files Browse the repository at this point in the history
  • Loading branch information
arcones committed Nov 7, 2023
1 parent cf283ee commit 159f2de
Show file tree
Hide file tree
Showing 10 changed files with 273 additions and 188 deletions.
7 changes: 7 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,10 @@ Server will listen in port 8000. You can check the functionality with this examp
```shell
curl 'localhost:8000/query-study-hierarchy?keyword=stroke%20AND%20single%20cell%20rna%20seq%20AND%20musculus'
```

## TODO Handle the case of several SRPs for one GSE
## TODO Unit tests, prettyhttp
## TODO quitar trazas del connectionpool
## TODO crear tarea en trello de paralelizar las parejas de GET y POST
## TODO tipado de todo, parametros y tipos de retorno, named params
## TODO estilo de comillas coherente
10 changes: 6 additions & 4 deletions kilombo/model/study_hierarchy.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,17 +29,19 @@ def move_study_to_failed(self, failed_study: FailedStudy):
self.count_total += 1
assert len(self.successful) + len(self.failed) == self.count_total

def move_study_to_successful(self, study_id, srps):
    """Register ``study_id`` as successfully processed.

    Copies the study's ``"GSE"`` value from ``self.pending`` into a fresh
    ``self.successful[study_id]`` entry, seeds its ``"srps"`` list with one
    ``{srps: []}`` mapping, and bumps the success/total counters.

    :param study_id: key of the study in ``self.pending``.
    :param srps: value used as the key of the first ``"srps"`` entry; it must
        be hashable.
        NOTE(review): presumably a single SRP accession despite the plural
        name -- confirm against the caller.
    :raises KeyError: if ``study_id`` (or its ``"GSE"`` field) is not pending.
    """
    gse = self.pending[study_id]["GSE"]
    # The entry is rebuilt from scratch on every call, so the "srps" list must
    # always be created here; whether the *pending* record has an "srps" key
    # says nothing about the new successful entry.
    self.successful[study_id] = {"gse": gse, "srps": [{srps: []}]}
    self.count_successful += 1
    # Internal bookkeeping invariants: the counters mirror the dict sizes.
    assert len(self.successful) == self.count_successful
    self.count_total += 1
    assert len(self.successful) + len(self.failed) == self.count_total

def add_srrs(self, study_id, srp, srrs: list):
    """Attach the list of SRRs to ``srp`` inside a successful study.

    ``self.successful[study_id]["srps"]`` is a list of ``{srp: srrs}``
    mappings, so it cannot be indexed by ``srp`` directly; the matching
    mapping is located and updated in place. If no mapping for ``srp``
    exists yet, a new one is appended.

    :param study_id: key of the study in ``self.successful``.
    :param srp: SRP key whose runs are being stored.
    :param srrs: runs to store for ``srp``.
    :raises KeyError: if ``study_id`` is not in ``self.successful``.
    """
    srp_entries = self.successful[study_id].setdefault("srps", [])
    for entry in srp_entries:
        if srp in entry:
            entry[srp] = srrs
            return
    srp_entries.append({srp: srrs})

def reconcile(self):
self._clean_pending_studies_already_processed()
Expand Down
169 changes: 0 additions & 169 deletions kilombo/service/external/ncbi.py

This file was deleted.

Empty file.
29 changes: 29 additions & 0 deletions kilombo/service/external/ncbi/esearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
from kilombo.service.external.ncbi.ncbi_request import NCBIRequest


class ESearch(NCBIRequest):
    """Requests against the NCBI E-utilities ``esearch.fcgi`` endpoint (JSON mode).

    Relies on ``utils_base_url``, ``batch_size`` and ``paginated_request``
    being available on the instance after ``NCBIRequest.__init__`` runs.
    """

    def __init__(self):
        super().__init__()
        self.base_url = f"{self.utils_base_url}/esearch.fcgi?retmode=json"

    def esearch_get_studies(self, db, term):  # TODO: unit test
        """Return the study ids from every batch of a paginated GET search.

        :param db: value for the ``db`` query parameter.
        :param term: value for the ``term`` query parameter (inserted as-is).
        :return: flat list concatenating ``esearchresult.idlist`` of each batch.
        """
        study_id_batches = self.paginated_request("GET", self.base_url + f"&db={db}&term={term}")
        study_ids = []
        for batch in study_id_batches.values():
            study_ids += batch["esearchresult"]["idlist"]
        return study_ids

    def esearch_post_webenv(self, db: str, term: str):
        """POST a search and return the ``webenv`` field of its result.

        :param db: value for the ``db`` payload field.
        :param term: value for the ``term`` payload field.
        :return: ``response["esearchresult"]["webenv"]``.
        """
        # NOTE(review): NCBI documents that a WebEnv is only produced when
        # usehistory is "y"; with "n" the "webenv" key below may be absent.
        # Confirm the intended value before relying on this method.
        payload = {"db": db, "usehistory": "n", "retmode": "json", "term": term}
        # NOTE(review): esearch_get_studies treats the result of
        # paginated_request as a mapping of batches, whereas this method
        # indexes it as a single response -- verify against paginated_request.
        response = self.paginated_request("POST", self.base_url, payload)
        return response["esearchresult"]["webenv"]

    def _is_pending_data_to_retrieve(self, responses: dict):  # TODO: unit test
        """Tell whether another batch still has to be requested.

        :param responses: batches received so far; only the values are read.
        :return: ``True`` when nothing has been fetched yet or every batch is
            full (at least ``self.batch_size`` ids); ``False`` as soon as any
            batch is shorter than ``self.batch_size``.
        """
        if not responses:
            return True
        # Every batch is inspected: a short batch anywhere means the last page
        # has already been received.
        return all(
            len(response["esearchresult"]["idlist"]) >= self.batch_size
            for response in responses.values()
        )
35 changes: 35 additions & 0 deletions kilombo/service/external/ncbi/esummary.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
import json
import logging

import aiohttp

from kilombo.service.external.ncbi.ncbi_request import NCBIRequest


class ESummary(NCBIRequest):
    """Requests against the NCBI E-utilities ``esummary.fcgi`` endpoint (JSON mode).

    Relies on ``utils_base_url``, ``retry_max`` and ``api_keys`` being
    available on the instance after ``NCBIRequest.__init__`` runs.
    """

    def __init__(self):
        super().__init__()
        self.base_url = f"{self.utils_base_url}/esummary.fcgi?retmode=json"

    # def _is_all_data_retrieved(self, responses: {}):
    # return True

    # TODO: pending pagination here and split this method's responsibility
    async def esummary_get_study(self, db: str, study_id: int):
        """Fetch the summary of one study, retrying on non-200 responses.

        Makes up to ``self.retry_max`` GET attempts, alternating between
        ``self.api_keys[1]`` (odd attempts) and ``self.api_keys[0]`` (even
        attempts).

        :param db: value for the ``db`` query parameter.
        :param study_id: value for the ``id`` query parameter.
        :return: the response body parsed as JSON.
        :raises Exception: when no attempt returned HTTP 200.
        """
        logging.debug(f"Started get summary for study ==> {study_id}")
        unauthenticated_url = f"{self.base_url}&db={db}&id={study_id}&retmax=10000"
        # One session is shared by all attempts instead of being rebuilt per retry.
        async with aiohttp.ClientSession() as session:
            # Attempts are numbered 1..retry_max inclusive so that the number of
            # requests matches the count reported in the final error message.
            for attempt in range(1, self.retry_max + 1):
                api_key = self.api_keys[0] if attempt % 2 == 0 else self.api_keys[1]
                url = unauthenticated_url + f"&api_key={api_key}"
                logging.debug(f"HTTP GET Started ==> {url}")
                async with session.get(url) as response:
                    if response.status == 200:
                        logging.debug(f"Done get summary in retry #{attempt} ==> {study_id}")
                        summary = json.loads(await response.text())
                        logging.debug(f"HTTP GET Done ==> {url} with response {summary}")
                        return summary
                    logging.debug(
                        f"Get a {response.status} from {url}, retries count incremented to {attempt + 1}"
                    )
        raise Exception(f"Unable to fetch {study_id} in {self.retry_max} attempts")
Loading

0 comments on commit 159f2de

Please sign in to comment.