Skip to content

Commit

Permalink
wip
Browse files: browse the repository at this point in the history
  • Loading branch information
motey committed May 18, 2021
1 parent 01ef92e commit ed88ecb
Showing 1 changed file with 3 additions and 2 deletions.
5 changes: 3 additions & 2 deletions dataloader/load_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ def _index_dirs(self, path):
# The filename is made of the paper id (a sha hash of the origin pdf)
file_id = os.path.splitext(os.path.basename(file))[0]
self._index[file_id] = os.path.join(root, file)
log.info(f"Indexed {len(list(self._index.keys()))} json files")

def get_full_text_paper_pathes(self, paper_sha, paper_pmcid):
pathes = []
Expand Down Expand Up @@ -507,7 +508,7 @@ def load_data():

if __name__ == "__main__":
# with CodeTimer(unit="s"):
# load_data_mp(config.NO_OF_PROCESSES, config.PAPER_BATCH_SIZE)
load_data_mp(1, 1)
load_data_mp(config.NO_OF_PROCESSES, config.PAPER_BATCH_SIZE)
# load_data_mp(1, 1)
# load_data()

0 comments on commit ed88ecb

Please sign in to comment.