diff --git a/HCK21_2024_Janelia_NDRH/README.md b/HCK21_2024_Janelia_NDRH/README.md
index 4b02fa418..8471afd75 100644
--- a/HCK21_2024_Janelia_NDRH/README.md
+++ b/HCK21_2024_Janelia_NDRH/README.md
@@ -89,6 +89,11 @@ Thanks to the generous sponsorship of The Janelia Research Campus, this event wi
 Resources will be posted here to help participants prepare for the event.
 
+* Search API tutorials
+  * All slides: [Google Slides](https://drive.google.com/drive/folders/1DAmQr4qWCamhj_2Zyke0kkHGvV8Kyq13?usp=sharing)
+  * [Basic API tutorial](tutorials/simple_dandiset_search.ipynb)
+  * [Advanced API tutorial](tutorials/advanced_asset_search.ipynb)
+
 * A report of the first NeuroDataReHack event: ([PDF](../HCK14_2022_Seattle_RH/report/Report_Neurodata_Rehack_v2.pdf))
 * A report of the second NeuroDataReHack event: ([PDF](../HCK16_2023_Granada_RH/report/Report__NeuroDataReHack_2023.pdf))
 * Recordings of talks will be made available on the [NWB Youtube channel](https://www.youtube.com/channel/UCfD_mU-EFz135a9TpNFJP5A).
 
diff --git a/HCK21_2024_Janelia_NDRH/tutorials/advanced_asset_search.ipynb b/HCK21_2024_Janelia_NDRH/tutorials/advanced_asset_search.ipynb
new file mode 100644
index 000000000..ad2f18c9f
--- /dev/null
+++ b/HCK21_2024_Janelia_NDRH/tutorials/advanced_asset_search.ipynb
@@ -0,0 +1,538 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "62326652-fb78-4838-9bbd-6f29e09793fc",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "import json\n",
+    "import numpy as np\n",
+    "from dandi.dandiapi import DandiAPIClient\n",
+    "from tqdm.notebook import tqdm\n",
+    "from isodate import parse_duration, Duration\n",
+    "from datetime import datetime"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "3c949acf-7661-41ec-801b-7f7aec844ee0",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "client = DandiAPIClient()\n",
+    "dandisets = list(client.get_dandisets())"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "6a226e74-0f13-40c6-9e8d-9a3eb3fbd3ad",
+   "metadata": {
+    "tags": []
+   },
+   "source": [
+    "# More specific identification of NWB dandisets\n",
+    "\n",
+    "The simpler tutorial only checked whether the string \"NWB\" appeared in the name of any of the data standards for a dandiset.\n",
+    "\n",
+    "A more precise and official method is to use the specific [RRID of NWB](https://scicrunch.org/resolver/RRID:SCR_015242), which is `\"RRID:SCR_015242\"`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "id": "01b611f1-6d7f-40d7-8922-26eb1c5b2264",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "a2988d690aff4294aaf1a13e597bd6df",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/465 [00:00<?, ?it/s]"
[…]
+    "def iso_to_fractional_days(age_iso: str, experiment_date: str) -> float:\n",
+    "    \"\"\"\n",
+    "    Parse an ISO 8601 age and return it as float-valued fractional days.\n",
+    "    \n",
+    "    A helper is needed because a datetime.timedelta can only return either its `.days` (an integer,\n",
+    "    rounded down) or its `.total_seconds()`.\n",
+    "    \n",
+    "    This helper also resolves some complications that can arise in other datasets when the age is\n",
+    "    measured in years, or when the age is given as a range.\n",
+    "    \"\"\"\n",
+    "    if \"/\" in age_iso:  # Some ages have upper and lower bounds due to uncertainty\n",
+    "        return  # Skip ranges\n",
+    "\n",
+    "    age_duration = parse_duration(datestring=age_iso)\n",
+    "\n",
+    "    if isinstance(age_duration, Duration):  # Ages in years need a reference date to resolve exactly\n",
+    "        experiment_datetime = datetime.fromisoformat(experiment_date)\n",
+    "        time_delta = age_duration.totimedelta(end=experiment_datetime)\n",
+    "    else:\n",
+    "        time_delta = age_duration\n",
+    "\n",
+    "    return time_delta.total_seconds() / (  # Convert the total number of seconds to days\n",
+    "        60 *  # 60 seconds per minute\n",
+    "        60 *  # 60 minutes per hour\n",
+    "        24  # 24 hours per day (ignoring daylight saving time)\n",
+    "    )\n",
+    "\n",
+    "\n",
+    "all_subject_ages_in_days = []\n",
+    "\n",
+    "dandiset = client.get_dandiset(\"000398\")\n",
+    "assets = list(dandiset.get_assets())\n",
+    "for asset in tqdm(assets):\n",
+    "    raw_metadata = asset.get_raw_metadata()\n",
+    "    subjects = raw_metadata[\"wasAttributedTo\"]\n",
+    "\n",
+    "    for subject_metadata in subjects:\n",
+    "        if \"age\" in subject_metadata:\n",
+    "            age_in_days = iso_to_fractional_days(\n",
+    "                age_iso=subject_metadata[\"age\"][\"value\"],\n",
+    "                experiment_date=raw_metadata[\"wasGeneratedBy\"][0][\"startDate\"]\n",
+    "            )\n",
+    "\n",
+    "            if age_in_days:  # Skip ages that could not be parsed (the helper returns None for ranges)\n",
+    "                all_subject_ages_in_days.append(age_in_days)\n",
+    "print(f\"The average age of the subjects in dandiset 000398 is: {np.mean(all_subject_ages_in_days)} days\")"
+   ]
+  },
+  {
+   "cell_type": "markdown",
+   "id": "eb82bd5c-cbf1-4312-81a3-da0120a0d154",
+   "metadata": {},
+   "source": [
+    "# Count the number of spiking units across all sessions in an experiment\n",
+    "\n",
+    "The number of units identified by spike sorting is not something that DANDI extracts automatically during upload...\n",
+    "\n",
+    "But we can calculate it ourselves without downloading an entire dandiset!\n",
+    "\n",
+    "We do this by streaming directly from the archive, which requires us to retrieve each asset's URL on the S3 backend of the DANDI archive and then open that URL remotely; the cells below use the `remfile` package (an alternative is h5py's read-only S3 driver, `ros3`).\n",
+    "\n",
+    "There are several ways to retrieve the S3 URL, but the easiest is to use the NWB Inspector helper function `nwbinspector.tools.get_s3_urls_and_dandi_paths`, which formats the URL in the way these readers expect."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "id": "2edbd010-1c2c-469c-a492-5e96d6e98a36",
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Collecting remfile\n",
+      "  Downloading remfile-0.1.13-py3-none-any.whl.metadata (3.7 kB)\n",
+      "Requirement already satisfied: numpy in /opt/conda/lib/python3.11/site-packages (from remfile) (1.26.3)\n",
+      "Requirement already satisfied: h5py in /opt/conda/lib/python3.11/site-packages (from remfile) (3.10.0)\n",
+      "Requirement already satisfied: requests in /opt/conda/lib/python3.11/site-packages (from remfile) (2.31.0)\n",
+      "Requirement already satisfied: mpi4py>=3.1.1 in /opt/conda/lib/python3.11/site-packages (from h5py->remfile) (3.1.5)\n",
+      "Requirement already satisfied: charset-normalizer<4,>=2 in /opt/conda/lib/python3.11/site-packages (from requests->remfile) (3.3.2)\n",
+      "Requirement already satisfied: idna<4,>=2.5 in /opt/conda/lib/python3.11/site-packages (from requests->remfile) (3.6)\n",
+      "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/conda/lib/python3.11/site-packages (from requests->remfile) (2.0.7)\n",
+      "Requirement already satisfied: certifi>=2017.4.17 in /opt/conda/lib/python3.11/site-packages (from requests->remfile) (2024.2.2)\n",
+      "Downloading remfile-0.1.13-py3-none-any.whl (11 kB)\n",
+      "Installing collected packages: remfile\n",
+      "Successfully installed remfile-0.1.13\n"
+     ]
+    }
+   ],
+   "source": [
+    "!pip install remfile"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "e23e50af-678f-4517-b0dc-ba10ba85d529",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from warnings import simplefilter\n",
+    "simplefilter(\"ignore\")  # Suppress namespace warnings from reading older NWB files\n",
+    "\n",
+    "from nwbinspector.tools import get_s3_urls_and_dandi_paths\n",
+    "from pynwb import NWBHDF5IO\n",
+    "import remfile\n",
+    "import h5py"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 14,
+   "id": "85ac164c-5613-4eaa-8a20-0e2ae870583b",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "application/vnd.jupyter.widget-view+json": {
+       "model_id": "0ad4fa87e6fb4bf29cef775cff1b0dd1",
+       "version_major": 2,
+       "version_minor": 0
+      },
+      "text/plain": [
+       "  0%|          | 0/100 [00:00
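
The source of the RRID-matching cell (`execution_count: 3`) is garbled in the patch above (the span marked `[…]`). As a minimal sketch of what such a filter can look like, assuming the `assetsSummary.dataStandard` layout of DANDI's raw dandiset metadata and using hypothetical variable names (`nwb_rrid`, `rrid_matched_dandisets`):

```python
from dandi.dandiapi import DandiAPIClient
from tqdm.notebook import tqdm

client = DandiAPIClient()
dandisets = list(client.get_dandisets())

nwb_rrid = "RRID:SCR_015242"  # The RRID of NWB, per the markdown cell above

rrid_matched_dandisets = []
for dandiset in tqdm(dandisets):
    raw_metadata = dandiset.get_raw_metadata()
    # `dataStandard` is assumed to be a list of dicts with "identifier" and "name" keys
    data_standards = raw_metadata.get("assetsSummary", dict()).get("dataStandard", list())
    if any(data_standard.get("identifier") == nwb_rrid for data_standard in data_standards):
        rrid_matched_dandisets.append(dandiset)

print(f"Number of dandisets matched by the NWB RRID: {len(rrid_matched_dandisets)}")
```

Matching on the `identifier` field avoids the false positives that substring checks against the human-readable `name` can produce.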
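
The final cell is cut off above; as a rough sketch of the streaming unit count that its preceding markdown cell describes (the dandiset ID `"000398"` is reused here purely for illustration and may not be the one the notebook actually queries):

```python
from nwbinspector.tools import get_s3_urls_and_dandi_paths
from pynwb import NWBHDF5IO
from tqdm.notebook import tqdm
import h5py
import remfile

total_units = 0
s3_urls_to_dandi_paths = get_s3_urls_and_dandi_paths(dandiset_id="000398")
for s3_url in tqdm(s3_urls_to_dandi_paths):
    byte_stream = remfile.File(s3_url)  # Lazily fetches byte ranges over HTTP instead of downloading
    with h5py.File(byte_stream, "r") as file, NWBHDF5IO(file=file, load_namespaces=True) as io:
        nwbfile = io.read()
        if nwbfile.units is not None:  # Not every session has spike-sorted units
            total_units += len(nwbfile.units)

print(f"Total number of spiking units across all sessions: {total_units}")
```

Because only the byte ranges needed to resolve the file structure and the `units` table are fetched, this runs far faster than downloading each asset in full.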