diff --git a/notebooks/demo.ipynb b/notebooks/demo.ipynb index 528dde4918f43de23e65de008786e3abe21d0d0e..383798919e142896b743c8bcf9398af301abf3aa 100644 --- a/notebooks/demo.ipynb +++ b/notebooks/demo.ipynb @@ -10,43 +10,12 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "id": "5b441713-77b4-4e57-a188-9959c09784bd", "metadata": { - "execution": { - "iopub.execute_input": "2022-02-11T10:40:43.234120Z", - "iopub.status.busy": "2022-02-11T10:40:43.233485Z", - "iopub.status.idle": "2022-02-11T10:40:44.641758Z", - "shell.execute_reply": "2022-02-11T10:40:44.641057Z", - "shell.execute_reply.started": "2022-02-11T10:40:43.234047Z" - } + "tags": [] }, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "2022-02-11 11:40:43,461 - WARNING - tvb.simulator.common - psutil module not available: no warnings will be issued when a\n", - " simulation may require more memory than available\n", - " INFO log level set to INFO\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/home/izaak/local_repos/nostromo/tvb-ebrains-data/env/lib/python3.6/site-packages/tvb/datatypes/surfaces.py:63: UserWarning: Geodesic distance module is unavailable; some functionality for surfaces will be unavailable.\n", - " warnings.warn(msg)\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING Token required outside collaboratory environment. 
Set EBRAINS_TOKEN in the environment or provide directly.\n" - ] - } - ], + "outputs": [], "source": [ "from tvb_ebrains_data import Brains1000Dataset" ] @@ -374,11 +343,136 @@ "source": [ "W.shape, D.shape" ] + }, + { + "cell_type": "code", + "execution_count": 2, + "id": "896ad8f3-c824-48d7-a2dc-8503a808294e", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "from tvb_ebrains_data import HCPDomhof" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "4e730e39-68cf-4abc-b0dd-14abd74e82e3", + "metadata": { + "tags": [] + }, + "outputs": [], + "source": [ + "dset = HCPDomhof(data_root='.')" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "4e58c1d7-743d-41aa-9a43-bf7b90cc195d", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "data": { + "text/plain": [ + "['093', '124', '106', '020', '077']" + ] + }, + "execution_count": 6, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dset.subjects[:5]" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "5dcd7edd-8067-4d37-a55b-46a831279b75", + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(1200, 200)" + ] + }, + "execution_count": 7, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dset.load_bold('093').shape" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "a1a6c4ef-5a0f-4f81-a952-c0117c168e85", + "metadata": { + "tags": [] + }, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on method load_sc in module tvb_ebrains_data.data:\n", + "\n", + "load_sc(subj) method of tvb_ebrains_data.data.HCPDomhof instance\n", + " Load structural connectivity.\n", + " \n", + " Parameters\n", + " ----------\n", + " subj : str\n", + " Subject id from `self.subjects`.\n", + " \n", + " Returns\n", + " -------\n", + " weights: ndarray\n", + " Weights matrix [N,N].\n", + " tract_length: ndarray\n", + " Tract length matrix [N,N].\n", + "\n" 
class HCPDomhof:
    r"""
    Atlas-based HCP structural connectomes and BOLD time series.

    On construction the BOLD and connectivity archives of the selected
    parcellation are fetched with ``pooch.retrieve`` and unzipped below
    ``data_root`` (into ``bold/`` and ``connectivity/`` respectively).

    - https://doi.org/10.25493/NVS8-XS5
    - https://doi.org/10.25493/F9DP-WCQ

    Available parcellations:
        031-MIST                        038-CraddockSCorr2Level
        048-HarvardOxfordMaxProbThr0    056-CraddockSCorr2Level
        056-MIST                        070-DesikanKilliany
        079-Shen2013                    086-EconomoKoskinas
        092-AALV2                       096-HarvardOxfordMaxProbThr0
        100-Schaefer17Networks          103-MIST
        108-CraddockSCorr2Level         150-Destrieux
        156-Shen2013                    160-CraddockSCorr2Level
        167-MIST                        200-Schaefer17Networks
        210-Brainnetome                 294-Julich-Brain

    The tsv files for the time series are double-space separated and have a
    trailing space. `load_bold` reads them with the default whitespace
    splitting of ``np.loadtxt``. The following commands strip trailing
    whitespace and replace the separator with commas
    (NOTE(review): the separator in the second pattern should match the
    double-space separator of the files -- confirm before running):
    ```
    find . -name '*.tsv' -exec sed -i 's/[ \t]*$//' {} \;
    find . -name '*.tsv' -exec sed -i 's/ /,/g' {} \;
    ```

    Attributes
    ----------
    data_root : str
        Directory below which the archives are downloaded and extracted.
    parcellation : str
        Name of the selected parcellation (a key of `parcellations`).
    subjects : list of str
        Sorted subject ids found in the extracted BOLD directory.
    """

    # Maps parcellation name -> (connectivity archive hash, bold archive hash).
    # The hashes are passed to pooch as `known_hash`; `None` means the
    # download is not verified.
    parcellations = {
        '031-MIST': (None, None),
        '038-CraddockSCorr2Level': (None, None),
        '048-HarvardOxfordMaxProbThr0': (None, None),
        '056-CraddockSCorr2Level': (None, None),
        '056-MIST': (None, None),
        '070-DesikanKilliany': (
            '419c59ab4e01059265cad42e5e68d7e58b0381bc27fe47ad4d4218358ba76280',
            '26812c39d3963924d3c8fdf65dc3e08b6be13e7d6a5f91e7a13734714958f5fc',
        ),
        '079-Shen2013': (None, None),
        '086-EconomoKoskinas': (None, None),
        '092-AALV2': (None, None),
        '096-HarvardOxfordMaxProbThr0': (None, None),
        '100-Schaefer17Networks': (
            'de583e85dd4aa1c0521d61d73f290ad6da3ce4cd5b3538c62b7630f03e438157',
            '650d5bf9a103299c6505a129051b5bc54a413a9d5ddffb574a4d165c12b1457f',
        ),
        '103-MIST': (None, None),
        '108-CraddockSCorr2Level': (None, None),
        '150-Destrieux': (None, None),
        '156-Shen2013': (None, None),
        '160-CraddockSCorr2Level': (None, None),
        '167-MIST': (None, None),
        '200-Schaefer17Networks': (
            '5086f4b3405acff84ffe132cee17c67a90000a3fae98da50d4e14fb55d7f5d57',
            'md5:1f25b912465fe651f5338a7f106f5fe0',
        ),
        '210-Brainnetome': (None, None),
        '294-Julich-Brain': (None, None),
    }

    def __init__(self, data_root, parcellation='200-Schaefer17Networks'):
        """
        Download (if needed) and index the data of one parcellation.

        Parameters
        ----------
        data_root : str
            Directory used as download/extraction root.
        parcellation : str
            One of the keys of `parcellations`.

        Raises
        ------
        ValueError
            If `parcellation` is not a known parcellation name.
        """
        # Explicit check instead of `assert`, which is stripped under -O.
        if parcellation not in self.parcellations:
            raise ValueError(
                f'Incorrect parcellation name: {parcellation!r}. '
                f'Available: {", ".join(sorted(self.parcellations))}'
            )
        self.data_root = data_root
        self.parcellation = parcellation
        sc_hash, bold_hash = self.parcellations[parcellation]

        pooch.retrieve(
            url=f'https://object.cscs.ch/v1/AUTH_227176556f3c4bb38df9feea4b91200c/hbp-d000067_Atlas_based_HCP_BOLD_pub/v1.0/{parcellation}.zip',
            known_hash=bold_hash,
            path=os.path.join(data_root, 'bold'),
            processor=pooch.Unzip(extract_dir='.'),
        )
        pooch.retrieve(
            url=f'https://object.cscs.ch/v1/AUTH_227176556f3c4bb38df9feea4b91200c/hbp-d000059_Atlas_based_HCP_connectomes_v1.1_pub/{parcellation}.zip',
            known_hash=sc_hash,
            path=os.path.join(data_root, 'connectivity'),
            processor=pooch.Unzip(extract_dir='.'),
        )
        self.subjects = self._list_subjects(
            os.path.join(data_root, 'bold', parcellation)
        )

    @staticmethod
    def _list_subjects(bold_dir):
        """Return the sorted names of the entries of `bold_dir` ending in a digit."""
        # glob order is filesystem dependent; sort for reproducible listings.
        return sorted(
            os.path.basename(p) for p in glob(os.path.join(bold_dir, '*[0-9]'))
        )

    def load_sc(self, subj):
        """
        Load structural connectivity.

        Parameters
        ----------
        subj : str
            Subject id from `self.subjects`.

        Returns
        -------
        weights: ndarray
            Weights matrix [N,N].
        tract_length: ndarray
            Tract length matrix [N,N].
        """
        subj_dir = os.path.join(
            self.data_root, 'connectivity', self.parcellation,
            '1StructuralConnectivity', subj,
        )
        weights = np.loadtxt(os.path.join(subj_dir, 'Counts.csv'), delimiter=' ')
        tract_lengths = np.loadtxt(os.path.join(subj_dir, 'Lengths.csv'), delimiter=' ')
        return weights, tract_lengths

    def load_bold(self, subj):
        """
        Load the BOLD time series stored in `rfMRI_REST1_LR_BOLD.tsv`.

        Parameters
        ----------
        subj : str
            Subject id from `self.subjects`.

        Returns
        -------
        bold: ndarray
            Array as stored in the file, split on whitespace
            (presumably [time, regions] -- TODO confirm).
        """
        return np.loadtxt(
            os.path.join(self.data_root, 'bold', self.parcellation, subj,
                         'rfMRI_REST1_LR_BOLD.tsv'),
        )