Domhof HCP dataset

47181e2c · Jan Fousek · f2a35c6b · 47181e2c · 47181e2c · 47181e2c
Commit 47181e2c authored 1 year ago by Jan Fousek
--- a/notebooks/demo.ipynb
+++ b/notebooks/demo.ipynb
@@ -10,43 +10,12 @@
  },
  {
   "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": null,
   "id": "5b441713-77b4-4e57-a188-9959c09784bd",
   "metadata": {
-    "execution": {
-     "iopub.execute_input": "2022-02-11T10:40:43.234120Z",
-     "iopub.status.busy": "2022-02-11T10:40:43.233485Z",
-     "iopub.status.idle": "2022-02-11T10:40:44.641758Z",
-     "shell.execute_reply": "2022-02-11T10:40:44.641057Z",
-     "shell.execute_reply.started": "2022-02-11T10:40:43.234047Z"
-    }
+    "tags": []
   },
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "2022-02-11 11:40:43,461 - WARNING - tvb.simulator.common - psutil module not available: no warnings will be issued when a\n",
-      "    simulation may require more memory than available\n",
-      "   INFO  log level set to INFO\n"
-     ]
-    },
-    {
-     "name": "stderr",
-     "output_type": "stream",
-     "text": [
-      "/home/izaak/local_repos/nostromo/tvb-ebrains-data/env/lib/python3.6/site-packages/tvb/datatypes/surfaces.py:63: UserWarning: Geodesic distance module is unavailable; some functionality for surfaces will be unavailable.\n",
-      "  warnings.warn(msg)\n"
-     ]
-    },
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "WARNING  Token required outside collaboratory environment. Set EBRAINS_TOKEN in the environment or provide directly.\n"
-     ]
-    }
-   ],
+   "outputs": [],
   "source": [
    "from tvb_ebrains_data import Brains1000Dataset"
   ]
@@ -374,11 +343,136 @@
   "source": [
    "W.shape, D.shape"
   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "id": "896ad8f3-c824-48d7-a2dc-8503a808294e",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "from tvb_ebrains_data import HCPDomhof"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "id": "4e730e39-68cf-4abc-b0dd-14abd74e82e3",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [],
+   "source": [
+    "dset = HCPDomhof(data_root='.')"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "id": "4e58c1d7-743d-41aa-9a43-bf7b90cc195d",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['093', '124', '106', '020', '077']"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dset.subjects[:5]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "id": "5dcd7edd-8067-4d37-a55b-46a831279b75",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(1200, 200)"
+      ]
+     },
+     "execution_count": 7,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dset.load_bold('093').shape"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a1a6c4ef-5a0f-4f81-a952-c0117c168e85",
+   "metadata": {
+    "tags": []
+   },
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Help on method load_sc in module tvb_ebrains_data.data:\n",
+      "\n",
+      "load_sc(subj) method of tvb_ebrains_data.data.HCPDomhof instance\n",
+      "    Load structural connectivity.\n",
+      "    \n",
+      "    Parameters\n",
+      "    ----------\n",
+      "        subj : str\n",
+      "            Subject id from `self.subjects`.\n",
+      "    \n",
+      "    Returns\n",
+      "    -------\n",
+      "        weights: ndarray\n",
+      "            Weights matrix [N,N].\n",
+      "        tract_length: ndarray\n",
+      "            Tract length matrix [N,N].\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "help(dset.load_sc)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "id": "6997cabe-3541-4ea5-8dff-cf635c853473",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "(200, 200)"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "dset.load_sc('093')[0].shape"
+   ]
  }
 ],
 "metadata": {
  "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
@@ -392,7 +486,7 @@
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
-   "version": "3.6.9"
+   "version": "3.8.16"
  }
 },
 "nbformat": 4,

 %% Cell type:markdown id:ef541df6-5c1a-4a0f-a069-24f110a7bf1d tags:

 ## 1000BRAINS study, connectivity data

 %% Cell type:code id:5b441713-77b4-4e57-a188-9959c09784bd tags:

 ``` python
 from tvb_ebrains_data import Brains1000Dataset
 ```

-%% Output
-
-    2022-02-11 11:40:43,461 - WARNING - tvb.simulator.common - psutil module not available: no warnings will be issued when a
-        simulation may require more memory than available
-       INFO  log level set to INFO
-
-    /home/izaak/local_repos/nostromo/tvb-ebrains-data/env/lib/python3.6/site-packages/tvb/datatypes/surfaces.py:63: UserWarning: Geodesic distance module is unavailable; some functionality for surfaces will be unavailable.
-      warnings.warn(msg)
-
-    WARNING  Token required outside collaboratory environment. Set EBRAINS_TOKEN in the environment or provide directly.
-
 %% Cell type:code id:2f47000a-9bbe-4ecb-bac1-b09490af7d7b tags:

 ``` python
 dataset = Brains1000Dataset()
 print(dataset.__doc__)
 ```

 %% Output

    
        Caspers, S. et al (2021).
        1000BRAINS study, connectivity data.
    
        v1.0: https://doi.org/10.25493/61QA-KP8
        v1.1: https://doi.org/10.25493/6640-3XH
    

 %% Cell type:code id:af94d470-386c-4b33-ab1a-5c4b9482c535 tags:

 ``` python
 dataset.have_access()
 ```

 %% Output

    True

 %% Cell type:markdown id:38af980a-7a02-4a13-947a-729f84494c96 tags:

 If the call above returns `False`, the access request can be triggered by visiting the KG page of the dataset, or calling:

 ```python
 dataset.request_access()
 ```

 %% Cell type:code id:f608b41a-2bff-47b6-b30d-c99810cdb1f4 tags:

 ``` python
 subjs = dataset.list_subjects()
 ```

 %% Cell type:code id:e4c15491-0fbe-4339-b11c-dd9305a3be9d tags:

 ``` python
 len(set(subjs)), subjs[:10]
 ```

 %% Output

    (1031,
     ['sub_00000',
      'sub_00001',
      'sub_00002',
      'sub_00003',
      'sub_00004',
      'sub_00005',
      'sub_00006',
      'sub_00007',
      'sub_00008',
      'sub_00009'])

 %% Cell type:code id:c6c583f3-8efe-4758-9ebb-6e9d9681cbb1 tags:

 ``` python
 W = dataset.load_sc('sub_00008')
 ```

 %% Cell type:code id:4709f50f-8284-4a41-9de7-c357a9514399 tags:

 ``` python
 W.shape
 ```

 %% Output

    (100, 100)

 %% Cell type:markdown id:d5996ac2-f8cd-49ac-91a6-cd8c0938e15d tags:

 ## Parcellation-based structural connectomes (HCP)

 %% Cell type:code id:2e5431ed-cbfe-42bb-9357-b4f292eae0cf tags:

 ``` python
 from tvb_ebrains_data import HCPDataset
 ```

 %% Cell type:code id:8954ff48-9b28-4dd9-95f6-02de0924e817 tags:

 ``` python
 dataset = HCPDataset()
 ```

 %% Cell type:code id:a9c1fd3c-fac1-4208-bc97-d18b375263d0 tags:

 ``` python
 print(dataset.__doc__)
 ```

 %% Output

    
        Domhof, J. W. M., Jung, K., Eickhoff, S. B., & Popovych, O. V. (2021).
        Parcellation-based structural and resting-state functional brain
        connectomes of a healthy cohort [Data set]. EBRAINS.
    
        https://doi.org/10.25493%2F81EV-ZVT
    

 %% Cell type:code id:fc84c8f1-5dc9-47ed-8df2-f14a747fa709 tags:

 ``` python
 dataset.list_parcellations()
 ```

 %% Output

    ['031-MIST',
     '038-CraddockSCorr2Level',
     '048-HarvardOxfordMaxProbThr0',
     '056-CraddockSCorr2Level',
     '056-MIST',
     '070-DesikanKilliany',
     '079-Shen2013',
     '086-EconomoKoskinas',
     '092-AALV2',
     '096-HarvardOxfordMaxProbThr0',
     '100-Schaefer17Networks',
     '103-MIST',
     '108-CraddockSCorr2Level',
     '150-Destrieux',
     '156-Shen2013',
     '160-CraddockSCorr2Level',
     '167-MIST',
     '200-Schaefer17Networks',
     '210-Brainnetome']

 %% Cell type:code id:286635b0-99e2-42c2-889a-dc1126752df7 tags:

 ``` python
 subjs = dataset.list_subjects(parcellation='200-Schaefer17Networks')
 len(subjs), subjs[:10]
 ```

 %% Output

    (200, ['000', '001', '002', '003', '004', '005', '006', '007', '008', '009'])

 %% Cell type:code id:00af414e-941a-408d-ae3a-0010f430e00a tags:

 ``` python
 W, D = dataset.load_sc(subject='003', parcellation='200-Schaefer17Networks')
 ```

 %% Cell type:code id:44bfe644-8822-4075-b828-844c92d8b507 tags:

 ``` python
 W.shape, D.shape
 ```

 %% Output

    ((200, 200), (200, 200))
+
+%% Cell type:code id:896ad8f3-c824-48d7-a2dc-8503a808294e tags:
+
+``` python
+from tvb_ebrains_data import HCPDomhof
+```
+
+%% Cell type:code id:4e730e39-68cf-4abc-b0dd-14abd74e82e3 tags:
+
+``` python
+dset = HCPDomhof(data_root='.')
+```
+
+%% Cell type:code id:4e58c1d7-743d-41aa-9a43-bf7b90cc195d tags:
+
+``` python
+dset.subjects[:5]
+```
+
+%% Output
+
+    ['093', '124', '106', '020', '077']
+
+%% Cell type:code id:5dcd7edd-8067-4d37-a55b-46a831279b75 tags:
+
+``` python
+dset.load_bold('093').shape
+```
+
+%% Output
+
+    (1200, 200)
+
+%% Cell type:code id:a1a6c4ef-5a0f-4f81-a952-c0117c168e85 tags:
+
+``` python
+help(dset.load_sc)
+```
+
+%% Output
+
+    Help on method load_sc in module tvb_ebrains_data.data:
+    
+    load_sc(subj) method of tvb_ebrains_data.data.HCPDomhof instance
+        Load structural connectivity.
+    
+        Parameters
+        ----------
+            subj : str
+                Subject id from `self.subjects`.
+    
+        Returns
+        -------
+            weights: ndarray
+                Weights matrix [N,N].
+            tract_length: ndarray
+                Tract length matrix [N,N].
+    
+
+%% Cell type:code id:6997cabe-3541-4ea5-8dff-cf635c853473 tags:
+
+``` python
+dset.load_sc('093')[0].shape
+```
+
+%% Output
+
+    (200, 200)

--- a/setup.py
+++ b/setup.py
@@ -6,5 +6,6 @@ setup(
    version='0.2.2',
    install_requires=[
        'tvb-library',
+        'pooch',
    ]
 )
--- a/tvb_ebrains_data/__init__.py
+++ b/tvb_ebrains_data/__init__.py
 from .data import HCPDataset
+from .data import HCPDomhof
 from .data import Brains1000Dataset



--- a/tvb_ebrains_data/data.py
+++ b/tvb_ebrains_data/data.py
@@ -3,6 +3,8 @@ import os
 import io
 import requests
 import numpy as np
+import pooch
+from glob import glob
 from tvb.simulator.lab import *

 logger = logging.getLogger(__name__)
@@ -132,6 +134,151 @@ class HCPDataset(DataProxyConnectivityDataset):
        return W, D


+class HCPDomhof:
+    r"""
+    Atlas-based HCP BOLD time series and structural connectomes (Domhof).
+
+    On construction the archives of one parcellation are fetched and
+    unzipped below `data_root` with `pooch`; `load_sc` and `load_bold`
+    then read the extracted per-subject files.
+
+    - https://doi.org/10.25493/NVS8-XS5
+    - https://doi.org/10.25493/F9DP-WCQ
+
+    Available parcellations:
+       031-MIST                      038-CraddockSCorr2Level
+       048-HarvardOxfordMaxProbThr0  056-CraddockSCorr2Level
+       056-MIST                      070-DesikanKilliany
+       079-Shen2013                  086-EconomoKoskinas
+       092-AALV2                     096-HarvardOxfordMaxProbThr0
+       100-Schaefer17Networks        103-MIST
+       108-CraddockSCorr2Level       150-Destrieux
+       156-Shen2013                  160-CraddockSCorr2Level
+       167-MIST                      200-Schaefer17Networks
+       210-Brainnetome               294-Julich-Brain
+
+    The tsv files for the time-series are double space separated and have a
+    trailing space. `load_bold` parses them with whitespace splitting, so no
+    conversion is needed for this class. Should another tool need a single
+    delimiter, the files can be rewritten as below (`load_bold` would then
+    no longer parse them as is):
+    ```
+    find . -name '*.tsv' -exec  sed -i 's/[ \t]*$//'  {} \;
+    find . -name '*.tsv' -exec  sed -i 's/  /,/g'  {} \;
+    ```
+    """
+    # Archive hashes per parcellation as (connectivity, bold). `None` is passed
+    # to pooch as `known_hash`, i.e. that download is not verified.
+    parcellations = {
+            '031-MIST'                       : (None, None),
+            '038-CraddockSCorr2Level'        : (None, None),
+            '048-HarvardOxfordMaxProbThr0'   : (None, None),
+            '056-CraddockSCorr2Level'        : (None, None),
+            '056-MIST'                       : (None, None),
+            '070-DesikanKilliany'            : ('419c59ab4e01059265cad42e5e68d7e58b0381bc27fe47ad4d4218358ba76280',
+                                                '26812c39d3963924d3c8fdf65dc3e08b6be13e7d6a5f91e7a13734714958f5fc'),
+            '079-Shen2013'                   : (None, None),
+            '086-EconomoKoskinas'            : (None, None),
+            '092-AALV2'                      : (None, None),
+            '096-HarvardOxfordMaxProbThr0'   : (None, None),
+            '100-Schaefer17Networks'         : ('de583e85dd4aa1c0521d61d73f290ad6da3ce4cd5b3538c62b7630f03e438157',
+                                                '650d5bf9a103299c6505a129051b5bc54a413a9d5ddffb574a4d165c12b1457f'),
+            '103-MIST'                       : (None, None),
+            '108-CraddockSCorr2Level'        : (None, None),
+            '150-Destrieux'                  : (None, None),
+            '156-Shen2013'                   : (None, None),
+            '160-CraddockSCorr2Level'        : (None, None),
+            '167-MIST'                       : (None, None),
+            '200-Schaefer17Networks'         : ('5086f4b3405acff84ffe132cee17c67a90000a3fae98da50d4e14fb55d7f5d57',
+                                                'md5:1f25b912465fe651f5338a7f106f5fe0'),
+            '210-Brainnetome'                : (None, None),
+            '294-Julich-Brain'               : (None, None),
+    }
+
+    def __init__(self, data_root, parcellation='200-Schaefer17Networks'):
+        """
+        Fetch the archives of one parcellation and list its subjects.
+
+        Parameters
+        ----------
+            data_root : str
+                Directory under which the `bold` and `connectivity`
+                download folders live.
+            parcellation : str
+                Parcellation name, one of the keys of `self.parcellations`.
+
+        Raises
+        ------
+            ValueError
+                If `parcellation` is not a known parcellation name.
+        """
+        # Explicit check instead of `assert`, which `python -O` strips.
+        if parcellation not in self.parcellations:
+            raise ValueError('Incorrect parcellation name.')
+        sc_hash, bold_hash = self.parcellations[parcellation]
+        self.data_root = data_root
+        self.parcellation = parcellation
+        pooch.retrieve(
+            url=f'https://object.cscs.ch/v1/AUTH_227176556f3c4bb38df9feea4b91200c/hbp-d000067_Atlas_based_HCP_BOLD_pub/v1.0/{parcellation}.zip',
+            known_hash=bold_hash,
+            path=os.path.join(self.data_root, 'bold'),
+            processor=pooch.Unzip(extract_dir='.')
+        )
+        pooch.retrieve(
+            url=f'https://object.cscs.ch/v1/AUTH_227176556f3c4bb38df9feea4b91200c/hbp-d000059_Atlas_based_HCP_connectomes_v1.1_pub/{parcellation}.zip',
+            known_hash=sc_hash,
+            path=os.path.join(self.data_root, 'connectivity'),
+            processor=pooch.Unzip(extract_dir='.')
+        )
+        subject_dirs = glob(os.path.join(self.data_root, 'bold', parcellation, '*[0-9]'))
+        # glob order is arbitrary; sort so that `subjects` is reproducible.
+        self.subjects = sorted(os.path.basename(p) for p in subject_dirs)
+
+    def load_sc(self, subj):
+        """
+        Load structural connectivity.
+
+        Parameters
+        ----------
+            subj : str
+                Subject id from `self.subjects`.
+
+        Returns
+        -------
+            weights: ndarray
+                Weights matrix [N,N], read from `Counts.csv`.
+            tract_length: ndarray
+                Tract length matrix [N,N], read from `Lengths.csv`.
+        """
+        sc_dir = os.path.join(
+            self.data_root, 'connectivity', self.parcellation,
+            '1StructuralConnectivity', subj,
+        )
+        weights = np.loadtxt(os.path.join(sc_dir, 'Counts.csv'), delimiter=' ')
+        tract_lengths = np.loadtxt(os.path.join(sc_dir, 'Lengths.csv'), delimiter=' ')
+        return weights, tract_lengths
+
+    def load_bold(self, subj):
+        """
+        Load the BOLD time series stored in `rfMRI_REST1_LR_BOLD.tsv`.
+
+        Parameters
+        ----------
+            subj : str
+                Subject id from `self.subjects`.
+
+        Returns
+        -------
+            bold: ndarray
+                Array parsed from the whitespace-separated file;
+                presumably [time points, regions] (TODO confirm).
+        """
+        bold_file = os.path.join(
+            self.data_root, 'bold', self.parcellation, subj, 'rfMRI_REST1_LR_BOLD.tsv'
+        )
+        # Default whitespace splitting copes with the double-space separators.
+        return np.loadtxt(bold_file)
+

 class Brains1000Dataset(DataProxyConnectivityDataset):
    """