Skip to content
Snippets Groups Projects
Commit 47181e2c authored by Jan Fousek's avatar Jan Fousek
Browse files

Domhof HCP dataset

parent f2a35c6b
Branches master
No related tags found
No related merge requests found
%% Cell type:markdown id:ef541df6-5c1a-4a0f-a069-24f110a7bf1d tags:
## 1000BRAINS study, connectivity data
%% Cell type:code id:5b441713-77b4-4e57-a188-9959c09784bd tags:
``` python
from tvb_ebrains_data import Brains1000Dataset
```
%% Output
2022-02-11 11:40:43,461 - WARNING - tvb.simulator.common - psutil module not available: no warnings will be issued when a
simulation may require more memory than available
INFO log level set to INFO
/home/izaak/local_repos/nostromo/tvb-ebrains-data/env/lib/python3.6/site-packages/tvb/datatypes/surfaces.py:63: UserWarning: Geodesic distance module is unavailable; some functionality for surfaces will be unavailable.
warnings.warn(msg)
WARNING Token required outside collaboratory environment. Set EBRAINS_TOKEN in the environment or provide directly.
%% Cell type:code id:2f47000a-9bbe-4ecb-bac1-b09490af7d7b tags:
``` python
dataset = Brains1000Dataset()
print(dataset.__doc__)
```
%% Output
Caspers, S. et al (2021).
1000BRAINS study, connectivity data.
v1.0: https://doi.org/10.25493/61QA-KP8
v1.1: https://doi.org/10.25493/6640-3XH
%% Cell type:code id:af94d470-386c-4b33-ab1a-5c4b9482c535 tags:
``` python
dataset.have_access()
```
%% Output
True
%% Cell type:markdown id:38af980a-7a02-4a13-947a-729f84494c96 tags:
If the call above returns `False`, request access either by visiting the dataset's Knowledge Graph (KG) page or by calling:
```python
dataset.request_access()
```
%% Cell type:code id:f608b41a-2bff-47b6-b30d-c99810cdb1f4 tags:
``` python
subjs = dataset.list_subjects()
```
%% Cell type:code id:e4c15491-0fbe-4339-b11c-dd9305a3be9d tags:
``` python
len(set(subjs)), subjs[:10]
```
%% Output
(1031,
['sub_00000',
'sub_00001',
'sub_00002',
'sub_00003',
'sub_00004',
'sub_00005',
'sub_00006',
'sub_00007',
'sub_00008',
'sub_00009'])
%% Cell type:code id:c6c583f3-8efe-4758-9ebb-6e9d9681cbb1 tags:
``` python
W = dataset.load_sc('sub_00008')
```
%% Cell type:code id:4709f50f-8284-4a41-9de7-c357a9514399 tags:
``` python
W.shape
```
%% Output
(100, 100)
%% Cell type:markdown id:d5996ac2-f8cd-49ac-91a6-cd8c0938e15d tags:
## Parcellation-based structural connectomes (HCP)
%% Cell type:code id:2e5431ed-cbfe-42bb-9357-b4f292eae0cf tags:
``` python
from tvb_ebrains_data import HCPDataset
```
%% Cell type:code id:8954ff48-9b28-4dd9-95f6-02de0924e817 tags:
``` python
dataset = HCPDataset()
```
%% Cell type:code id:a9c1fd3c-fac1-4208-bc97-d18b375263d0 tags:
``` python
print(dataset.__doc__)
```
%% Output
Domhof, J. W. M., Jung, K., Eickhoff, S. B., & Popovych, O. V. (2021).
Parcellation-based structural and resting-state functional brain
connectomes of a healthy cohort [Data set]. EBRAINS.
https://doi.org/10.25493%2F81EV-ZVT
%% Cell type:code id:fc84c8f1-5dc9-47ed-8df2-f14a747fa709 tags:
``` python
dataset.list_parcellations()
```
%% Output
['031-MIST',
'038-CraddockSCorr2Level',
'048-HarvardOxfordMaxProbThr0',
'056-CraddockSCorr2Level',
'056-MIST',
'070-DesikanKilliany',
'079-Shen2013',
'086-EconomoKoskinas',
'092-AALV2',
'096-HarvardOxfordMaxProbThr0',
'100-Schaefer17Networks',
'103-MIST',
'108-CraddockSCorr2Level',
'150-Destrieux',
'156-Shen2013',
'160-CraddockSCorr2Level',
'167-MIST',
'200-Schaefer17Networks',
'210-Brainnetome']
%% Cell type:code id:286635b0-99e2-42c2-889a-dc1126752df7 tags:
``` python
subjs = dataset.list_subjects(parcellation='200-Schaefer17Networks')
len(subjs), subjs[:10]
```
%% Output
(200, ['000', '001', '002', '003', '004', '005', '006', '007', '008', '009'])
%% Cell type:code id:00af414e-941a-408d-ae3a-0010f430e00a tags:
``` python
W, D = dataset.load_sc(subject='003', parcellation='200-Schaefer17Networks')
```
%% Cell type:code id:44bfe644-8822-4075-b828-844c92d8b507 tags:
``` python
W.shape, D.shape
```
%% Output
((200, 200), (200, 200))
%% Cell type:code id:896ad8f3-c824-48d7-a2dc-8503a808294e tags:
``` python
from tvb_ebrains_data import HCPDomhof
```
%% Cell type:code id:4e730e39-68cf-4abc-b0dd-14abd74e82e3 tags:
``` python
dset = HCPDomhof(data_root='.')
```
%% Cell type:code id:4e58c1d7-743d-41aa-9a43-bf7b90cc195d tags:
``` python
dset.subjects[:5]
```
%% Output
['093', '124', '106', '020', '077']
%% Cell type:code id:5dcd7edd-8067-4d37-a55b-46a831279b75 tags:
``` python
dset.load_bold('093').shape
```
%% Output
(1200, 200)
%% Cell type:code id:a1a6c4ef-5a0f-4f81-a952-c0117c168e85 tags:
``` python
help(dset.load_sc)
```
%% Output
Help on method load_sc in module tvb_ebrains_data.data:
load_sc(subj) method of tvb_ebrains_data.data.HCPDomhof instance
Load structural connectivity.
Parameters
----------
subj : str
Subject id from `self.subjects`.
Returns
-------
weights: ndarray
Weights matrix [N,N].
tract_length: ndarray
Tract length matrix [N,N].
%% Cell type:code id:6997cabe-3541-4ea5-8dff-cf635c853473 tags:
``` python
dset.load_sc('093')[0].shape
```
%% Output
(200, 200)
......
......@@ -6,5 +6,6 @@ setup(
version='0.2.2',
install_requires=[
'tvb-library',
'pooch',
]
)
from .data import HCPDataset
from .data import HCPDomhof
from .data import Brains1000Dataset
......
......@@ -3,6 +3,8 @@ import os
import io
import requests
import numpy as np
import pooch
from glob import glob
from tvb.simulator.lab import *
logger = logging.getLogger(__name__)
......@@ -132,6 +134,108 @@ class HCPDataset(DataProxyConnectivityDataset):
return W, D
class HCPDomhof():
    r"""
    Parcellation-based HCP connectomes and BOLD time series (Domhof et al.),
    downloaded with pooch and read from a local directory.

    - https://doi.org/10.25493/NVS8-XS5
    - https://doi.org/10.25493/F9DP-WCQ

    Creating an instance fetches the BOLD and the connectivity archive of a
    single parcellation (or reuses the copies already present under
    ``data_root``) and unzips them; the ``load_*`` methods then read the
    per-subject files from disk.

    Available parcellations:

        031-MIST                      038-CraddockSCorr2Level
        048-HarvardOxfordMaxProbThr0  056-CraddockSCorr2Level
        056-MIST                      070-DesikanKilliany
        079-Shen2013                  086-EconomoKoskinas
        092-AALV2                     096-HarvardOxfordMaxProbThr0
        100-Schaefer17Networks        103-MIST
        108-CraddockSCorr2Level       150-Destrieux
        156-Shen2013                  160-CraddockSCorr2Level
        167-MIST                      200-Schaefer17Networks
        210-Brainnetome               294-Julich-Brain

    The tsv files for the time-series are double space separated and have a
    trailing space. `load_bold` reads them unchanged (it splits on any
    whitespace); if comma-separated files are needed they can be converted
    with:

    ```
    find . -name '*.tsv' -exec sed -i 's/[ \t]*$//' {} \;
    find . -name '*.tsv' -exec sed -i 's/ /,/g' {} \;
    ```
    """

    # Archive checksums per parcellation as (connectivity, bold).
    # A bare hex string is a sha256 digest, other algorithms are prefixed
    # ("md5:..."). None means no checksum has been recorded yet, in which
    # case pooch downloads the archive without verifying it.
    parcellations = {
        '031-MIST': (None, None),
        '038-CraddockSCorr2Level': (None, None),
        '048-HarvardOxfordMaxProbThr0': (None, None),
        '056-CraddockSCorr2Level': (None, None),
        '056-MIST': (None, None),
        '070-DesikanKilliany': (
            '419c59ab4e01059265cad42e5e68d7e58b0381bc27fe47ad4d4218358ba76280',
            '26812c39d3963924d3c8fdf65dc3e08b6be13e7d6a5f91e7a13734714958f5fc',
        ),
        '079-Shen2013': (None, None),
        '086-EconomoKoskinas': (None, None),
        '092-AALV2': (None, None),
        '096-HarvardOxfordMaxProbThr0': (None, None),
        '100-Schaefer17Networks': (
            'de583e85dd4aa1c0521d61d73f290ad6da3ce4cd5b3538c62b7630f03e438157',
            '650d5bf9a103299c6505a129051b5bc54a413a9d5ddffb574a4d165c12b1457f',
        ),
        '103-MIST': (None, None),
        '108-CraddockSCorr2Level': (None, None),
        '150-Destrieux': (None, None),
        '156-Shen2013': (None, None),
        '160-CraddockSCorr2Level': (None, None),
        '167-MIST': (None, None),
        '200-Schaefer17Networks': (
            '5086f4b3405acff84ffe132cee17c67a90000a3fae98da50d4e14fb55d7f5d57',
            'md5:1f25b912465fe651f5338a7f106f5fe0',
        ),
        '210-Brainnetome': (None, None),
        '294-Julich-Brain': (None, None),
    }

    def __init__(self, data_root, parcellation='200-Schaefer17Networks'):
        """
        Download (if necessary) and index the data of one parcellation.

        Parameters
        ----------
        data_root : str
            Directory under which the ``bold`` and ``connectivity``
            sub-directories are created and populated.
        parcellation : str
            One of the keys of `HCPDomhof.parcellations`.

        Raises
        ------
        ValueError
            If `parcellation` is not a known parcellation name.
        """
        # Explicit check instead of `assert`: assertions disappear under
        # `python -O`, which would turn a typo into an obscure KeyError.
        if parcellation not in self.parcellations:
            raise ValueError(
                f'Incorrect parcellation name {parcellation!r}; '
                f'expected one of: {", ".join(self.parcellations)}.'
            )

        self.data_root = data_root
        self.parcellation = parcellation
        connectivity_hash, bold_hash = self.parcellations[parcellation]

        # Only the side effect (archive downloaded and unzipped next to it)
        # matters here; the list of extracted files pooch returns is unused.
        pooch.retrieve(
            url=f'https://object.cscs.ch/v1/AUTH_227176556f3c4bb38df9feea4b91200c/hbp-d000067_Atlas_based_HCP_BOLD_pub/v1.0/{parcellation}.zip',
            known_hash=bold_hash,
            path=os.path.join(self.data_root, 'bold'),
            processor=pooch.Unzip(extract_dir='.'),
        )
        pooch.retrieve(
            url=f'https://object.cscs.ch/v1/AUTH_227176556f3c4bb38df9feea4b91200c/hbp-d000059_Atlas_based_HCP_connectomes_v1.1_pub/{parcellation}.zip',
            known_hash=connectivity_hash,
            path=os.path.join(self.data_root, 'connectivity'),
            processor=pooch.Unzip(extract_dir='.'),
        )

        # Subject ids are the names of the entries ending in a digit inside
        # the extracted BOLD folder. glob yields them in arbitrary,
        # filesystem-dependent order, so sort for reproducible results.
        subject_pattern = os.path.join(
            self.data_root, 'bold', parcellation, '*[0-9]'
        )
        self.subjects = sorted(
            os.path.basename(p) for p in glob(subject_pattern)
        )

    def load_sc(self, subj):
        """
        Load structural connectivity.

        Parameters
        ----------
        subj : str
            Subject id from `self.subjects`.

        Returns
        -------
        weights: ndarray
            Weights matrix [N,N].
        tract_length: ndarray
            Tract length matrix [N,N].
        """
        # Both matrices of a subject live in the same folder of the
        # extracted connectivity archive.
        subject_dir = os.path.join(
            self.data_root,
            'connectivity',
            self.parcellation,
            '1StructuralConnectivity',
            subj,
        )
        weights = np.loadtxt(
            os.path.join(subject_dir, 'Counts.csv'),
            delimiter=' ',
        )
        tract_lengths = np.loadtxt(
            os.path.join(subject_dir, 'Lengths.csv'),
            delimiter=' ',
        )
        return weights, tract_lengths

    def load_bold(self, subj):
        """
        Load the BOLD time series of the ``rfMRI_REST1_LR`` run.

        Parameters
        ----------
        subj : str
            Subject id from `self.subjects`.

        Returns
        -------
        bold: ndarray
            Contents of ``rfMRI_REST1_LR_BOLD.tsv``, one array row per line
            of the file (presumably [time, region] -- confirm against the
            dataset description).
        """
        # No delimiter given: loadtxt splits on runs of whitespace, which
        # copes with the double-space separators and trailing spaces.
        bold = np.loadtxt(
            os.path.join(
                self.data_root,
                'bold',
                self.parcellation,
                subj,
                'rfMRI_REST1_LR_BOLD.tsv',
            ),
        )
        return bold
class Brains1000Dataset(DataProxyConnectivityDataset):
"""
......
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment