From 2ba13ed85bc57e88b740b12b753f7486705e07f2 Mon Sep 17 00:00:00 2001 From: polarbean <harry.carey95@gmail.com> Date: Wed, 21 Aug 2024 18:39:31 +0200 Subject: [PATCH] no longer need metadata as we use brainglobe --- PyNutil/main.py | 1 - PyNutil/metadata/config.json | 28 -- PyNutil/metadata/metadata_loader.py | 19 - main.py | 396 ++++++++++++++++++ setup.py | 22 + tests/test_jsons/NOP_horizontal.json | 7 + tests/test_jsons/PVMouse_81264_test.json | 4 +- tests/test_jsons/test1.json | 8 - tests/test_jsons/test10_PyNutil_web.json | 1 - .../test8_PyNutil_bigcaudoputamen.json | 1 - .../test_jsons/test9_PyNutil_linear_only.json | 3 +- 11 files changed, 428 insertions(+), 62 deletions(-) delete mode 100644 PyNutil/metadata/config.json delete mode 100644 PyNutil/metadata/metadata_loader.py create mode 100644 main.py create mode 100644 setup.py create mode 100644 tests/test_jsons/NOP_horizontal.json delete mode 100644 tests/test_jsons/test1.json diff --git a/PyNutil/main.py b/PyNutil/main.py index 0feb176..c7773fe 100644 --- a/PyNutil/main.py +++ b/PyNutil/main.py @@ -83,7 +83,6 @@ class PyNutil: label_path=None, settings_file=None, ) -> None: - self.config, self.metadata_path = metadata_loader.load_config() if settings_file is not None: with open(settings_file, "r") as f: settings = json.load(f) diff --git a/PyNutil/metadata/config.json b/PyNutil/metadata/config.json deleted file mode 100644 index 019c718..0000000 --- a/PyNutil/metadata/config.json +++ /dev/null @@ -1,28 +0,0 @@ -{"annotation_volumes":{ - "allen2015":{ - "labels":"allen2015_colours.csv", - "volume":"None" - }, - "allen2017":{ - "labels":"allen2017_colours.csv", - "volume":"annotation_25_reoriented_2017.nrrd" - }, - "allen2022":{ - "labels":"allen2022_colours.csv", - "volume":"annotation_10_reoriented_2022.nrrd" - }, - "WHS_Atlas_Rat_Brain_v2":{ - "labels":"WHS_v2_colours.csv", - "volume":"None" - }, - "WHS_Atlas_Rat_Brain_v3":{ - "labels":"WHS_v3_colours.csv", - "volume":"None" - }, - "WHS_Atlas_Rat_Brain_v4":{ - "labels":"WHS_v4_colours.csv", - "volume":"None" - } -}, -"annotation_volume_directory":"PyNutil/PyNutil/metadata/annotation_volumes/" -} \ No newline at end of file diff --git a/PyNutil/metadata/metadata_loader.py b/PyNutil/metadata/metadata_loader.py deleted file mode 100644 index 2763d55..0000000 --- a/PyNutil/metadata/metadata_loader.py +++ /dev/null @@ -1,19 +0,0 @@ -import json -from pathlib import Path - - -def load_config() -> dict: - """ - Loads the config file - - :return: the configuration file - :rtype: dict - """ - # returns a path to the config file assuming that it is in the same directory as this script - path = str(Path(__file__).parent.parent.absolute()) + "/metadata/config.json" - # open the config file - with open(path, "r") as f: - # load the config file - config = json.load(f) - # return the config file and path - return config, path diff --git a/main.py b/main.py new file mode 100644 index 0000000..c7773fe --- /dev/null +++ b/main.py @@ -0,0 +1,396 @@ +from .metadata import metadata_loader +from .read_and_write import read_atlas_volume, write_points_to_meshview +from .coordinate_extraction import folder_to_atlas_space +from .counting_and_load import label_points, pixel_count_per_region +import json +import pandas as pd +from datetime import datetime +import numpy as np +import brainglobe_atlasapi +import os + + +class PyNutil: + """A utility class for working with brain atlases and segmentation data. + + Parameters + ---------- + segmentation_folder : str + The path to the folder containing the segmentation data. + alignment_json : str + The path to the alignment JSON file. + colour : int + The colour of the segmentation data to extract. + atlas_name : str + The name of the atlas volume to use. Uses BrainGlobe API name + settings_file : str, optional + The path to a JSON file containing the above parameters. + + Raises + ------ + ValueError + If any of the required parameters are None. + + Attributes + ---------- + segmentation_folder : str + The path to the folder containing the segmentation data. + alignment_json : str + The path to the alignment JSON file. + colour : int + The colour of the segmentation data to extract. + atlas : str + The name of the atlas volume being used. + atlas_volume : numpy.ndarray + The 3D array representing the atlas volume. + atlas_labels : pandas.DataFrame + A DataFrame containing the labels for the atlas volume. + pixel_points : numpy.ndarray + An array of pixel coordinates extracted from the segmentation data. + labeled_points : numpy.ndarray + An array of labeled pixel coordinates. + label_df : pandas.DataFrame + A DataFrame containing the pixel counts per region. + + Methods + ------- + load_atlas_data() + Loads the atlas volume and labels from disk. + get_coordinates(non_linear=True, method='all') + Extracts pixel coordinates from the segmentation data. + extract_coordinates(non_linear, method) + Extracts pixel coordinates from the segmentation data but is only used internally. + quantify_coordinates() + Quantifies the pixel coordinates by region. + label_points() + Labels the pixel coordinates by region but is only used internally. + count_pixels_per_region(labeled_points) + Counts the number of pixels per region but is only used internally. + save_analysis(output_folder) + Saves the pixel coordinates and pixel counts to disk. + write_points_to_meshview(output_folder) + Writes the pixel coordinates and labels to a JSON file for visualization but is only used internally. + + """ + + def __init__( + self, + segmentation_folder=None, + alignment_json=None, + colour=None, + atlas_name=None, + atlas_path=None, + label_path=None, + settings_file=None, + ) -> None: + if settings_file is not None: + with open(settings_file, "r") as f: + settings = json.load(f) + try: + segmentation_folder = settings["segmentation_folder"] + alignment_json = settings["alignment_json"] + colour = settings["colour"] + atlas_name = settings["atlas_name"] + except KeyError as exc: + raise KeyError( + "settings file must contain segmentation_folder, alignment_json, colour, and atlas_name" + ) from exc + # check if any values are None + if None in [segmentation_folder, alignment_json, colour, atlas_name]: + raise ValueError( + "segmentation_folder, alignment_json, colour, and volume_path must all be specified and not be None" + ) + # if atlas_name not in self.config["annotation_volumes"]: + # raise ValueError( + # f"Atlas {atlas_name} not found in config file, valid atlases are: \n{' , '.join(list(self.config['annotation_volumes'].keys()))}" + # ) + + self.segmentation_folder = segmentation_folder + self.alignment_json = alignment_json + self.colour = colour + self.atlas_name = atlas_name + if (atlas_path or label_path) and atlas_name: + raise ValueError("Please only specify an atlas_path and a label_path or an atlas_name, atlas and label paths are only used for loading custom atlases") + if atlas_path and label_path: + self.atlas_volume, self.atlas_labels = self.load_custom_atlas(atlas_path, label_path) + else: + self.atlas_volume, self.atlas_labels = self.load_atlas_data(atlas_name=atlas_name) + ###This is just because of the migration to BrainGlobe + + def load_atlas_data(self, atlas_name): + """Loads the atlas volume and labels from disk. + + Returns + ------- + tuple + A tuple containing the atlas volume as a numpy.ndarray and the atlas labels as a pandas.DataFrame. + + """ + # load the metadata json as well as the path to stored data files + # this could potentially be moved into init + print("loading atlas volume") + atlas = brainglobe_atlasapi.BrainGlobeAtlas(atlas_name=atlas_name) + atlas_structures = {'idx':[i['id'] for i in atlas.structures_list], + 'name':[i['name'] for i in atlas.structures_list], + 'r':[i['rgb_triplet'][0] for i in atlas.structures_list], + 'g':[i['rgb_triplet'][1] for i in atlas.structures_list], + 'b':[i['rgb_triplet'][2] for i in atlas.structures_list] + } + atlas_structures['idx'].insert(0,0) + atlas_structures['name'].insert(0,'Clear Label') + atlas_structures['r'].insert(0,0) + atlas_structures['g'].insert(0,0) + atlas_structures['b'].insert(0,0) + + atlas_labels = pd.DataFrame(atlas_structures) + if "allen_mouse_" in atlas_name: + print("reorienting allen atlas into quicknii space...") + atlas_volume = np.transpose(atlas.annotation,[2,0,1])[:,::-1,::-1] + else: + atlas_volume = atlas.annotation + print("atlas labels loaded ✅") + return atlas_volume, atlas_labels + + def load_custom_atlas(self, atlas_path, label_path): + atlas_volume = read_atlas_volume(atlas_path) + atlas_labels = pd.read_csv(label_path) + return atlas_volume, atlas_labels + + def get_coordinates( + self, non_linear=True, method="all", object_cutoff=0, use_flat=False + ): + """Extracts pixel coordinates from the segmentation data. + + Parameters + ---------- + non_linear : bool, optional + Whether to use non-linear registration. Default is True. + method : str, optional + The method to use for extracting coordinates. Valid options are 'per_pixel', 'per_object', or 'all'. + Default is 'all'. + object_cutoff : int, optional + The minimum number of pixels per object to be included in the analysis. Default is 1. + + Raises + ------ + ValueError + If the specified method is not recognized. + + """ + if not hasattr(self, "atlas_volume"): + raise ValueError( + "Please run build_quantifier before running get_coordinates" + ) + if method not in ["per_pixel", "per_object", "all"]: + raise ValueError( + f"method {method} not recognised, valid methods are: per_pixel, per_object, or all" + ) + print("extracting coordinates with method:", method) + ( + pixel_points, + centroids, + region_areas_list, + points_len, + centroids_len, + segmentation_filenames, + ) = folder_to_atlas_space( + self.segmentation_folder, + self.alignment_json, + self.atlas_labels, + pixel_id=self.colour, + non_linear=non_linear, + method=method, + object_cutoff=object_cutoff, + atlas_volume=self.atlas_volume, + use_flat=use_flat, + ) + self.pixel_points = pixel_points + self.centroids = centroids + ##points len and centroids len tell us how many points were extracted from each section + ##This will be used to split the data up later into per section files + self.points_len = points_len + self.centroids_len = centroids_len + self.segmentation_filenames = segmentation_filenames + self.region_areas_list = region_areas_list + self.method = method + + def quantify_coordinates(self): + """Quantifies the pixel coordinates by region. + + Raises + ------ + ValueError + If the pixel coordinates have not been extracted. + + """ + if not hasattr(self, "pixel_points") and not hasattr(self, "centroids"): + raise ValueError( + "Please run get_coordinates before running quantify_coordinates" + ) + print("quantifying coordinates") + labeled_points_centroids = None + labeled_points = None + if self.method == "per_object" or self.method == "all": + labeled_points_centroids = label_points( + self.centroids, self.atlas_volume, scale_factor=1 + ) + if self.method == "per_pixel" or self.method == "all": + labeled_points = label_points( + self.pixel_points, self.atlas_volume, scale_factor=1 + ) + + prev_pl = 0 + prev_cl = 0 + per_section_df = [] + current_centroids = None + current_points = None + for pl, cl, ra in zip( + self.points_len, self.centroids_len, self.region_areas_list + ): + if self.method == "per_object" or self.method == "all": + current_centroids = labeled_points_centroids[prev_cl : prev_cl + cl] + if self.method == "per_pixel" or self.method == "all": + current_points = labeled_points[prev_pl : prev_pl + pl] + current_df = pixel_count_per_region( + current_points, current_centroids, self.atlas_labels + ) + # create the df for section report and all report + # pixel_count_per_region returns a df with idx, pixel count, name and RGB. + # ra is region area list from + # merge current_df onto ra (region_areas_list) based on idx column + # (left means use only keys from left frame, preserve key order) + + """ + Merge region areas and object areas onto the atlas label file. + Remove duplicate columns + Calculate and add area_fraction to new column in the df. + """ + + all_region_df = self.atlas_labels.merge(ra, on="idx", how="left") + current_df_new = all_region_df.merge( + current_df, on="idx", how="left", suffixes=(None, "_y") + ).drop(columns=["name_y", "r_y", "g_y", "b_y"]) + current_df_new["area_fraction"] = ( + current_df_new["pixel_count"] / current_df_new["region_area"] + ) + current_df_new.fillna(0, inplace=True) + per_section_df.append(current_df_new) + prev_pl += pl + prev_cl += cl + + ##combine all the slice reports, groupby idx, name, rgb and sum region and object pixels. Remove area_fraction column and recalculate. + self.label_df = ( + pd.concat(per_section_df) + .groupby(["idx", "name", "r", "g", "b"]) + .sum() + .reset_index() + .drop(columns=["area_fraction"]) + ) + self.label_df["area_fraction"] = ( + self.label_df["pixel_count"] / self.label_df["region_area"] + ) + self.label_df.fillna(0, inplace=True) + """ + Potential source of error: + If there are duplicates in the label file, regional results will be duplicated and summed leading to incorrect results + """ + + # reorder the df to match the order of idx column in self.atlas_labels + self.label_df = self.label_df.set_index("idx") + self.label_df = self.label_df.reindex(index=self.atlas_labels["idx"]) + self.label_df = self.label_df.reset_index() + + self.labeled_points = labeled_points + self.labeled_points_centroids = labeled_points_centroids + self.per_section_df = per_section_df + + print("quantification complete ✅") + + def save_analysis(self, output_folder): + """Saves the pixel coordinates and pixel counts to different files in the specified + output folder. + + Parameters + ---------- + output_folder : str + The path to the output folder. + + Raises + ------ + ValueError + If the pixel coordinates have not been extracted. + + """ + if not os.path.exists(output_folder): + os.makedirs(output_folder) + + if not os.path.exists(f"{output_folder}/whole_series_report"): + os.makedirs(f"{output_folder}/whole_series_report") + + if not hasattr(self, "label_df"): + print("no quantification found so we will only save the coordinates") + print( + "if you want to save the quantification please run quantify_coordinates" + ) + else: + self.label_df.to_csv( + f"{output_folder}/whole_series_report/counts.csv", + sep=";", + na_rep="", + index=False, + ) + if not os.path.exists(f"{output_folder}/per_section_meshview"): + os.makedirs(f"{output_folder}/per_section_meshview") + if not os.path.exists(f"{output_folder}/per_section_reports"): + os.makedirs(f"{output_folder}/per_section_reports") + if not os.path.exists(f"{output_folder}/whole_series_meshview"): + os.makedirs(f"{output_folder}/whole_series_meshview") + + prev_pl = 0 + prev_cl = 0 + + for pl, cl, fn, df in zip( + self.points_len, + self.centroids_len, + self.segmentation_filenames, + self.per_section_df, + ): + split_fn = fn.split(os.sep)[-1].split(".")[0] + df.to_csv( + f"{output_folder}/per_section_reports/{split_fn}.csv", + sep=";", + na_rep="", + index=False, + ) + if self.method == "per_pixel" or self.method == "all": + write_points_to_meshview( + self.pixel_points[prev_pl : pl + prev_pl], + self.labeled_points[prev_pl : pl + prev_pl], + f"{output_folder}/per_section_meshview/{split_fn}_pixels.json", + self.atlas_labels, + ) + if self.method == "per_object" or self.method == "all": + write_points_to_meshview( + self.centroids[prev_cl : cl + prev_cl], + self.labeled_points_centroids[prev_cl : cl + prev_cl], + f"{output_folder}/per_section_meshview/{split_fn}_centroids.json", + self.atlas_labels, + ) + prev_cl += cl + prev_pl += pl + + if self.method == "per_pixel" or self.method == "all": + write_points_to_meshview( + self.pixel_points, + self.labeled_points, + f"{output_folder}/whole_series_meshview/pixels_meshview.json", + self.atlas_labels, + ) + if self.method == "per_object" or self.method == "all": + write_points_to_meshview( + self.centroids, + self.labeled_points_centroids, + f"{output_folder}/whole_series_meshview/objects_meshview.json", + self.atlas_labels, + ) + print("analysis saved ✅") diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..bdc1e56 --- /dev/null +++ b/setup.py @@ -0,0 +1,22 @@ +from setuptools import setup, find_packages +from pathlib import Path +this_directory = Path(__file__).parent +long_description = (this_directory / "README.md").read_text() + +setup( + name="PyNutil", + version='0.1.4', + packages=find_packages(), + license='MIT', + description='a package to translate data between common coordinate templates', + long_description=long_description, + long_description_content_type='text/markdown', + install_requires=[ + 'numpy', + 'nibabel', + 'scipy', + 'networkx', + 'pandas', + 'requests' + ] +) \ No newline at end of file diff --git a/tests/test_jsons/NOP_horizontal.json b/tests/test_jsons/NOP_horizontal.json new file mode 100644 index 0000000..76d91d9 --- /dev/null +++ b/tests/test_jsons/NOP_horizontal.json @@ -0,0 +1,7 @@ +{ + "atlas_name": "allen_mouse_25um", + "segmentation_folder": "tests/test_data/ttA_2877_NOP_segmentations", + "alignment_json": "tests/test_data/ttA_2877_NOP_horizontal_final_2017.json", + "nonlinear": true, + "colour": [0, 0, 255] +} \ No newline at end of file diff --git a/tests/test_jsons/PVMouse_81264_test.json b/tests/test_jsons/PVMouse_81264_test.json index 963261b..ca88d12 100644 --- a/tests/test_jsons/PVMouse_81264_test.json +++ b/tests/test_jsons/PVMouse_81264_test.json @@ -1,5 +1,5 @@ -{ "volume_path": "allen2017", - "label_path": "annotation_volumes/allen2017_colours.csv", +{ + "atlas_name": "allen_mouse_25um", "segmentation_folder": "test_data/ext-d000033_PVMouseExtraction_pub-Nutil_Quantifier_analysis-81264-Input_dir", "alignment_json": "test_data/PVMouse_81264_nonlin.json", "nonlinear": true, diff --git a/tests/test_jsons/test1.json b/tests/test_jsons/test1.json deleted file mode 100644 index ca83423..0000000 --- a/tests/test_jsons/test1.json +++ /dev/null @@ -1,8 +0,0 @@ -{ "atlas_name": "allen_mouse_25um", - "segmentation_folder": "test_data/ttA_2877_NOP_segmentations", - "alignment_json": "test_data/ttA_2877_NOP_horizontal_final_2017.json", - "nonlinear": true, - "colour": [0, 0, 255], - "points_json_path": "outputs/test1_points.json", - "counts_per_label_name": "outputs/test1_counts_per_allenID_2017.csv" -} \ No newline at end of file diff --git a/tests/test_jsons/test10_PyNutil_web.json b/tests/test_jsons/test10_PyNutil_web.json index 397836f..5249eee 100644 --- a/tests/test_jsons/test10_PyNutil_web.json +++ b/tests/test_jsons/test10_PyNutil_web.json @@ -1,6 +1,5 @@ { "atlas_name": "allen_mouse_25um", - "label_path": "PyNutil/annotation_volumes/allen2017_colours.csv", "segmentation_folder": "PyNutil/test_data/PyTest_web", "alignment_json": "PyNutil/test_data/PyNutil_test.waln", "nonlinear": true, diff --git a/tests/test_jsons/test8_PyNutil_bigcaudoputamen.json b/tests/test_jsons/test8_PyNutil_bigcaudoputamen.json index 089d7f9..06e671c 100644 --- a/tests/test_jsons/test8_PyNutil_bigcaudoputamen.json +++ b/tests/test_jsons/test8_PyNutil_bigcaudoputamen.json @@ -1,6 +1,5 @@ { "atlas_name": "allen_mouse_25um", - "label_path": "PyNutil/annotation_volumes/allen2017_colours.csv", "segmentation_folder": "PyNutil/test_data/PyTest_bigcaudoputamen_seg", "alignment_json": "PyNutil/test_data/PyNutil_testdataset_Nonlin_SY_fixed_bigcaudoputamen.json", "nonlinear": true, diff --git a/tests/test_jsons/test9_PyNutil_linear_only.json b/tests/test_jsons/test9_PyNutil_linear_only.json index febb006..afba3d5 100644 --- a/tests/test_jsons/test9_PyNutil_linear_only.json +++ b/tests/test_jsons/test9_PyNutil_linear_only.json @@ -1,6 +1,5 @@ { - "volume_path": "allen2017", - "label_path": "PyNutil/annotation_volumes/allen2017_colours.csv", + "atlas_name": "allen_mouse_25um", "segmentation_folder": "PyNutil/test_data/PyTest_linear_seg", "alignment_json": "PyNutil/test_data/PyNutil_testdataset_linear.json", "nonlinear": true, -- GitLab