Merge pull request #14 from Neural-Systems-at-UIO/OOP

Oop

Merge pull request #14 from Neural-Systems-at-UIO/OOP
Oop
6d508aa4 · Sharon Yates · GitHub · 7fcc802a · c845b6e1 · 6d508aa4
Commit 6d508aa4 authored 1 year ago by Sharon Yates Committed by GitHub 1 year ago
--- a/PyNutil/coordinate_extraction.py
+++ b/PyNutil/coordinate_extraction.py
@@ -3,7 +3,7 @@ import pandas as pd
 from DeepSlice.coord_post_processing.spacing_and_indexing import number_sections
 import json
 from .read_and_write import load_visualign_json
-from .counting_and_load import label_points
+from .counting_and_load import label_points, flat_to_dataframe
 from .visualign_deformations import triangulate, transform_vec
 from glob import glob
 from tqdm import tqdm
@@ -87,13 +87,14 @@ def transform_to_atlas_space(anchoring, y, x, reg_height, reg_width):
 def folder_to_atlas_space(
    folder,
    quint_alignment,
+    atlas_labels,
    pixel_id=[0, 0, 0],
    non_linear=True,
    method="all",
    object_cutoff=0,
 ):
    """Apply Segmentation to atlas space to all segmentations in a folder."""
-
+    """Return pixel_points, centroids, points_len, centroids_len, segmentation_filenames, """
    # This should be loaded above and passed as an argument
    slices = load_visualign_json(quint_alignment)

@@ -103,25 +104,37 @@ def folder_to_atlas_space(
        for file in glob(folder + "/*")
        if any([file.endswith(type) for type in segmentation_file_types])
    ]
+    flat_files = [
+        file
+        for file in glob(folder + "/flat_files/*")
+        if any([file.endswith('.flat')])
+    ]
    # Order segmentations and section_numbers
    # segmentations = [x for _,x in sorted(zip(section_numbers,segmentations))]
    # section_numbers.sort()
    points_list = [None] * len(segmentations)
    centroids_list = [None] * len(segmentations)
+    region_areas_list = [None] * len(segmentations)
    threads = []
+    flat_file_nrs = [int(number_sections([ff])[0]) for ff in flat_files]
    for segmentation_path, index in zip(segmentations, range(len(segmentations))):
        seg_nr = int(number_sections([segmentation_path])[0])
        current_slice_index = np.where([s["nr"] == seg_nr for s in slices])
        current_slice = slices[current_slice_index[0][0]]
+        current_flat_file_index = np.where([f == seg_nr for f in flat_file_nrs])
+        current_flat = flat_files[current_flat_file_index[0][0]]
        x = threading.Thread(
            target=segmentation_to_atlas_space,
            args=(
                current_slice,
                segmentation_path,
+                atlas_labels, 
+                current_flat,
                pixel_id,
                non_linear,
                points_list,
                centroids_list,
+                region_areas_list,
                index,
                method,
                object_cutoff,
@@ -143,6 +156,7 @@ def folder_to_atlas_space(
    return (
        np.array(points),
        np.array(centroids),
+        region_areas_list,
        points_len,
        centroids_len,
        segmentations,
@@ -152,13 +166,17 @@ def folder_to_atlas_space(
 def segmentation_to_atlas_space(
    slice,
    segmentation_path,
+    atlas_labels, 
+    flat_file_atlas,
    pixel_id="auto",
    non_linear=True,
    points_list=None,
    centroids_list=None,
+    region_areas_list=None, 
    index=None,
    method="per_pixel",
    object_cutoff=0,
+    
 ):
    """Combines many functions to convert a segmentation to atlas space. It takes care
    of deformations."""
@@ -177,6 +195,7 @@ def segmentation_to_atlas_space(
    seg_width = segmentation.shape[1]
    reg_height = slice["height"]
    reg_width = slice["width"]
+    region_areas = flat_to_dataframe(flat_file_atlas, atlas_labels, (seg_width,seg_height))
    # This calculates reg/seg
    y_scale, x_scale = transform_to_registration(
        seg_height, seg_width, reg_height, reg_width
@@ -226,6 +245,7 @@ def segmentation_to_atlas_space(
    )
    points_list[index] = np.array(points)
    centroids_list[index] = np.array(centroids)
+    region_areas_list[index] = region_areas


 def get_centroids(segmentation, pixel_id, y_scale, x_scale, object_cutoff=0):

--- a/PyNutil/counting_and_load.py
+++ b/PyNutil/counting_and_load.py
 import numpy as np
 import pandas as pd
 import struct
+import matplotlib.pyplot as plt
+import os
+import nrrd
+import cv2


 # related to counting and load
@@ -84,6 +88,7 @@ def pixel_count_per_region(
    # Find colours corresponding to each region ID and add to the pandas dataframe

    # Look up name, r, g, b in df_allen_colours in df_counts_per_label based on "idx"
+    #Sharon, remove this here
    new_rows = []
    for index, row in df_counts_per_label.iterrows():
        mask = df_label_colours["idx"] == row["idx"]
@@ -112,13 +117,16 @@ def pixel_count_per_region(


 """Read flat file and write into an np array"""
+"""Read flat file, write into an np array, assign label file values, return array"""

-
-def flat_to_array(flat_file):
+def flat_to_dataframe(flat_file, labelfile, rescaleXY=False):
+    if rescaleXY:
+        rescaleX, rescaleY = rescaleXY
    with open(flat_file, "rb") as f:
        # I don't know what b is, w and h are the width and height that we get from the
        # flat file header
        b, w, h = struct.unpack(">BII", f.read(9))
+        print(w, h)
        # Data is a one dimensional list of values
        # It has the shape width times height
        data = struct.unpack(">" + ("xBH"[b] * (w * h)), f.read(b * w * h))
@@ -133,8 +141,31 @@ def flat_to_array(flat_file):
            image[y, x] = image_data[x + y * w]

    image_arr = np.array(image)
-    return image_arr
+    #return image_arr
+    if rescaleXY:
+        w,h= rescaleXY
+        image_arr = cv2.resize(image_arr, (h, w), interpolation=cv2.INTER_NEAREST)
+
+
+    """assign label file values into image array"""
+    allen_id_image = np.zeros((h, w))  # create an empty image array
+    coordsy, coordsx = np.meshgrid(list(range(w)), list(range(h)))
+    values = image_arr[coordsy,coordsx]  # assign x,y coords from image_array into values
+    lbidx = labelfile["idx"].values
+    allen_id_image = lbidx[values.astype(int)]
+    #return allen_id_image
+
+    #def count_per_uniqueidx()

+    """count pixels for unique idx"""
+    unique_ids, counts = np.unique(allen_id_image, return_counts=True)
+
+    area_per_label = list(zip(unique_ids, counts))
+    # create a list of unique regions and pixel counts per region
+
+    df_area_per_label = pd.DataFrame(area_per_label, columns=["idx", "region_area"])
+    # create a pandas df with regions and pixel counts
+    return(df_area_per_label)

 # Import flat files, count pixels per label, np.unique... etc. nitrc.org/plugins/mwiki/index.php?title=visualign:Deformation

@@ -146,3 +177,4 @@ def flat_to_array(flat_file):
       b,w,h=struct.unpack(">BII",f.read(9))
       data=struct.unpack(">"+("xBH"[b]*(w*h)),f.read(b*w*h))
 """
+
--- a/messing_around_files/load_workflow.py
+++ b/messing_around_files/load_workflow.py
@@ -3,34 +3,32 @@
 import struct
 import matplotlib.pyplot as plt
 import pandas as pd
+import cv2
 import numpy as np

-from read_and_write import FlattoArray, LabeltoArray
+#from read_and_write import flat_to_array, label_to_array
+from counting_and_load import flat_to_dataframe

 base = r"../test_data/tTA_2877_NOP_s037_atlasmap/2877_NOP_tTA_lacZ_Xgal_s037_nl.flat"
 label = r"../annotation_volumes\allen2017_colours.csv"
+##optional
+seg = r"../test_data/tTA_2877_NOP_s037_seg/2877_NOP_tTA_lacZ_Xgal_resize_Simple_Seg_s037.png"
+segim = cv2.imread(seg)
+#the indexing [:2] means the first two values and [::-1] means reverse the list
+segXY = segim.shape[:2][::-1]
+#image_arr = flat_to_array(base, label)

-image_arr = FlattoArray(base)
+#plt.imshow(flat_to_array(base, label))

-plt.imshow(FlattoArray(base))
+df_area_per_label = flat_to_dataframe(base, label, segXY)

-"""assign label file values into image array"""
-labelfile = pd.read_csv(r"../annotation_volumes\allen2017_colours.csv")
-allen_id_image = np.zeros((h, w))  # create an empty image array
-coordsy, coordsx = np.meshgrid(list(range(w)), list(range(h)))
-values = image_arr[coordsx, coordsy]  # assign x,y coords from image_array into values
-lbidx = labelfile["idx"].values
-allen_id_image = lbidx[values.astype(int)]  # assign allen IDs into image array
+"""count pixels in np array for unique idx, return pd df"""
+#unique_ids, counts = np.unique(allen_id_image, return_counts=True)

-plt.imshow(allen_id_image)
-
-"""count pixels for unique idx"""
-unique_ids, counts = np.unique(allen_id_image, return_counts=True)
-
-area_per_label = list(zip(unique_ids, counts))
+#area_per_label = list(zip(unique_ids, counts))
 # create a list of unique regions and pixel counts per region

-df_area_per_label = pd.DataFrame(area_per_label, columns=["idx", "area_count"])
+#df_area_per_label = pd.DataFrame(area_per_label, columns=["idx", "area_count"])
 # create a pandas df with regions and pixel counts


@@ -61,7 +59,7 @@ df_area_per_label_name = pd.DataFrame(new_rows)

 print(df_area_per_label_name)
 df_area_per_label_name.to_csv(
-    "../outputs/test5_s037_area_per_idx.csv", sep=";", na_rep="", index=False
+    "../outputs/NOP_s037_regionareas.csv", sep=";", na_rep="", index=False
 )



--- a/PyNutil/main.py
+++ b/PyNutil/main.py
@@ -131,6 +131,7 @@ class PyNutil:
        atlas_labels = pd.read_csv(f"{atlas_root_path}{atlas_label_path}")
        print("atlas labels loaded ✅")
        return atlas_volume, atlas_labels
+    

    def get_coordinates(self, non_linear=True, method="all", object_cutoff=0):
        """Extracts pixel coordinates from the segmentation data.
@@ -163,12 +164,14 @@ class PyNutil:
        (
            pixel_points,
            centroids,
+            region_areas_list,
            points_len,
            centroids_len,
            segmentation_filenames,
        ) = folder_to_atlas_space(
            self.segmentation_folder,
            self.alignment_json,
+            self.atlas_labels,
            pixel_id=self.colour,
            non_linear=non_linear,
            method=method,
@@ -181,6 +184,7 @@ class PyNutil:
        self.points_len = points_len
        self.centroids_len = centroids_len
        self.segmentation_filenames = segmentation_filenames
+        self.region_areas_list = region_areas_list

    def quantify_coordinates(self):
        """Quantifies the pixel coordinates by region.
@@ -207,15 +211,12 @@ class PyNutil:
                self.pixel_points, self.atlas_volume, scale_factor=1
            )

-        self.label_df = pixel_count_per_region(
-            labeled_points, labeled_points_centroids, self.atlas_labels
-        )
        prev_pl = 0
        prev_cl = 0
        per_section_df = []
        current_centroids = None
        current_points = None
-        for pl,cl in zip(self.points_len, self.centroids_len):
+        for pl,cl,ra in zip(self.points_len, self.centroids_len, self.region_areas_list):
            if hasattr(self, "centroids"):
                current_centroids = labeled_points_centroids[prev_cl : prev_cl + cl]
            if hasattr(self, "pixel_points"):
@@ -223,10 +224,79 @@ class PyNutil:
            current_df = pixel_count_per_region(
                current_points, current_centroids, self.atlas_labels
            )
-            per_section_df.append(current_df)
+
+# create the df for section report and all report
+# pixel_count_per_region returns a df with idx, pixel count, name and RGB.
+# ra is region area list from 
+# merge current_df onto ra (region_areas_list) based on idx column
+#(left means use only keys from left frame, preserve key order)
+            
+            """
+            Merge region areas and object areas onto the atlas label file.
+            Remove duplicate columns
+            Calculate and add area_fraction to new column in the df.  
+            """
+            all_region_df = self.atlas_labels.merge(ra, on = 'idx', how='left')           
+            current_df_new = all_region_df.merge(current_df, on= 'idx', how= 'left', suffixes= (None,"_y")).drop(columns=["a","VIS", "MSH", "name_y","r_y","g_y","b_y"])
+            current_df_new["area_fraction"] = current_df_new["pixel_count"] / current_df_new["region_area"]
+            current_df_new.fillna(0, inplace=True)
+
+            # Several alternatives for the merge code above
+            """
+            new_rows = []
+            for index, row in all_region_df.iterrows():
+                mask = current_df["idx"] == row ["idx"]
+                current_region_row = current_df[mask]
+                region_area = current_region_row["pixel_count"].values
+                object_count = current_region_row["object_count"].values
+
+                row["pixel_count"] = region_area[0]
+                row["object_count"] = object_count[0]
+
+                new_rows.append[row]
+            
+            current_df_new = pd.DataFrame(new_rows)
+            """
+            
+            """
+            new_rows = []
+            for index, row in current_df.iterrows():
+                mask = self.atlas_labels["idx"] == row["idx"]
+                current_region_row = self.atlas_labels[mask]
+                current_region_name = current_region_row["name"].values
+                current_region_red = current_region_row["r"].values
+                current_region_green = current_region_row["g"].values
+                current_region_blue = current_region_row["b"].values
+
+                row["name"] = current_region_name[0]
+                row["r"] = current_region_red[0]
+                row["g"] = current_region_green[0]
+                row["b"] = current_region_blue[0]
+                
+                new_rows.append(row)
+
+            current_df_new = pd.DataFrame(new_rows)"""
+            
+            #per_section_df.append(current_df_new)
+            per_section_df.append(current_df_new)
            prev_pl += pl
            prev_cl += cl
+        
+        
+        ##combine all the slice reports, groupby idx, name, rgb and sum region and object pixels. Remove area_fraction column and recalculate.
+        self.label_df =  pd.concat(per_section_df).groupby(['idx','name','r','g','b']).sum().reset_index().drop(columns=['area_fraction'])
+        self.label_df["area_fraction"] = self.label_df["pixel_count"] / self.label_df["region_area"]
+        self.label_df.fillna(0, inplace=True)
+        """
+        Potential source of error:
+        If there are duplicates in the label file, regional results will be duplicated and summed leading to incorrect results
+        """

+        #reorder the df to match the order of idx column in self.atlas_labels        
+        self.label_df = self.label_df.set_index('idx')
+        self.label_df = self.label_df.reindex(index=self.atlas_labels['idx'])
+        self.label_df = self.label_df.reset_index()
+        
        self.labeled_points = labeled_points
        self.labeled_points_centroids = labeled_points_centroids
        self.per_section_df = per_section_df
@@ -250,6 +320,9 @@ class PyNutil:
        """
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
+            
+        if not os.path.exists(f"{output_folder}/whole_series_report"):
+            os.makedirs(f"{output_folder}/whole_series_report")

        if not hasattr(self, "label_df"):
            print("no quantification found so we will only save the coordinates")
@@ -257,13 +330,16 @@ class PyNutil:
                "if you want to save the quantification please run quantify_coordinates"
            )
        else:
+            
            self.label_df.to_csv(
-                f"{output_folder}/counts.csv", sep=";", na_rep="", index=False
+                f"{output_folder}/whole_series_report/counts.csv", sep=";", na_rep="", index=False
            )
        if not os.path.exists(f"{output_folder}/per_section_meshview"):
            os.makedirs(f"{output_folder}/per_section_meshview")
        if not os.path.exists(f"{output_folder}/per_section_reports"):
            os.makedirs(f"{output_folder}/per_section_reports")
+        if not os.path.exists(f"{output_folder}/whole_series_meshview"):
+            os.makedirs(f"{output_folder}/whole_series_meshview")

        prev_pl = 0
        prev_cl = 0
@@ -302,14 +378,14 @@ class PyNutil:
            write_points_to_meshview(
                self.pixel_points,
                self.labeled_points,
-                f"{output_folder}/pixels_meshview.json",
+                f"{output_folder}/whole_series_meshview/pixels_meshview.json",
                self.atlas_labels,
            )
        if hasattr(self, "centroids"):
            write_points_to_meshview(
                self.centroids,
                self.labeled_points_centroids,
-                f"{output_folder}/objects_meshview.json",
+                f"{output_folder}/whole_series_meshview/objects_meshview.json",
                self.atlas_labels,
            )
        print("analysis saved ✅")
--- a/PyNutil/read_and_write.py
+++ b/PyNutil/read_and_write.py
@@ -60,8 +60,8 @@ def save_dataframe_as_csv(df_to_save, output_csv):
    df_to_save.to_csv(output_csv, sep=";", na_rep="", index=False)


-def flat_to_array(flatfile):
-    """Read flat file and write into an np array, return array"""
+def flat_to_array(flatfile, labelfile):
+    """Read flat file, write into an np array, assign label file values, return array"""
    with open(flatfile, "rb") as f:
        # i dont know what b is, w and h are the width and height that we get from the
        # flat file header
@@ -80,7 +80,16 @@ def flat_to_array(flatfile):
            image[y, x] = imagedata[x + y * w]

    image_arr = np.array(image)
-    return image_arr
+    #return image_arr
+
+    """assign label file values into image array"""
+    labelfile = pd.read_csv(labelfile)
+    allen_id_image = np.zeros((h, w))  # create an empty image array
+    coordsy, coordsx = np.meshgrid(list(range(w)), list(range(h)))
+    values = image_arr[coordsx, coordsy]  # assign x,y coords from image_array into values
+    lbidx = labelfile["idx"].values
+    allen_id_image = lbidx[values.astype(int)]
+    return allen_id_image


 def label_to_array(label_path, image_array):
@@ -111,5 +120,6 @@ def files_in_directory(directory):


 def read_atlas_volume(atlas_volume_path):
+    """return data from atlas volume"""
    data, header = nrrd.read(atlas_volume_path)
    return data
--- a/annotation_volumes/allen2017_colours.csv
+++ b/annotation_volumes/allen2017_colours.csv
--- a/messing_around_files/testing_openflatfile.py
+++ b/messing_around_files/testing_openflatfile.py
@@ -38,7 +38,7 @@ with open(base, "rb") as f:

 """assign label file values into image array"""

-labelfile = pd.read_csv(r"metadata/annotation_volumes\allen2017_colours.csv")
+labelfile = pd.read_csv(r"annotation_volumes\allen2017_colours.csv")
 #labelfile[]
 print(list(zip(val,count)))


--- a/testOOP.py
+++ b/testOOP.py
@@ -9,3 +9,6 @@ pnt.get_coordinates(object_cutoff=0)
 pnt.quantify_coordinates()

 pnt.save_analysis("outputs/test8_PyNutil")
+
+# remove name, r, g, b, from pixel_
+# add to region_areas df
\ No newline at end of file
--- a/test_data/PyTest_seg/flat_files/test_s001_nl.flat
+++ b/test_data/PyTest_seg/flat_files/test_s001_nl.flat
--- a/test_data/PyTest_seg/flat_files/test_s001_nl.png
+++ b/test_data/PyTest_seg/flat_files/test_s001_nl.png
--- a/test_data/PyTest_seg/flat_files/test_s001_nl_rbw.png
+++ b/test_data/PyTest_seg/flat_files/test_s001_nl_rbw.png
--- a/test_data/PyTest_seg/flat_files/test_s002_nl.flat
+++ b/test_data/PyTest_seg/flat_files/test_s002_nl.flat
--- a/test_data/PyTest_seg/flat_files/test_s002_nl.png
+++ b/test_data/PyTest_seg/flat_files/test_s002_nl.png
--- a/test_data/PyTest_seg/flat_files/test_s002_nl_rbw.png
+++ b/test_data/PyTest_seg/flat_files/test_s002_nl_rbw.png
--- a/test_data/PyTest_seg/flat_files/test_s003_nl.flat
+++ b/test_data/PyTest_seg/flat_files/test_s003_nl.flat
--- a/test_data/PyTest_seg/flat_files/test_s003_nl.png
+++ b/test_data/PyTest_seg/flat_files/test_s003_nl.png
--- a/test_data/PyTest_seg/flat_files/test_s003_nl_rbw.png
+++ b/test_data/PyTest_seg/flat_files/test_s003_nl_rbw.png
--- a/test_data/PyTest_seg/flat_files/test_s004_nl.flat
+++ b/test_data/PyTest_seg/flat_files/test_s004_nl.flat
--- a/test_data/PyTest_seg/flat_files/test_s004_nl.png
+++ b/test_data/PyTest_seg/flat_files/test_s004_nl.png
--- a/test_data/PyTest_seg/flat_files/test_s004_nl_rbw.png
+++ b/test_data/PyTest_seg/flat_files/test_s004_nl_rbw.png