From b8185d70b8049ca454c22c75e08d798e3036f587 Mon Sep 17 00:00:00 2001
From: polarbean <harry.carey95@gmail.com>
Date: Mon, 17 Mar 2025 20:02:42 +0100
Subject: [PATCH] Include damage in both normal and custom reports

---
 PyNutil/io/file_operations.py               |   4 +-
 PyNutil/io/read_and_write.py                |  13 ---
 PyNutil/main.py                             |   4 +
 PyNutil/processing/coordinate_extraction.py | 117 +++++++------------
 PyNutil/processing/counting_and_load.py     | 119 ++++++++++++++------
 PyNutil/processing/data_analysis.py         |  21 +++-
 PyNutil/processing/transformations.py       |   3 +-
 PyNutil/processing/utils.py                 |  12 +-
 demos/basic_example_custom_atlas.py         |   2 +-
 9 files changed, 158 insertions(+), 137 deletions(-)

diff --git a/PyNutil/io/file_operations.py b/PyNutil/io/file_operations.py
index f4d08af..8e4656f 100644
--- a/PyNutil/io/file_operations.py
+++ b/PyNutil/io/file_operations.py
@@ -54,7 +54,9 @@ def save_analysis_output(
     os.makedirs(f"{output_folder}/per_section_meshview", exist_ok=True)
     os.makedirs(f"{output_folder}/per_section_reports", exist_ok=True)
     os.makedirs(f"{output_folder}/whole_series_meshview", exist_ok=True)
-
+    # Filter out rows where 'region_area' is 0 in label_df
+    if label_df is not None and "region_area" in label_df.columns:
+        label_df = label_df[label_df["region_area"] != 0]
     if label_df is not None:
         label_df.to_csv(
             f"{output_folder}/whole_series_report/{prepend}counts.csv",
diff --git a/PyNutil/io/read_and_write.py b/PyNutil/io/read_and_write.py
index 33740a4..c4c5e22 100644
--- a/PyNutil/io/read_and_write.py
+++ b/PyNutil/io/read_and_write.py
@@ -270,19 +270,6 @@ def flat_to_array(file, labelfile):
     return allen_id_image
 
 
-def label_to_array(label_path, image_array):
-    """assign label file values into image array, return array"""
-    labelfile = pd.read_csv(label_path)
-    allen_id_image = np.zeros((h, w))  # create an empty image array
-    coordsy, coordsx = np.meshgrid(list(range(w)), list(range(h)))
-    values = image_array[
-        coordsx, coordsy
-    ]  # assign x,y coords from image_array into values
-    lbidx = labelfile["idx"].values
-    allen_id_image = lbidx[values.astype(int)]  # assign allen IDs into image array
-    return allen_id_image
-
-
 def files_in_directory(directory):
     """return list of flat file names in a directory"""
     list_of_files = []
diff --git a/PyNutil/main.py b/PyNutil/main.py
index 9c3ec78..c28a63c 100644
--- a/PyNutil/main.py
+++ b/PyNutil/main.py
@@ -152,6 +152,8 @@ class PyNutil:
             self.points_len,
             self.centroids_len,
             self.segmentation_filenames,
+            self.per_point_undamaged,
+            self.per_centroid_undamaged,
         ) = folder_to_atlas_space(
             self.segmentation_folder,
             self.alignment_json,
@@ -201,6 +203,8 @@ class PyNutil:
             self.points_labels,
             self.centroids_labels,
             self.atlas_labels,
+            self.per_point_undamaged,
+            self.per_centroid_undamaged,
         )
         if self.custom_regions_dict is not None:
             self.custom_label_df, self.label_df = apply_custom_regions(
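A note on the API change above: `per_point_undamaged` and `per_centroid_undamaged` are boolean masks that run parallel to the point and centroid arrays, True where a point falls outside any damage-marked region. A minimal standalone sketch of that bookkeeping, with toy data rather than PyNutil's real arrays:

    import numpy as np

    # One atlas label per point, plus a parallel boolean mask that is
    # True where the point lies in undamaged tissue.
    point_labels = np.array([10, 10, 42, 42, 42, 7])
    per_point_undamaged = np.array([True, False, True, True, False, True])

    # Counts restricted to undamaged tissue...
    print(np.unique(point_labels[per_point_undamaged], return_counts=True))
    # ...and the damaged remainder, from the inverted mask.
    print(np.unique(point_labels[~per_point_undamaged], return_counts=True))

Carrying the mask alongside the points, rather than discarding damaged points outright, is what lets the reports further down tally undamaged and damaged hits side by side.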
diff --git a/PyNutil/processing/coordinate_extraction.py b/PyNutil/processing/coordinate_extraction.py
index 37e5358..210be15 100644
--- a/PyNutil/processing/coordinate_extraction.py
+++ b/PyNutil/processing/coordinate_extraction.py
@@ -76,75 +76,6 @@ def create_damage_mask(section, grid_spacing):
     return binary_image
 
 
-def create_threads(
-    segmentations,
-    slices,
-    flat_files,
-    flat_file_nrs,
-    atlas_labels,
-    pixel_id,
-    non_linear,
-    points_list,
-    centroids_list,
-    region_areas_list,
-    object_cutoff,
-    atlas_volume,
-    use_flat,
-):
-    """
-    Creates threads for processing segmentations.
-
-    Args:
-        segmentations (list): List of segmentation files.
-        slices (list): List of slices.
-        flat_files (list): List of flat files.
-        flat_file_nrs (list): List of flat file section numbers.
-        atlas_labels (DataFrame): DataFrame with atlas labels.
-        pixel_id (list): Pixel ID to match.
-        non_linear (bool): Whether to use non-linear transformation.
-        points_list (list): List to store points.
-        centroids_list (list): List to store centroids.
-        region_areas_list (list): List to store region areas.
-        object_cutoff (int): Pixel cutoff to remove small objects.
-        atlas_volume (ndarray): Volume with atlas labels.
-        use_flat (bool): Whether to use flat files.
-
-    Returns:
-        list: List of threads.
-    """
-    threads = []
-    for segmentation_path, index in zip(segmentations, range(len(segmentations))):
-        seg_nr = int(number_sections([segmentation_path])[0])
-        current_slice_index = np.where([s["nr"] == seg_nr for s in slices])
-        current_slice = slices[current_slice_index[0][0]]
-        if current_slice["anchoring"] == []:
-            continue
-        current_flat = get_current_flat_file(
-            seg_nr, flat_files, flat_file_nrs, use_flat
-        )
-
-        x = threading.Thread(
-            target=segmentation_to_atlas_space,
-            args=(
-                current_slice,
-                segmentation_path,
-                atlas_labels,
-                current_flat,
-                pixel_id,
-                non_linear,
-                points_list,
-                centroids_list,
-                region_areas_list,
-                index,
-                object_cutoff,
-                atlas_volume,
-                use_flat,
-            ),
-        )
-        threads.append(x)
-    return threads
-
-
 def folder_to_atlas_space(
     folder,
     quint_alignment,
@@ -174,7 +105,7 @@ def folder_to_atlas_space(
     slices, gridspacing = load_visualign_json(quint_alignment)
     segmentations = get_segmentations(folder)
     flat_files, flat_file_nrs = get_flat_files(folder, use_flat)
-    points_list, centroids_list, region_areas_list, centroids_labels, points_labels = (
+    points_list, centroids_list, region_areas_list, centroids_labels, points_labels, per_point_undamaged_list, per_centroid_undamaged_list = (
         initialize_lists(len(segmentations))
     )
     threads = create_threads(
@@ -190,14 +121,16 @@ def folder_to_atlas_space(
         centroids_labels,
         points_labels,
         region_areas_list,
+        per_point_undamaged_list,
+        per_centroid_undamaged_list,
         object_cutoff,
         atlas_volume,
         use_flat,
         gridspacing
     )
     start_and_join_threads(threads)
-    points, centroids, points_labels, centroids_labels, points_len, centroids_len = (
-        process_results(points_list, centroids_list, points_labels, centroids_labels)
+    points, centroids, points_labels, centroids_labels, points_len, centroids_len, per_point_undamaged_list, per_centroid_undamaged_list = (
+        process_results(points_list, centroids_list, points_labels, centroids_labels, per_point_undamaged_list, per_centroid_undamaged_list)
     )
     return (
         points,
@@ -208,6 +141,8 @@ def folder_to_atlas_space(
         points_len,
         centroids_len,
         segmentations,
+        per_point_undamaged_list,
+        per_centroid_undamaged_list,
     )
 
 
@@ -225,7 +160,8 @@ def initialize_lists(length):
     centroids_list = [np.array([])] * length
     centroids_labels = [np.array([])] * length
     points_labels = [np.array([])] * length
-
+    per_point_undamaged_list = [np.array([])] * length
+    per_centroid_undamaged_list = [np.array([])] * length
     region_areas_list = [
         pd.DataFrame(
             {
@@ -247,6 +183,8 @@ def initialize_lists(length):
         region_areas_list,
         centroids_labels,
         points_labels,
+        per_point_undamaged_list,
+        per_centroid_undamaged_list,
     )
 
 
@@ -263,6 +201,8 @@ def create_threads(
     centroids_labels,
     points_labels,
     region_areas_list,
+    per_point_undamaged_list,
+    per_centroid_undamaged_list,
    object_cutoff,
     atlas_volume,
     use_flat,
@@ -316,6 +256,8 @@ def create_threads(
                 points_labels,
                 centroids_labels,
                 region_areas_list,
+                per_point_undamaged_list,
+                per_centroid_undamaged_list,
                 index,
                 object_cutoff,
                 atlas_volume,
@@ -406,6 +348,8 @@ def segmentation_to_atlas_space(
     points_labels=None,
     centroids_labels=None,
     region_areas_list=None,
+    per_point_undamaged_list=None,
+    per_centroid_undamaged_list=None,
     index=None,
     object_cutoff=0,
     atlas_volume=None,
@@ -467,15 +411,26 @@ def segmentation_to_atlas_space(
     per_centroid_labels = atlas_map[
         np.round(scaled_centroidsX).astype(int), np.round(scaled_centroidsY).astype(int)
     ]
-    # per_point_region =
+    if damage_mask is not None:
+        damage_mask = cv2.resize(damage_mask.astype(np.uint8), atlas_map.shape[::-1], interpolation=cv2.INTER_NEAREST).astype(bool)
+        per_point_undamaged = damage_mask[
+            np.round(scaled_x).astype(int), np.round(scaled_y).astype(int)
+        ]
+        per_centroid_undamaged = damage_mask[
+            np.round(scaled_centroidsX).astype(int), np.round(scaled_centroidsY).astype(int)
+        ]
+    else:
+        per_point_undamaged = np.ones(scaled_x.shape, dtype=bool)
+        per_centroid_undamaged = np.ones(scaled_centroidsX.shape, dtype=bool)
+    per_point_labels = per_point_labels[per_point_undamaged]
+    per_centroid_labels = per_centroid_labels[per_centroid_undamaged]
     new_x, new_y, centroids_new_x, centroids_new_y = get_transformed_coordinates(
         non_linear,
         slice_dict,
-        scaled_x,
-        scaled_y,
-        centroids,
-        scaled_centroidsX,
-        scaled_centroidsY,
+        scaled_x[per_point_undamaged],
+        scaled_y[per_point_undamaged],
+        scaled_centroidsX[per_centroid_undamaged],
+        scaled_centroidsY[per_centroid_undamaged],
         triangulation,
     )
     points, centroids = transform_points_to_atlas_space(
@@ -493,7 +448,11 @@ def segmentation_to_atlas_space(
     centroids_labels[index] = np.array(
         per_centroid_labels if centroids is not None else []
     )
+    per_centroid_undamaged_list[index] = np.array(
+        per_centroid_undamaged if centroids is not None else []
+    )
     points_labels[index] = np.array(per_point_labels if points is not None else [])
+    per_point_undamaged_list[index] = np.array(per_point_undamaged if points is not None else [])
 
 
 def get_triangulation(slice_dict, reg_width, reg_height, non_linear):
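The damage-mask handling added above resizes the mask to the atlas map's shape with nearest-neighbour interpolation (so boolean values are never blended into fractions) and then samples it at each point's rounded pixel coordinates. A standalone sketch of the same pattern, with a synthetic mask and invented shapes:

    import numpy as np
    import cv2

    atlas_map = np.zeros((4, 6))                   # stand-in for the atlas map
    damage_mask = np.array([[True, False, True],
                            [True, True, False]])  # half-resolution mask

    # cv2.resize takes dsize as (width, height), hence shape[::-1];
    # INTER_NEAREST keeps the mask strictly binary.
    resized = cv2.resize(
        damage_mask.astype(np.uint8),
        atlas_map.shape[::-1],
        interpolation=cv2.INTER_NEAREST,
    ).astype(bool)

    # Index at rounded (row, col) positions, as the patch does with
    # np.round(scaled_x) / np.round(scaled_y).
    ys, xs = np.array([0.2, 3.0]), np.array([1.7, 5.0])
    print(resized[np.round(ys).astype(int), np.round(xs).astype(int)])

Filtering `scaled_x`/`scaled_y` with the mask before `get_transformed_coordinates` means only points from intact tissue are transformed, while the mask itself is kept so damaged counts can still be reported downstream.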
diff --git a/PyNutil/processing/counting_and_load.py b/PyNutil/processing/counting_and_load.py
index c09dc67..c52c45a 100644
--- a/PyNutil/processing/counting_and_load.py
+++ b/PyNutil/processing/counting_and_load.py
@@ -50,7 +50,7 @@ def label_points(points, label_volume, scale_factor=1):
 
 # related to counting_and_load
 def pixel_count_per_region(
-    labels_dict_points, labeled_dict_centroids, df_label_colours
+    labels_dict_points, labeled_dict_centroids, current_points_undamaged, current_centroids_undamaged, df_label_colours
 ):
     """
     Counts the number of pixels per region and writes to a DataFrame.
@@ -63,47 +63,69 @@ def pixel_count_per_region(
     Returns:
         DataFrame: DataFrame with counts and colours per region.
     """
-    counted_labels_points, label_counts_points = np.unique(
-        labels_dict_points, return_counts=True
+    counted_labels_points_undamaged, label_counts_points_undamaged = np.unique(
+        labels_dict_points[current_points_undamaged], return_counts=True
     )
-    counted_labels_centroids, label_counts_centroids = np.unique(
-        labeled_dict_centroids, return_counts=True
+    counted_labels_points_damaged, label_counts_points_damaged = np.unique(
+        labels_dict_points[~current_points_undamaged], return_counts=True
+    )
+    counted_labels_centroids_undamaged, label_counts_centroids_undamaged = np.unique(
+        labeled_dict_centroids[current_centroids_undamaged], return_counts=True
+    )
+    counted_labels_centroids_damaged, label_counts_centroids_damaged = np.unique(
+        labeled_dict_centroids[~current_centroids_undamaged], return_counts=True
     )
-    # Which regions have pixels, and how many pixels are there per region
-    dummy_column = np.zeros(len(counted_labels_points))
-    counts_per_label = list(
-        zip(counted_labels_points, label_counts_points, dummy_column)
-    )
+    # Which regions have pixels, and how many pixels are there per region
     # Create a list of unique regions and pixel counts per region
+    counts_per_label = {
+        "idx": [],
+        "name": [],
+        "r": [],
+        "g": [],
+        "b": [],
+        "pixel_count": [],
+        "undamaged_pixel_count": [],
+        "damaged_pixel_count": [],
+        "object_count": [],
+        "undamaged_object_count": [],
+        "damaged_object_count": [],
+    }
+    for index, row in df_label_colours.iterrows():
+        if row["idx"] in counted_labels_points_undamaged:
+            clpu = label_counts_points_undamaged[counted_labels_points_undamaged == row["idx"]][0]
+        else:
+            clpu = 0
+        if row["idx"] in counted_labels_points_damaged:
+            clpd = label_counts_points_damaged[counted_labels_points_damaged == row["idx"]][0]
+        else:
+            clpd = 0
+        if row["idx"] in counted_labels_centroids_undamaged:
+            clcu = label_counts_centroids_undamaged[counted_labels_centroids_undamaged == row["idx"]][0]
+        else:
+            clcu = 0
+        if row["idx"] in counted_labels_centroids_damaged:
+            clcd = label_counts_centroids_damaged[counted_labels_centroids_damaged == row["idx"]][0]
+        else:
+            clcd = 0
+        if clcd == clcu == clpd == clpu == 0:
+            continue
+
+        counts_per_label["idx"].append(row["idx"])
+        counts_per_label["name"].append(row["name"])
+        counts_per_label["r"].append(int(row["r"]))
+        counts_per_label["g"].append(int(row["g"]))
+        counts_per_label["b"].append(int(row["b"]))
+        counts_per_label["pixel_count"].append(clpu + clpd)
+        counts_per_label["undamaged_pixel_count"].append(clpu)
+        counts_per_label["damaged_pixel_count"].append(clpd)
+        counts_per_label["object_count"].append(clcu + clcd)
+        counts_per_label["undamaged_object_count"].append(clcu)
+        counts_per_label["damaged_object_count"].append(clcd)
+
     df_counts_per_label = pd.DataFrame(
-        counts_per_label, columns=["idx", "pixel_count", "object_count"]
+        counts_per_label
     )
-    for clc, lcc in zip(counted_labels_centroids, label_counts_centroids):
-        df_counts_per_label.loc[df_counts_per_label["idx"] == clc, "object_count"] = lcc
-    new_rows = []
-    for index, row in df_counts_per_label.iterrows():
-        mask = df_label_colours["idx"] == row["idx"].astype(int)
-        current_region_row = df_label_colours[mask]
-        current_region_name = current_region_row["name"].values
-        current_region_red = current_region_row["r"].values
-        current_region_green = current_region_row["g"].values
-        current_region_blue = current_region_row["b"].values
-        row["name"] = current_region_name[0]
-        row["r"] = int(current_region_red[0])
-        row["g"] = int(current_region_green[0])
-        row["b"] = int(current_region_blue[0])
-        new_rows.append(row)
-
-    df_counts_per_label_name = pd.DataFrame(
-        new_rows, columns=["idx", "name", "pixel_count", "object_count", "r", "g", "b"]
-    )
-    # Task for Sharon:
-    # If you can get the areas per region from the flat file here
-    # you can then use those areas to calculate the load per region here
-    # and add to dataframe
-    # see messing around pyflat.py
-
-    return df_counts_per_label_name
+    return df_counts_per_label
 
 
 """Read flat file and write into an np array"""
@@ -298,6 +342,9 @@ def flat_to_dataframe(
         df_area_per_label["region_area"] = df_area_per_label["undamaged_region_area"] + df_area_per_label["damaged_region_area"]
     else:
         df_area_per_label = count_pixels_per_label(image, scale_factor)
+        df_area_per_label["undamaged_region_area"] = df_area_per_label["region_area"]
+        df_area_per_label["damaged_region_area"] = 0
+
     return df_area_per_label
 
 
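`pixel_count_per_region` now walks the colour table once and, for each region, pulls its undamaged and damaged tallies out of the four `np.unique` results. A toy illustration of the table this produces — invented labels and region names, and a dict lookup in place of the repeated boolean scans, purely for brevity:

    import numpy as np
    import pandas as pd

    labels = np.array([10, 10, 42, 7])
    undamaged = np.array([True, False, True, True])

    u_ids, u_counts = np.unique(labels[undamaged], return_counts=True)
    d_ids, d_counts = np.unique(labels[~undamaged], return_counts=True)
    u_map, d_map = dict(zip(u_ids, u_counts)), dict(zip(d_ids, d_counts))

    rows = []
    for idx, name in [(7, "region A"), (10, "region B"), (42, "region C")]:
        u, d = u_map.get(idx, 0), d_map.get(idx, 0)
        if u == d == 0:
            continue  # regions with no hits are skipped, as in the patch
        rows.append({"idx": idx, "name": name, "pixel_count": u + d,
                     "undamaged_pixel_count": u, "damaged_pixel_count": d})
    print(pd.DataFrame(rows))

Building the dicts once from the `np.unique` output avoids rescanning the count arrays for every row of the colour table; that may be worth folding in as a follow-up if the label set is large.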
""" - counted_labels_points, label_counts_points = np.unique( - labels_dict_points, return_counts=True + counted_labels_points_undamaged, label_counts_points_undamaged = np.unique( + labels_dict_points[current_points_undamaged], return_counts=True ) - counted_labels_centroids, label_counts_centroids = np.unique( - labeled_dict_centroids, return_counts=True + counted_labels_points_damaged, label_counts_points_damaged = np.unique( + labels_dict_points[~current_points_undamaged], return_counts=True ) - # Which regions have pixels, and how many pixels are there per region - dummy_column = np.zeros(len(counted_labels_points)) - counts_per_label = list( - zip(counted_labels_points, label_counts_points, dummy_column) + counted_labels_centroids_undamaged, label_counts_centroids_undamaged = np.unique( + labeled_dict_centroids[current_centroids_undamaged], return_counts=True + ) + counted_labels_centroids_damaged, label_counts_centroids_damaged = np.unique( + labeled_dict_centroids[~current_centroids_undamaged], return_counts=True ) + # Which regions have pixels, and how many pixels are there per region # Create a list of unique regions and pixel counts per region + counts_per_label = { + "idx" : [], + "name" : [], + "r" : [], + "g" : [], + "b" : [], + "pixel_count" : [], + "undamaged_pixel_count" : [], + "damaged_pixel_counts" : [], + "object_count" : [], + "undamaged_object_count" : [], + "damaged_object_count" : [], + } + for index, row in df_label_colours.iterrows(): + if row["idx"] in counted_labels_points_undamaged: + clpu = label_counts_points_undamaged[counted_labels_points_undamaged == row["idx"]][0] + else: + clpu = 0 + if row["idx"] in counted_labels_points_damaged: + clpd = label_counts_points_damaged[counted_labels_points_damaged == row["idx"]][0] + else: + clpd = 0 + if row["idx"] in counted_labels_centroids_undamaged: + clcu = counted_labels_centroids_undamaged[counted_labels_centroids_undamaged == row["idx"]][0] + else: + clcu = 0 + if row["idx"] in counted_labels_centroids_damaged: + clcd = counted_labels_centroids_damaged[counted_labels_centroids_damaged == row["idx"]][0] + else: + clcd = 0 + if clcd==clcu==clpd==clpu==0: + continue + + counts_per_label["idx"].append( + row["idx"] + ) + counts_per_label["name"].append( + row["name"] + ) + counts_per_label["r"].append( + int(row["r"]) + ) + counts_per_label["g"].append( + int(row["g"]) + ) + counts_per_label["b"].append( + int(row["b"]) + ) + counts_per_label["pixel_count"].append( + clpu + clpd + ) + counts_per_label["undamaged_pixel_count"].append( + clpu + ) + counts_per_label["damaged_pixel_counts"].append( + clpd + ) + counts_per_label["object_count"].append( + clcu + clcd + ) + counts_per_label["undamaged_object_count"].append( + clcu + ) + counts_per_label["damaged_object_count"].append( + clcd + ) + df_counts_per_label = pd.DataFrame( - counts_per_label, columns=["idx", "pixel_count", "object_count"] + counts_per_label ) - for clc, lcc in zip(counted_labels_centroids, label_counts_centroids): - df_counts_per_label.loc[df_counts_per_label["idx"] == clc, "object_count"] = lcc - new_rows = [] - for index, row in df_counts_per_label.iterrows(): - mask = df_label_colours["idx"] == row["idx"].astype(int) - current_region_row = df_label_colours[mask] - current_region_name = current_region_row["name"].values - current_region_red = current_region_row["r"].values - current_region_green = current_region_row["g"].values - current_region_blue = current_region_row["b"].values - row["name"] = current_region_name[0] - row["r"] = 
int(current_region_red[0]) - row["g"] = int(current_region_green[0]) - row["b"] = int(current_region_blue[0]) - new_rows.append(row) - - df_counts_per_label_name = pd.DataFrame( - new_rows, columns=["idx", "name", "pixel_count", "object_count", "r", "g", "b"] - ) - # Task for Sharon: - # If you can get the areas per region from the flat file here - # you can then use those areas to calculate the load per region here - # and add to dataframe - # see messing around pyflat.py - - return df_counts_per_label_name + return df_counts_per_label """Read flat file and write into an np array""" @@ -298,6 +342,9 @@ def flat_to_dataframe( df_area_per_label["region_area"] = df_area_per_label["undamaged_region_area"] + df_area_per_label["damaged_region_area"] else: df_area_per_label = count_pixels_per_label(image, scale_factor) + df_area_per_label["undamaged_region_area"] = df_area_per_label["region_area"] + df_area_per_label["damaged_region_area"] = 0 + return df_area_per_label diff --git a/PyNutil/processing/data_analysis.py b/PyNutil/processing/data_analysis.py index cd7c3e2..70ab405 100644 --- a/PyNutil/processing/data_analysis.py +++ b/PyNutil/processing/data_analysis.py @@ -36,7 +36,6 @@ def apply_custom_regions(df, custom_regions_dict): # Update the original df with new columns df["custom_region_name"] = df["idx"].map(name_mapping).fillna("") temp_df = df.copy() - temp_df["idx"] = temp_df["idx"].map(id_mapping) temp_df["r"] = temp_df["idx"].map( lambda x: rgb_mapping[x][0] if x in rgb_mapping else None @@ -47,6 +46,7 @@ def apply_custom_regions(df, custom_regions_dict): temp_df["b"] = temp_df["idx"].map( lambda x: rgb_mapping[x][2] if x in rgb_mapping else None ) + temp_df["idx"] = temp_df["idx"].map(id_mapping) # Group and aggregate grouped_df = ( @@ -55,8 +55,14 @@ def apply_custom_regions(df, custom_regions_dict): .agg( { "pixel_count": "sum", + "undamaged_pixel_count": "sum", + "damaged_pixel_counts": "sum", "region_area": "sum", + "undamaged_region_area": "sum", + "damaged_region_area": "sum", "object_count": "sum", + "undamaged_object_count": "sum", + "damaged_object_count": "sum", "r": "first", "g": "first", "b": "first", @@ -68,6 +74,7 @@ def apply_custom_regions(df, custom_regions_dict): grouped_df = grouped_df.rename(columns={"custom_region_name": "name"}) grouped_df["area_fraction"] = grouped_df["pixel_count"] / grouped_df["region_area"] + grouped_df["undamaged_area_fraction"] = grouped_df["undamaged_pixel_count"] / grouped_df["undamaged_region_area"] common_columns = [col for col in df.columns if col in grouped_df.columns] grouped_df = grouped_df.reindex( columns=common_columns @@ -82,7 +89,8 @@ def quantify_labeled_points( labeled_points, labeled_points_centroids, atlas_labels, - # atlas_volume, + per_point_undamaged, + per_centroid_undamaged ): """ Quantifies labeled points and returns various DataFrames. @@ -109,6 +117,8 @@ def quantify_labeled_points( centroids_len, region_areas_list, atlas_labels, + per_point_undamaged, + per_centroid_undamaged ) label_df = _combine_slice_reports(per_section_df, atlas_labels) @@ -122,6 +132,8 @@ def _quantify_per_section( centroids_len, region_areas_list, atlas_labels, + per_point_undamaged, + per_centroid_undamaged ): """ Quantifies labeled points per section. 
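The custom-region path above sums the damaged and undamaged columns independently and only then recomputes the fractions, so `undamaged_area_fraction` reflects intact tissue alone. A toy groupby with invented numbers showing why the fractions have to be recomputed after aggregation rather than averaged across rows:

    import pandas as pd

    df = pd.DataFrame({
        "custom_region_name": ["CR1", "CR1", "CR2"],
        "pixel_count": [5, 3, 2],
        "undamaged_pixel_count": [4, 1, 2],
        "region_area": [50, 30, 20],
        "undamaged_region_area": [40, 20, 20],
    })

    g = df.groupby("custom_region_name", as_index=False).agg(
        {"pixel_count": "sum", "undamaged_pixel_count": "sum",
         "region_area": "sum", "undamaged_region_area": "sum"}
    )
    g["area_fraction"] = g["pixel_count"] / g["region_area"]
    g["undamaged_area_fraction"] = g["undamaged_pixel_count"] / g["undamaged_region_area"]
    print(g)  # CR1: 8/80 = 0.100 overall, but 5/60 ~ 0.083 undamaged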
diff --git a/PyNutil/processing/transformations.py b/PyNutil/processing/transformations.py
index a61fd0c..62b8ddb 100644
--- a/PyNutil/processing/transformations.py
+++ b/PyNutil/processing/transformations.py
@@ -52,7 +52,6 @@ def get_transformed_coordinates(
     slice_dict,
     scaled_x,
     scaled_y,
-    centroids,
     scaled_centroidsX,
     scaled_centroidsY,
     triangulation,
@@ -77,7 +76,7 @@ def get_transformed_coordinates(
     if non_linear and "markers" in slice_dict:
         if scaled_x is not None:
             new_x, new_y = transform_vec(triangulation, scaled_x, scaled_y)
-        if centroids is not None:
+        if scaled_centroidsX is not None:
             centroids_new_x, centroids_new_y = transform_vec(
                 triangulation, scaled_centroidsX, scaled_centroidsY
             )
diff --git a/PyNutil/processing/utils.py b/PyNutil/processing/utils.py
index a7162d7..4f3e800 100644
--- a/PyNutil/processing/utils.py
+++ b/PyNutil/processing/utils.py
@@ -198,7 +198,7 @@ def start_and_join_threads(threads):
     [t.join() for t in threads]
 
 
-def process_results(points_list, centroids_list, points_labels, centroids_labels):
+def process_results(points_list, centroids_list, points_labels, centroids_labels, points_undamaged_list, centroids_undamaged_list):
     """
     Processes the results from the threads.
 
@@ -219,16 +219,24 @@ def process_results(points_list, centroids_list, points_labels, centroids_labels
     ]
     points_labels = [pl for pl in points_labels if None not in pl]
     centroids_labels = [cl for cl in centroids_labels if None not in cl]
+    points_undamaged_list = [pul for pul in points_undamaged_list if None not in pul]
+    centroids_undamaged_list = [cul for cul in centroids_undamaged_list if None not in cul]
     if len(points_list) == 0:
         points = np.array([])
         points_labels = np.array([])
+        points_undamaged = np.array([])
     else:
         points = np.concatenate(points_list)
         points_labels = np.concatenate(points_labels)
+        points_undamaged = np.concatenate(points_undamaged_list)
+
     if len(centroids_list) == 0:
         centroids = np.array([])
         centroids_labels = np.array([])
+        centroids_undamaged = np.array([])
     else:
         centroids = np.concatenate(centroids_list)
         centroids_labels = np.concatenate(centroids_labels)
-    return points, centroids, points_labels, centroids_labels, points_len, centroids_len
+        centroids_undamaged = np.concatenate(centroids_undamaged_list)
+
+    return points, centroids, points_labels, centroids_labels, points_len, centroids_len, points_undamaged, centroids_undamaged
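`process_results` concatenates the per-section mask lists in the same order as the per-section point lists, so the flat `points_undamaged` array stays aligned with `points` and can be sliced by the same section lengths later. A compressed sketch of that invariant with two fake sections:

    import numpy as np

    points_list = [np.array([[1, 2], [3, 4]]), np.array([[5, 6]])]
    undamaged_list = [np.array([True, False]), np.array([True])]

    points_len = [len(p) for p in points_list]   # per-section lengths
    points = np.concatenate(points_list)         # flat coordinates
    undamaged = np.concatenate(undamaged_list)   # flat mask, same order

    # Slicing both arrays with the same offsets recovers each section,
    # which is how _quantify_per_section walks prev_pl : prev_pl + pl.
    start = 0
    for n in points_len:
        print(points[start:start + n], undamaged[start:start + n])
        start += n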
diff --git a/demos/basic_example_custom_atlas.py b/demos/basic_example_custom_atlas.py
index 63d4f00..d586e12 100644
--- a/demos/basic_example_custom_atlas.py
+++ b/demos/basic_example_custom_atlas.py
@@ -40,4 +40,4 @@ pnt = PyNutil(
 )
 pnt.get_coordinates(object_cutoff=0, use_flat=False)
 pnt.quantify_coordinates()
-pnt.save_analysis("../test_result/damage_regions_test_14_02_2025")
+pnt.save_analysis("../test_result/damage_regions_test_17_03_2025")
-- 
GitLab