inference.pp_cluster.infer
Post-processing Clustering
Provides functionality for post-processing clustering based on radiomics features.
"""
### Post-processing Clustering

Provides functionalities related to post-processing clustering using the radiomics features.
"""

import argparse
import json
import pickle
from pathlib import Path
from typing import Any, List, Dict

import pandas as pd


def load_json(path: Path) -> Any:
    """
    Loads a JSON file from the specified path.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        Any: The data loaded from the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_json(path: Path, data: Any) -> None:
    """
    Saves data to a JSON file at the specified path.

    Missing parent directories are created first, and the file is written as
    UTF-8 with a 4-space indent.

    Args:
        path (Path): A Path representing the file path.
        data (Any): The data to be serialized and saved.

    Raises:
        OSError: If the directory or file cannot be created.
        TypeError: If ``data`` contains objects that are not JSON serializable.
    """
    target = Path(path)
    # Output locations such as "datalist/pp_cluster_assignment/GLI.json" may
    # point into a directory that does not exist yet.
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)


def get_cluster(radiomics_df: pd.DataFrame, cluster_artifacts: Dict[str, Any]) -> List[int]:
    """
    Assigns clusters to each case based on radiomics features.

    The first column of ``radiomics_df`` is skipped (presumably a case
    identifier -- confirm against the radiomics JSON producer); all remaining
    columns are passed through the normalizer, the PCA transform and finally
    the KMeans predictor.

    Args:
        radiomics_df (pd.DataFrame): DataFrame containing radiomics features.
        cluster_artifacts (Dict[str, Any]): Dictionary containing cluster artifacts
            under the keys 'normalizer', 'pca' and 'kmeans'.

    Returns:
        List[int]: List of cluster assignments for each case (empty when the
            DataFrame has no rows).

    Raises:
        KeyError: If one of the required artifact keys is missing.
    """
    # Nothing to assign for an empty table; skip the models entirely, since
    # fitted estimators typically reject zero-sample input.
    if len(radiomics_df) == 0:
        return []

    # Normalize the radiomics features
    features = radiomics_df.iloc[:, 1:].values
    normal_values = cluster_artifacts['normalizer'].transform(features)

    # Apply PCA transformation
    scores_pca = cluster_artifacts['pca'].transform(normal_values)

    # Predict cluster assignments using KMeans
    cluster_assignment = cluster_artifacts['kmeans'].predict(scores_pca)

    return cluster_assignment.tolist()


def get_cluster_artifacts(task: str) -> Dict[str, Any]:
    """
    Retrieves cluster artifacts based on the specified task.

    The pickle file is looked up in the "kmeans-cluster-artifacts" directory
    located two levels above this file.

    Args:
        task (str): The task identifier (e.g., 'BraTS-SSA', 'BraTS-PED').

    Returns:
        Dict[str, Any]: Dictionary containing cluster artifacts.

    Raises:
        ValueError: If ``task`` is not a supported task identifier.
        OSError: If the artifact file cannot be opened.
    """
    artifact_files = {
        'BraTS-SSA': "SSA_cluster.pkl",
        'BraTS-PED': "PEDS_cluster.pkl",
    }
    filename = artifact_files.get(task) if isinstance(task, str) else None
    if filename is None:
        raise ValueError(f"Unsupported task: {task}")

    script_dir = Path(__file__).resolve().parent.parent
    pkl_path = script_dir / "kmeans-cluster-artifacts" / filename

    # NOTE(security): unpickling executes arbitrary code; only artifact files
    # shipped with the project should ever be placed in this directory.
    with open(pkl_path, 'rb') as f:
        return pickle.load(f)


def main() -> None:
    """
    Main function to assign clusters to cases based on radiomics features.

    Parses command-line arguments, loads radiomics data and cluster artifacts,
    assigns clusters, and saves the updated data to a JSON file.

    Raises:
        ValueError: If the number of predicted clusters differs from the
            number of input cases.
    """
    parser = argparse.ArgumentParser(description='Cluster Assignment for BraTS Data')
    parser.add_argument("-i", "--input_json", type=str, required=True, help="Path to input radiomics JSON file")
    parser.add_argument("-o", "--output_json", type=str, required=True, help="Path to output JSON file with cluster assignments")
    parser.add_argument("-c", "--cluster_pickle", type=str, required=True, help="Path to cluster artifacts pickle file")
    args = parser.parse_args()

    # Load radiomics data from JSON
    data = load_json(Path(args.input_json))
    df_radiomics = pd.DataFrame(data)

    # Load cluster artifacts from pickle file
    # NOTE(security): unpickling executes arbitrary code; only pass artifact
    # files that come from a trusted source.
    with open(Path(args.cluster_pickle), 'rb') as f:
        cluster_artifacts = pickle.load(f)

    # Assign clusters based on radiomics features
    cluster_assignment = get_cluster(df_radiomics, cluster_artifacts)

    # zip() would silently drop trailing cases on a length mismatch, leaving
    # some cases without a cluster; fail loudly instead.
    if len(cluster_assignment) != len(data):
        raise ValueError(
            f"Got {len(cluster_assignment)} cluster assignments for {len(data)} cases"
        )

    # Update each case with its cluster assignment
    for case, cluster in zip(data, cluster_assignment):
        case['cluster'] = int(cluster)

    # Save the updated data to the output JSON file
    save_json(Path(args.output_json), data)


if __name__ == '__main__':
    main()

# Example Command to Run the Script:
# python pp_cluster/infer.py
#     -i /home/abhijeet/Code/BraTS2024/datalist/radiomics/BraTS2024-GLI-training_data1_v2-radiomics.json
#     -o datalist/pp_cluster_assignment/GLI.json
#     -c kmeans-cluster-artifacts/GLI_cluster.pkl
def load_json(path: Path) -> Any:
    """
    Loads a JSON file from the specified path.

    The file is decoded as UTF-8 explicitly so the result does not depend on
    the platform's default text encoding.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        Any: The data loaded from the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
Loads a JSON file from the specified path.
Args: path (Path): A Path representing the file path.
Returns: Any: The data loaded from the JSON file.
def save_json(path: Path, data: Any) -> None:
    """
    Saves data to a JSON file at the specified path.

    Missing parent directories are created first, and the file is written as
    UTF-8 with a 4-space indent.

    Args:
        path (Path): A Path representing the file path.
        data (Any): The data to be serialized and saved.

    Raises:
        OSError: If the directory or file cannot be created.
        TypeError: If ``data`` contains objects that are not JSON serializable.
    """
    target = Path(path)
    # The output location may point into a directory that does not exist yet;
    # create it instead of failing on open().
    target.parent.mkdir(parents=True, exist_ok=True)
    with open(target, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)
Saves data to a JSON file at the specified path.
Args: path (Path): A Path representing the file path. data (Any): The data to be serialized and saved.
def get_cluster(radiomics_df: pd.DataFrame, cluster_artifacts: Dict[str, Any]) -> List[int]:
    """
    Assigns clusters to each case based on radiomics features.

    The first column of ``radiomics_df`` is skipped (presumably a case
    identifier -- confirm against the radiomics JSON producer); all remaining
    columns are passed through the normalizer, the PCA transform and finally
    the KMeans predictor.

    Args:
        radiomics_df (pd.DataFrame): DataFrame containing radiomics features.
        cluster_artifacts (Dict[str, Any]): Dictionary containing cluster artifacts
            under the keys 'normalizer', 'pca' and 'kmeans'.

    Returns:
        List[int]: List of cluster assignments for each case (empty when the
            DataFrame has no rows).

    Raises:
        KeyError: If one of the required artifact keys is missing.
    """
    # Nothing to assign for an empty table; skip the models entirely, since
    # fitted estimators typically reject zero-sample input.
    if len(radiomics_df) == 0:
        return []

    # Normalize the radiomics features
    features = radiomics_df.iloc[:, 1:].values
    normal_values = cluster_artifacts['normalizer'].transform(features)

    # Apply PCA transformation
    scores_pca = cluster_artifacts['pca'].transform(normal_values)

    # Predict cluster assignments using KMeans
    cluster_assignment = cluster_artifacts['kmeans'].predict(scores_pca)

    return cluster_assignment.tolist()
Assigns clusters to each case based on radiomics features.
Args: radiomics_df (pd.DataFrame): DataFrame containing radiomics features. cluster_artifacts (Dict[str, Any]): Dictionary containing cluster artifacts including normalizer, PCA, and KMeans models.
Returns: List[int]: List of cluster assignments for each case.
def get_cluster_artifacts(task: str) -> Dict[str, Any]:
    """
    Retrieves cluster artifacts based on the specified task.

    The pickle file is looked up in the "kmeans-cluster-artifacts" directory
    located two levels above this file.

    Args:
        task (str): The task identifier (e.g., 'BraTS-SSA', 'BraTS-PED').

    Returns:
        Dict[str, Any]: Dictionary containing cluster artifacts.

    Raises:
        ValueError: If ``task`` is not a supported task identifier.
        OSError: If the artifact file cannot be opened.
    """
    # Task identifier -> artifact file name; extend this table to add a task.
    artifact_files = {
        'BraTS-SSA': "SSA_cluster.pkl",
        'BraTS-PED': "PEDS_cluster.pkl",
    }
    filename = artifact_files.get(task) if isinstance(task, str) else None
    if filename is None:
        raise ValueError(f"Unsupported task: {task}")

    script_dir = Path(__file__).resolve().parent.parent
    pkl_path = script_dir / "kmeans-cluster-artifacts" / filename

    # NOTE(security): unpickling executes arbitrary code; only artifact files
    # shipped with the project should ever be placed in this directory.
    with open(pkl_path, 'rb') as f:
        return pickle.load(f)
Retrieves cluster artifacts based on the specified task.
Args: task (str): The task identifier (e.g., 'BraTS-SSA', 'BraTS-PED').
Returns: Dict[str, Any]: Dictionary containing cluster artifacts.
def main() -> None:
    """
    Main function to assign clusters to cases based on radiomics features.

    Parses command-line arguments, loads radiomics data and cluster artifacts,
    assigns clusters, and saves the updated data to a JSON file.

    Raises:
        ValueError: If the number of predicted clusters differs from the
            number of input cases.
    """
    parser = argparse.ArgumentParser(description='Cluster Assignment for BraTS Data')
    parser.add_argument("-i", "--input_json", type=str, required=True, help="Path to input radiomics JSON file")
    parser.add_argument("-o", "--output_json", type=str, required=True, help="Path to output JSON file with cluster assignments")
    parser.add_argument("-c", "--cluster_pickle", type=str, required=True, help="Path to cluster artifacts pickle file")
    args = parser.parse_args()

    # Load radiomics data from JSON
    data = load_json(Path(args.input_json))
    df_radiomics = pd.DataFrame(data)

    # Load cluster artifacts from pickle file
    # NOTE(security): unpickling executes arbitrary code; only pass artifact
    # files that come from a trusted source.
    with open(Path(args.cluster_pickle), 'rb') as f:
        cluster_artifacts = pickle.load(f)

    # Assign clusters based on radiomics features
    cluster_assignment = get_cluster(df_radiomics, cluster_artifacts)

    # zip() would silently drop trailing cases on a length mismatch, leaving
    # some cases without a cluster; fail loudly instead.
    if len(cluster_assignment) != len(data):
        raise ValueError(
            f"Got {len(cluster_assignment)} cluster assignments for {len(data)} cases"
        )

    # Update each case with its cluster assignment
    for case, cluster in zip(data, cluster_assignment):
        case['cluster'] = int(cluster)

    # Save the updated data to the output JSON file
    save_json(Path(args.output_json), data)
Main function to assign clusters to cases based on radiomics features.
Parses command-line arguments, loads radiomics data and cluster artifacts, assigns clusters, and saves the updated data to a JSON file.