inference.pp_cluster.infer

Post-processing Clustering

Provides functionality for post-processing cluster assignment based on radiomics features.

  1"""
  2### Post-processing Clustering
  3
  4Provides functionalities related to post-processing clustering using the radiomics features.
  5"""
  6
  7from pathlib import Path
  8import pandas as pd
  9import pickle
 10import argparse
 11import json
 12from typing import Any, List, Dict
 13
 14
 15def load_json(path: Path) -> Any:
 16    """
 17    Loads a JSON file from the specified path.
 18
 19    Args:
 20        path (Path): A Path representing the file path.
 21
 22    Returns:
 23        Any: The data loaded from the JSON file.
 24    """
 25    with open(path, 'r') as f:
 26        return json.load(f)
 27
 28
 29def save_json(path: Path, data: Any) -> None:
 30    """
 31    Saves data to a JSON file at the specified path.
 32
 33    Args:
 34        path (Path): A Path representing the file path.
 35        data (Any): The data to be serialized and saved.
 36    """
 37    with open(path, 'w') as f:
 38        json.dump(data, f, indent=4)
 39
 40
 41def get_cluster(radiomics_df: pd.DataFrame, cluster_artifacts: Dict[str, Any]) -> List[int]:
 42    """
 43    Assigns clusters to each case based on radiomics features.
 44
 45    Args:
 46        radiomics_df (pd.DataFrame): DataFrame containing radiomics features.
 47        cluster_artifacts (Dict[str, Any]): Dictionary containing cluster artifacts including normalizer, PCA, and KMeans models.
 48
 49    Returns:
 50        List[int]: List of cluster assignments for each case.
 51    """
 52    # Normalize the radiomics features
 53    normalizer = cluster_artifacts['normalizer']
 54    normal_values = normalizer.transform(radiomics_df.iloc[:, 1:].values)
 55    
 56    # Apply PCA transformation
 57    pca = cluster_artifacts['pca']
 58    scores_pca = pca.transform(normal_values)
 59    
 60    # Predict cluster assignments using KMeans
 61    kmeans = cluster_artifacts['kmeans']
 62    cluster_assignment = kmeans.predict(scores_pca)
 63    
 64    return cluster_assignment.tolist()
 65
 66
 67def get_cluster_artifacts(task: str) -> Dict[str, Any]:
 68    """
 69    Retrieves cluster artifacts based on the specified task.
 70
 71    Args:
 72        task (str): The task identifier (e.g., 'BraTS-SSA', 'BraTS-PED').
 73
 74    Returns:
 75        Dict[str, Any]: Dictionary containing cluster artifacts.
 76    """
 77    script_dir = Path(__file__).resolve().parent.parent
 78    if task == 'BraTS-SSA':
 79        pkl_path = script_dir / "kmeans-cluster-artifacts" / "SSA_cluster.pkl"
 80    elif task == 'BraTS-PED':
 81        pkl_path = script_dir / "kmeans-cluster-artifacts" / "PEDS_cluster.pkl"
 82    else:
 83        raise ValueError(f"Unsupported task: {task}")
 84    
 85    with open(pkl_path, 'rb') as f:
 86        return pickle.load(f)
 87
 88
 89def main() -> None:
 90    """
 91    Main function to assign clusters to cases based on radiomics features.
 92
 93    Parses command-line arguments, loads radiomics data and cluster artifacts,
 94    assigns clusters, and saves the updated data to a JSON file.
 95    """
 96    parser = argparse.ArgumentParser(description='Cluster Assignment for BraTS Data')
 97    parser.add_argument("-i", "--input_json", type=str, required=True, help="Path to input radiomics JSON file")
 98    parser.add_argument("-o", "--output_json", type=str, required=True, help="Path to output JSON file with cluster assignments")
 99    parser.add_argument("-c", "--cluster_pickle", type=str, required=True, help="Path to cluster artifacts pickle file")
100    args = parser.parse_args()
101
102    # Load radiomics data from JSON
103    data = load_json(Path(args.input_json))
104    df_radiomics = pd.DataFrame(data)
105
106    # Load cluster artifacts from pickle file
107    with open(Path(args.cluster_pickle), 'rb') as f:
108        cluster_artifacts = pickle.load(f)
109
110    # Assign clusters based on radiomics features
111    cluster_assignment = get_cluster(df_radiomics, cluster_artifacts)
112
113    # Update each case with its cluster assignment
114    for case, cluster in zip(data, cluster_assignment):
115        case['cluster'] = int(cluster)
116
117    # Save the updated data to the output JSON file
118    save_json(Path(args.output_json), data)
119
120
121if __name__ == '__main__':
122    main()
123
124# Example Command to Run the Script:
125# python pp_cluster/infer.py -i /home/abhijeet/Code/BraTS2024/datalist/radiomics/BraTS2024-GLI-training_data1_v2-radiomics.json -o datalist/pp_cluster_assignment/GLI.json -c kmeans-cluster-artifacts/GLI_cluster.pkl
def load_json(path: pathlib.Path) -> Any:
def load_json(path: Path) -> Any:
    """
    Loads a JSON file from the specified path.

    The file is always decoded as UTF-8 rather than with the platform's
    locale encoding, so non-ASCII content is read the same way on every
    operating system.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        Any: The data loaded from the JSON file.

    Raises:
        OSError: If the file cannot be opened (e.g. it does not exist).
        json.JSONDecodeError: If the file content is not valid JSON.
    """
    # Explicit encoding: without it, open() falls back to the locale encoding.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)

Loads a JSON file from the specified path.

Args: path (Path): A Path representing the file path.

Returns: Any: The data loaded from the JSON file.

def save_json(path: pathlib.Path, data: Any) -> None:
def save_json(path: Path, data: Any) -> None:
    """
    Saves data to a JSON file at the specified path.

    The data is serialized to a string before the file is opened, so a
    serialization failure leaves any existing file at ``path`` untouched
    instead of truncating it or leaving a partially written document.

    Args:
        path (Path): A Path representing the file path.
        data (Any): The data to be serialized and saved.

    Raises:
        TypeError: If ``data`` contains an object that is not JSON serializable.
        OSError: If the file cannot be opened for writing.
    """
    # Serialize first: opening with 'w' truncates the target immediately.
    serialized = json.dumps(data, indent=4)
    with open(path, 'w', encoding='utf-8') as f:
        f.write(serialized)

Saves data to a JSON file at the specified path.

Args: path (Path): A Path representing the file path. data (Any): The data to be serialized and saved.

def get_cluster( radiomics_df: pandas.core.frame.DataFrame, cluster_artifacts: Dict[str, Any]) -> List[int]:
def get_cluster(radiomics_df: pd.DataFrame, cluster_artifacts: Dict[str, Any]) -> List[int]:
    """
    Computes a cluster label for every row of a radiomics table.

    The values of all columns except the first are passed through the
    'normalizer' and 'pca' artifacts (via ``transform``) and the result is
    handed to the 'kmeans' artifact (via ``predict``).

    Args:
        radiomics_df (pd.DataFrame): DataFrame containing radiomics features.
            The first column is skipped (presumably a case identifier -
            confirm against the radiomics JSON schema).
        cluster_artifacts (Dict[str, Any]): Dictionary holding the fitted
            objects under the keys 'normalizer', 'pca' and 'kmeans'.

    Returns:
        List[int]: One cluster assignment per row, in row order.
    """
    # Everything after the first column is treated as a feature.
    features = radiomics_df.iloc[:, 1:].values

    # Scale -> project -> assign, each step feeding the next.
    scaled = cluster_artifacts['normalizer'].transform(features)
    projected = cluster_artifacts['pca'].transform(scaled)
    labels = cluster_artifacts['kmeans'].predict(projected)

    return labels.tolist()

Assigns clusters to each case based on radiomics features.

Args: radiomics_df (pd.DataFrame): DataFrame containing radiomics features. cluster_artifacts (Dict[str, Any]): Dictionary containing cluster artifacts including normalizer, PCA, and KMeans models.

Returns: List[int]: List of cluster assignments for each case.

def get_cluster_artifacts(task: str) -> Dict[str, Any]:
def get_cluster_artifacts(task: str) -> Dict[str, Any]:
    """
    Loads the pickled cluster artifacts that belong to a task.

    The pickle files are looked up in the "kmeans-cluster-artifacts"
    directory located two levels above this file's resolved location.

    Args:
        task (str): The task identifier; 'BraTS-SSA' and 'BraTS-PED' are supported.

    Returns:
        Dict[str, Any]: Dictionary containing cluster artifacts.

    Raises:
        ValueError: If ``task`` is not one of the supported identifiers.
    """
    artifacts_root = Path(__file__).resolve().parent.parent

    # Task identifier -> pickle file name.
    known_tasks = (
        ('BraTS-SSA', "SSA_cluster.pkl"),
        ('BraTS-PED', "PEDS_cluster.pkl"),
    )
    file_name = next((fname for name, fname in known_tasks if task == name), None)
    if file_name is None:
        raise ValueError(f"Unsupported task: {task}")

    with open(artifacts_root / "kmeans-cluster-artifacts" / file_name, 'rb') as handle:
        return pickle.load(handle)

Retrieves cluster artifacts based on the specified task.

Args: task (str): The task identifier (e.g., 'BraTS-SSA', 'BraTS-PED').

Returns: Dict[str, Any]: Dictionary containing cluster artifacts.

def main() -> None:
 90def main() -> None:
 91    """
 92    Main function to assign clusters to cases based on radiomics features.
 93
 94    Parses command-line arguments, loads radiomics data and cluster artifacts,
 95    assigns clusters, and saves the updated data to a JSON file.
 96    """
 97    parser = argparse.ArgumentParser(description='Cluster Assignment for BraTS Data')
 98    parser.add_argument("-i", "--input_json", type=str, required=True, help="Path to input radiomics JSON file")
 99    parser.add_argument("-o", "--output_json", type=str, required=True, help="Path to output JSON file with cluster assignments")
100    parser.add_argument("-c", "--cluster_pickle", type=str, required=True, help="Path to cluster artifacts pickle file")
101    args = parser.parse_args()
102
103    # Load radiomics data from JSON
104    data = load_json(Path(args.input_json))
105    df_radiomics = pd.DataFrame(data)
106
107    # Load cluster artifacts from pickle file
108    with open(Path(args.cluster_pickle), 'rb') as f:
109        cluster_artifacts = pickle.load(f)
110
111    # Assign clusters based on radiomics features
112    cluster_assignment = get_cluster(df_radiomics, cluster_artifacts)
113
114    # Update each case with its cluster assignment
115    for case, cluster in zip(data, cluster_assignment):
116        case['cluster'] = int(cluster)
117
118    # Save the updated data to the output JSON file
119    save_json(Path(args.output_json), data)

Main function to assign clusters to cases based on radiomics features.

Parses command-line arguments, loads radiomics data and cluster artifacts, assigns clusters, and saves the updated data to a JSON file.