inference.runner_ped

Pediatric Brain Tumor Segmentation Pipeline

Provides functions for running the pediatric brain tumor segmentation pipeline on MRI scans.

View Source

  1"""
  2### Pediatric Brain Tumor Segmentation Pipeline
  3
  4Provides functionalities related to running the pipeline for pediatric brain tumor segmentation in MRI.
  5"""
  6
  7
  8from pathlib import Path
  9import os
 10import shutil
 11import tempfile
 12from typing import Tuple, List, Dict, Any
 13import pandas as pd
 14
 15from .nnunet.install_model import install_model_from_zip
 16from .ensembler.ped_weighted_ensemble import ped_ensemble
 17from .nnunet.runner import run_infer_nnunet
 18from .mednext.runner import run_infer_mednext
 19from .swinunetr.runner import run_infer_swinunetr
 20from .pp_cluster.infer import get_cluster, get_cluster_artifacts
 21from .radiomics.feature_extraction_v2 import extract_all, save_json, load_json
 22from .postproc.postprocess_cc import remove_small_component
 23from .postproc.postprocess_lblredef import label_redefinition
 24
# Task identifier; used below to select the task-specific ensemble weights and
# cluster artifacts, and passed to the model runners and post-processing steps.
TASK: str = 'BraTS-PED'

# Directory containing this module. Every bundled resource (weights, thresholds,
# parameter files) is located relative to it.
script_dir = Path(__file__).resolve().parent
    
# Ensemble weights are read from ensembler/weights_cv.json and indexed by TASK.
# infer_single reads the 'SwinUNETR', 'nnUNetv2' and 'MedNeXt' entries.
json_path: Path = script_dir / "ensembler" / "weights_cv.json"
ENSEMBLE_WEIGHTS: Dict[str, float] = load_json(json_path)[TASK]
# Maps the file-name suffix of each input image to the suffix it is renamed to
# inside infer_single.
NAME_MAPPER: Dict[str, str] = load_json(script_dir / "weights" / "name.json")
# Task-specific artifacts handed to get_cluster in postprocess_single.
CLUSTER_ARTIFACT: Any = get_cluster_artifacts(TASK)
# Threshold files passed to remove_small_component and label_redefinition.
THRESHOLD_FILE_CC: Path = script_dir / "postproc" / "cc_thresholds_cv.json"
THRESHOLD_FILE_LBLDEF: Path = script_dir / "postproc" / "lblredef_thresholds_cv.json"

# Configuration constants: absolute paths (as strings) to the pyradiomics
# parameter file and to the zipped model weights installed by setup_model_weights.
CONSTANTS: Dict[str, str] = {
    'pyradiomics_paramfile': str(script_dir / "radiomics" / "params.yaml"),
    'nnunet_model_path': str(script_dir / "weights" / "BraTS2024_PEDs_nnunetv2_model.zip"),
    'mednext_model_path': str(script_dir / "weights" / "BraTS2024_PEDs_MedNeXt_model.zip"),
    'swinunetr_model_path': str(script_dir / "weights" / "swinunetr_peds_trunc.zip"),
}
 46
 47
 48def maybe_make_dir(path: Path) -> Path:
 49    """Create a directory if it does not exist.
 50
 51    Args:
 52        path (Path): The path of the directory to create.
 53
 54    Returns:
 55        Path: The path to the created directory.
 56    """
 57    os.makedirs(path, exist_ok=True)
 58    return Path(path)
 59
 60
 61def lbl_redefination(
 62    task: str,
 63    threshold_file: Path,
 64    input_dir: Path,
 65    df_radiomics: pd.DataFrame,
 66    out_dir: Path
 67) -> None:
 68    """Perform label redefinition by copying files from input to output directory.
 69
 70    Args:
 71        task (str): The task identifier.
 72        threshold_file (Path): Path to the threshold file for label redefinition.
 73        input_dir (Path): Directory containing input files.
 74        df_radiomics (pd.DataFrame): DataFrame containing radiomics data.
 75        out_dir (Path): Directory to store output files.
 76    """
 77    # Copy files from input_dir to out_dir
 78    for file in input_dir.iterdir():
 79        shutil.copy(file, out_dir)
 80
 81
 82def postprocess_single(
 83    input_dir: Path,
 84    seg_dir: Path,
 85    out_dir: Path
 86) -> Path:
 87    """Postprocess a single segmentation case.
 88
 89    This function performs radiomics extraction, cluster determination, removes small components,
 90    and redefines labels based on the processed data.
 91
 92    Args:
 93        input_dir (Path): Directory containing the input data.
 94        seg_dir (Path): Directory containing the segmentation results.
 95        out_dir (Path): Directory to store the postprocessed segmentation.
 96
 97    Returns:
 98        Path: Path to the postprocessed segmentation file.
 99    """
100    case_name: str = input_dir.name
101    seg_path: Path = seg_dir / f"{case_name}.nii.gz"
102    out_path: Path = maybe_make_dir(out_dir) / f"{case_name}.nii.gz"
103
104    # Compute radiomics features
105    try:
106        df_radiomics: pd.DataFrame = extract_all(
107            CONSTANTS['pyradiomics_paramfile'],
108            input_dir.parent,
109            case_name,
110            seg_path,
111            1,
112            region='wt',
113            tmpp='/tmp/',
114            seg_suffix='',
115            sequences=['-t1n', '-t1c', '-t2w', '-t2f']
116        )
117        # Determine the cluster based on radiomics features
118        cluster: int = get_cluster(df_radiomics, CLUSTER_ARTIFACT)[0]
119    except Exception as e:
120        print(f"Error in radiomics extraction: {e}")
121        df_radiomics = pd.DataFrame([{"StudyID": case_name}])
122        cluster = 4
123
124    df_radiomics['cluster'] = int(cluster)
125
126    # Remove disconnected regions from segmentation
127    removed_cc_dir: Path = maybe_make_dir(seg_dir / 'remove_cc')
128    remove_small_component(
129        TASK,
130        THRESHOLD_FILE_CC,
131        seg_dir,
132        df_radiomics,
133        removed_cc_dir
134    )
135
136    # Redefine labels based on the postprocessed segmentation
137    label_redefinition(
138        TASK,
139        THRESHOLD_FILE_LBLDEF,
140        removed_cc_dir,
141        df_radiomics,
142        out_dir
143    )
144
145    return out_path
146
147
def infer_single(
    input_path: Path,
    out_dir: Path
) -> None:
    """Segment one case end to end.

    The case directory is copied into a temporary workspace and its images are renamed
    according to ``NAME_MAPPER``. nnUNet, MedNeXt and SwinUNETR are run, their predictions
    are ensembled, and the ensemble is post-processed into ``out_dir``. The workspace is
    deleted when the function returns.

    Args:
        input_path (Path): Input directory containing the 4 nii.gz files.
        out_dir (Path): Output directory to store the segmentation results.
    """
    with tempfile.TemporaryDirectory() as scratch:
        temp_dir: Path = Path(scratch)
        print(f'Storing artifacts in temporary directory {temp_dir}')

        input_folder_raw: Path = maybe_make_dir(temp_dir / 'inp')
        name: str = input_path.name
        print(f'Processing case: {name}')

        # Work on a private copy so the caller's files are never renamed.
        case_dir: Path = input_folder_raw / name
        shutil.copytree(input_path, case_dir)

        # Swap each source suffix for its mapped suffix. The last renamed image is
        # kept and later handed to the ensembler.
        for old_suffix, new_suffix in NAME_MAPPER.items():
            one_image: Path = case_dir / f'{name}{new_suffix}'
            os.rename(case_dir / f'{name}{old_suffix}', one_image)

        # nnUNet runs on the renamed copy.
        nnunet_npz_path_list: List[Path] = run_infer_nnunet(
            case_dir,
            maybe_make_dir(temp_dir / 'nnunet'),
            TASK,
            name,
            ensemble=False
        )

        # MedNeXt runs on the renamed copy and returns two path lists.
        mednext_outputs = run_infer_mednext(
            case_dir,
            maybe_make_dir(temp_dir / 'mednext'),
            TASK,
            name,
            ensemble=False
        )
        mednext_npz_path_list, mednext_pkl_path_list = mednext_outputs

        # SwinUNETR is given the original (un-renamed) input directory.
        swinunetr_npz_path_list: List[Path] = run_infer_swinunetr(
            Path(input_path),
            maybe_make_dir(temp_dir / 'swinunetr'),
            TASK
        )

        # Weighted ensemble; weights are ordered SwinUNETR, nnUNetv2, MedNeXt.
        model_weights = [
            ENSEMBLE_WEIGHTS[model_key]
            for model_key in ('SwinUNETR', 'nnUNetv2', 'MedNeXt')
        ]
        ensemble_folder: Path = maybe_make_dir(input_folder_raw / 'ensemble')
        ped_ensemble(
            swinunetr_npz_path_list,
            nnunet_npz_path_list,
            mednext_npz_path_list,
            mednext_pkl_path_list,
            ensemble_folder,
            one_image,
            weights=model_weights
        )

        # Post-process the ensembled segmentation, using the original input directory.
        postprocess_single(input_path, ensemble_folder, out_dir)
227
228
def batch_processor(
    input_folder: str,
    output_folder: str
) -> None:
    """Run inference for every case directory inside ``input_folder``.

    Only sub-directories are treated as cases. Other entries (stray files) are
    skipped, matching the filtering done in ``batch_postprocess``. Cases are processed
    in sorted order so that runs are reproducible.

    Args:
        input_folder (str): Path to the folder containing input directories.
        output_folder (str): Path to the folder to store output segmentations.
    """
    destination = Path(output_folder)
    for input_path in sorted(Path(input_folder).iterdir()):
        # infer_single copies the case with shutil.copytree, which fails on a
        # regular file, so anything that is not a directory is ignored.
        if not input_path.is_dir():
            continue
        infer_single(input_path, destination)
241
242
def setup_model_weights() -> None:
    """Install the zipped weights of all three models.

    The archives listed in ``CONSTANTS`` are installed in the order nnUNet,
    SwinUNETR, MedNeXt. Only the MedNeXt call passes ``mednext=True``.
    """
    install_plan = (
        ('nnunet_model_path', {}),
        ('swinunetr_model_path', {}),
        ('mednext_model_path', {'mednext': True}),
    )
    for constant_key, extra_kwargs in install_plan:
        install_model_from_zip(CONSTANTS[constant_key], **extra_kwargs)
248
249
def batch_postprocess(
    input_folder: str,
    seg_folder: str,
    output_folder: str
) -> int:
    """Postprocess a batch of segmentation results.

    Every sub-directory of ``input_folder`` is treated as one case and passed to
    ``postprocess_single``. The paths it returns are collected and written to
    ``<output_folder>/val_radiomics.json``.

    Args:
        input_folder (str): Path to the folder containing input directories.
        seg_folder (str): Path to the folder containing segmentation results.
        output_folder (str): Path to the folder to store postprocessed results.

    Returns:
        int: Number of processed cases (sub-directories of ``input_folder``).
    """
    input_root = Path(input_folder)
    seg_root = Path(seg_folder)
    output_root = Path(output_folder)

    case_dirs: List[Path] = [entry for entry in input_root.iterdir() if entry.is_dir()]
    l_radiomics: List[Path] = [
        postprocess_single(case_dir, seg_root, output_root)
        for case_dir in case_dirs
    ]

    # With no cases, postprocess_single never ran and the folder may not exist yet.
    output_root.mkdir(parents=True, exist_ok=True)
    # output_folder is a str, so the target must be joined through Path
    # (`str / str` raises TypeError).
    # NOTE(review): the saved list holds the Path objects returned by
    # postprocess_single - confirm save_json can serialize them.
    save_json(output_root / 'val_radiomics.json', l_radiomics)

    # Count the cases actually processed, not every entry in the input folder.
    return len(l_radiomics)
274
275
# Script entry point: install the model weights, then run inference on every
# entry of ./ins and store the results in ./outs.
if __name__ == "__main__":
    setup_model_weights()
    batch_processor('./ins', './outs')

TASK: str = 'BraTS-PED'

script_dir = PosixPath('/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference')

json_path: pathlib.Path = PosixPath('/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/ensembler/weights_cv.json')

ENSEMBLE_WEIGHTS: Dict[str, float] = {'nnUNetv2': 0.330839468, 'MedNeXt': 0.338249355, 'SwinUNETR': 0.330911177}

NAME_MAPPER: Dict[str, str] = {'-t1n.nii.gz': '_0000.nii.gz', '-t1c.nii.gz': '_0001.nii.gz', '-t2w.nii.gz': '_0002.nii.gz', '-t2f.nii.gz': '_0003.nii.gz'}

CLUSTER_ARTIFACT: Any = {'normalizer': Normalizer(), 'pca_components': Index(['wt_t2f_gldm_DependenceVariance', 'wt_t1n_firstorder_Median', 'wt_t2w_firstorder_Median', 'wt_t1c_firstorder_Median', 'wt_t1c_firstorder_10Percentile', 'wt_t2f_firstorder_10Percentile', 'wt_t1n_gldm_DependenceVariance', 'wt_t2w_gldm_DependenceVariance', 'wt_t1n_firstorder_10Percentile'], dtype='object'), 'pca_num_components': 9, 'pca': PCA(n_components=9), 'kmeans_components': 9, 'kmeans': KMeans(n_clusters=9, random_state=1000000008)}

THRESHOLD_FILE_CC: pathlib.Path = PosixPath('/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/postproc/cc_thresholds_cv.json')

THRESHOLD_FILE_LBLDEF: pathlib.Path = PosixPath('/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/postproc/lblredef_thresholds_cv.json')

CONSTANTS: Dict[str, str] = {'pyradiomics_paramfile': '/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/radiomics/params.yaml', 'nnunet_model_path': '/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/weights/BraTS2024_PEDs_nnunetv2_model.zip', 'mednext_model_path': '/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/weights/BraTS2024_PEDs_MedNeXt_model.zip', 'swinunetr_model_path': '/home/runner/work/HOPE-Segmenter-Kids/HOPE-Segmenter-Kids/segmenter_backend/inference/weights/swinunetr_peds_trunc.zip'}

def maybe_make_dir(path: pathlib.Path) -> pathlib.Path: View Source

def maybe_make_dir(path: Path) -> Path:
    """Ensure a directory exists and return its location.

    Missing parent directories are created too; a directory that already
    exists is left as it is.

    Args:
        path (Path): Location of the directory that should exist.

    Returns:
        Path: ``path`` wrapped in a ``Path`` object.
    """
    os.makedirs(path, exist_ok=True)
    directory = Path(path)
    return directory

Create a directory if it does not exist.

Args: path (Path): The path of the directory to create.

Returns: Path: The path to the created directory.

def lbl_redefination( task: str, threshold_file: pathlib.Path, input_dir: pathlib.Path, df_radiomics: pandas.core.frame.DataFrame, out_dir: pathlib.Path) -> None: View Source

def lbl_redefination(
    task: str,
    threshold_file: Path,
    input_dir: Path,
    df_radiomics: pd.DataFrame,
    out_dir: Path
) -> None:
    """Pass files through unchanged by copying them from ``input_dir`` to ``out_dir``.

    Despite its name, this function does not modify any labels: it only copies the
    regular files found directly inside ``input_dir``. ``task``, ``threshold_file``
    and ``df_radiomics`` are accepted but not used.

    Args:
        task (str): The task identifier (unused).
        threshold_file (Path): Path to the threshold file for label redefinition (unused).
        input_dir (Path): Directory containing the files to copy.
        df_radiomics (pd.DataFrame): DataFrame containing radiomics data (unused).
        out_dir (Path): Directory receiving the copies; created if it does not exist.
    """
    destination = Path(out_dir)
    # shutil.copy treats a non-existent destination as a *file name*, so without
    # this every copied file would overwrite the previous one under that name.
    destination.mkdir(parents=True, exist_ok=True)

    for file in Path(input_dir).iterdir():
        # shutil.copy cannot copy directories; skip anything that is not a file.
        if file.is_file():
            shutil.copy(file, destination)

Perform label redefinition by copying files from input to output directory.

Args: task (str): The task identifier. threshold_file (Path): Path to the threshold file for label redefinition. input_dir (Path): Directory containing input files. df_radiomics (pd.DataFrame): DataFrame containing radiomics data. out_dir (Path): Directory to store output files.

def postprocess_single( input_dir: pathlib.Path, seg_dir: pathlib.Path, out_dir: pathlib.Path) -> pathlib.Path: View Source

 83def postprocess_single(
 84    input_dir: Path,
 85    seg_dir: Path,
 86    out_dir: Path
 87) -> Path:
 88    """Postprocess a single segmentation case.
 89
 90    This function performs radiomics extraction, cluster determination, removes small components,
 91    and redefines labels based on the processed data.
 92
 93    Args:
 94        input_dir (Path): Directory containing the input data.
 95        seg_dir (Path): Directory containing the segmentation results.
 96        out_dir (Path): Directory to store the postprocessed segmentation.
 97
 98    Returns:
 99        Path: Path to the postprocessed segmentation file.
100    """
101    case_name: str = input_dir.name
102    seg_path: Path = seg_dir / f"{case_name}.nii.gz"
103    out_path: Path = maybe_make_dir(out_dir) / f"{case_name}.nii.gz"
104
105    # Compute radiomics features
106    try:
107        df_radiomics: pd.DataFrame = extract_all(
108            CONSTANTS['pyradiomics_paramfile'],
109            input_dir.parent,
110            case_name,
111            seg_path,
112            1,
113            region='wt',
114            tmpp='/tmp/',
115            seg_suffix='',
116            sequences=['-t1n', '-t1c', '-t2w', '-t2f']
117        )
118        # Determine the cluster based on radiomics features
119        cluster: int = get_cluster(df_radiomics, CLUSTER_ARTIFACT)[0]
120    except Exception as e:
121        print(f"Error in radiomics extraction: {e}")
122        df_radiomics = pd.DataFrame([{"StudyID": case_name}])
123        cluster = 4
124
125    df_radiomics['cluster'] = int(cluster)
126
127    # Remove disconnected regions from segmentation
128    removed_cc_dir: Path = maybe_make_dir(seg_dir / 'remove_cc')
129    remove_small_component(
130        TASK,
131        THRESHOLD_FILE_CC,
132        seg_dir,
133        df_radiomics,
134        removed_cc_dir
135    )
136
137    # Redefine labels based on the postprocessed segmentation
138    label_redefinition(
139        TASK,
140        THRESHOLD_FILE_LBLDEF,
141        removed_cc_dir,
142        df_radiomics,
143        out_dir
144    )
145
146    return out_path

Postprocess a single segmentation case.

This function performs radiomics extraction, cluster determination, removes small components, and redefines labels based on the processed data.

Args: input_dir (Path): Directory containing the input data. seg_dir (Path): Directory containing the segmentation results. out_dir (Path): Directory to store the postprocessed segmentation.

Returns: Path: Path to the postprocessed segmentation file.

def infer_single(input_path: pathlib.Path, out_dir: pathlib.Path) -> None: View Source

def infer_single(
    input_path: Path,
    out_dir: Path
) -> None:
    """Segment one case end to end.

    The case directory is copied into a temporary workspace and its images are renamed
    according to ``NAME_MAPPER``. nnUNet, MedNeXt and SwinUNETR are run, their predictions
    are ensembled, and the ensemble is post-processed into ``out_dir``. The workspace is
    deleted when the function returns.

    Args:
        input_path (Path): Input directory containing the 4 nii.gz files.
        out_dir (Path): Output directory to store the segmentation results.
    """
    with tempfile.TemporaryDirectory() as scratch:
        temp_dir: Path = Path(scratch)
        print(f'Storing artifacts in temporary directory {temp_dir}')

        input_folder_raw: Path = maybe_make_dir(temp_dir / 'inp')
        name: str = input_path.name
        print(f'Processing case: {name}')

        # Work on a private copy so the caller's files are never renamed.
        case_dir: Path = input_folder_raw / name
        shutil.copytree(input_path, case_dir)

        # Swap each source suffix for its mapped suffix. The last renamed image is
        # kept and later handed to the ensembler.
        for old_suffix, new_suffix in NAME_MAPPER.items():
            one_image: Path = case_dir / f'{name}{new_suffix}'
            os.rename(case_dir / f'{name}{old_suffix}', one_image)

        # nnUNet runs on the renamed copy.
        nnunet_npz_path_list: List[Path] = run_infer_nnunet(
            case_dir,
            maybe_make_dir(temp_dir / 'nnunet'),
            TASK,
            name,
            ensemble=False
        )

        # MedNeXt runs on the renamed copy and returns two path lists.
        mednext_outputs = run_infer_mednext(
            case_dir,
            maybe_make_dir(temp_dir / 'mednext'),
            TASK,
            name,
            ensemble=False
        )
        mednext_npz_path_list, mednext_pkl_path_list = mednext_outputs

        # SwinUNETR is given the original (un-renamed) input directory.
        swinunetr_npz_path_list: List[Path] = run_infer_swinunetr(
            Path(input_path),
            maybe_make_dir(temp_dir / 'swinunetr'),
            TASK
        )

        # Weighted ensemble; weights are ordered SwinUNETR, nnUNetv2, MedNeXt.
        model_weights = [
            ENSEMBLE_WEIGHTS[model_key]
            for model_key in ('SwinUNETR', 'nnUNetv2', 'MedNeXt')
        ]
        ensemble_folder: Path = maybe_make_dir(input_folder_raw / 'ensemble')
        ped_ensemble(
            swinunetr_npz_path_list,
            nnunet_npz_path_list,
            mednext_npz_path_list,
            mednext_pkl_path_list,
            ensemble_folder,
            one_image,
            weights=model_weights
        )

        # Post-process the ensembled segmentation, using the original input directory.
        postprocess_single(input_path, ensemble_folder, out_dir)

Perform inference on a single input directory.

This function sets up a temporary directory, copies input files, runs inference models, ensembles the predictions, and postprocesses the segmentation.

Args: input_path (Path): Input directory containing the 4 nii.gz files. out_dir (Path): Output directory to store the segmentation results.

def batch_processor(input_folder: str, output_folder: str) -> None: View Source

def batch_processor(
    input_folder: str,
    output_folder: str
) -> None:
    """Run inference for every case directory inside ``input_folder``.

    Only sub-directories are treated as cases. Other entries (stray files) are
    skipped, matching the filtering done in ``batch_postprocess``. Cases are processed
    in sorted order so that runs are reproducible.

    Args:
        input_folder (str): Path to the folder containing input directories.
        output_folder (str): Path to the folder to store output segmentations.
    """
    destination = Path(output_folder)
    for input_path in sorted(Path(input_folder).iterdir()):
        # infer_single copies the case with shutil.copytree, which fails on a
        # regular file, so anything that is not a directory is ignored.
        if not input_path.is_dir():
            continue
        infer_single(input_path, destination)

Process a batch of input directories for inference.

Args: input_folder (str): Path to the folder containing input directories. output_folder (str): Path to the folder to store output segmentations.

def setup_model_weights() -> None: View Source

def setup_model_weights() -> None:
    """Install the zipped weights of all three models.

    The archives listed in ``CONSTANTS`` are installed in the order nnUNet,
    SwinUNETR, MedNeXt. Only the MedNeXt call passes ``mednext=True``.
    """
    install_plan = (
        ('nnunet_model_path', {}),
        ('swinunetr_model_path', {}),
        ('mednext_model_path', {'mednext': True}),
    )
    for constant_key, extra_kwargs in install_plan:
        install_model_from_zip(CONSTANTS[constant_key], **extra_kwargs)

Install model weights from zip files.

def batch_postprocess(input_folder: str, seg_folder: str, output_folder: str) -> int: View Source

def batch_postprocess(
    input_folder: str,
    seg_folder: str,
    output_folder: str
) -> int:
    """Postprocess a batch of segmentation results.

    Every sub-directory of ``input_folder`` is treated as one case and passed to
    ``postprocess_single``. The paths it returns are collected and written to
    ``<output_folder>/val_radiomics.json``.

    Args:
        input_folder (str): Path to the folder containing input directories.
        seg_folder (str): Path to the folder containing segmentation results.
        output_folder (str): Path to the folder to store postprocessed results.

    Returns:
        int: Number of processed cases (sub-directories of ``input_folder``).
    """
    input_root = Path(input_folder)
    seg_root = Path(seg_folder)
    output_root = Path(output_folder)

    case_dirs: List[Path] = [entry for entry in input_root.iterdir() if entry.is_dir()]
    l_radiomics: List[Path] = [
        postprocess_single(case_dir, seg_root, output_root)
        for case_dir in case_dirs
    ]

    # With no cases, postprocess_single never ran and the folder may not exist yet.
    output_root.mkdir(parents=True, exist_ok=True)
    # output_folder is a str, so the target must be joined through Path
    # (`str / str` raises TypeError).
    # NOTE(review): the saved list holds the Path objects returned by
    # postprocess_single - confirm save_json can serialize them.
    save_json(output_root / 'val_radiomics.json', l_radiomics)

    # Count the cases actually processed, not every entry in the input folder.
    return len(l_radiomics)

Postprocess a batch of segmentation results.

Args: input_folder (str): Path to the folder containing input directories. seg_folder (str): Path to the folder containing segmentation results. output_folder (str): Path to the folder to store postprocessed results.

Returns: int: Number of processed cases.