inference.radiomics.feature_extraction_v2
Radiomics Feature Extraction Module
Provides functionality for extracting radiomics features from MRI sequences.
"""
### Radiomics Feature Extraction Module

Provides functionality for extracting radiomics features from MRI sequences.
"""


import argparse
import json
import os
import shutil
import subprocess
import time
import warnings
from pathlib import Path
from typing import Any, List, Dict, Tuple, Union, Optional

import cc3d
import nibabel as nib
import numpy as np
import pandas as pd
from scipy.ndimage import binary_dilation, generate_binary_structure

# Sequence suffixes processed when the caller does not supply its own list.
_DEFAULT_SEQUENCES = ('-t1n', '-t1c', '-t2w', '-t2f')


def load_json(path: Path) -> Any:
    """
    Loads a JSON file from the specified path.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        Any: The data loaded from the JSON file.
    """
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)


def save_json(path: Path, data: Any) -> None:
    """
    Saves data to a JSON file at the specified path.

    Args:
        path (Path): A Path representing the file path.
        data (Any): The data to be serialized and saved.
    """
    with open(path, 'w', encoding='utf-8') as f:
        json.dump(data, f, indent=4)


def load_jsonl(path: Path) -> List[Any]:
    """
    Loads a JSONL file (JSON lines) from the specified path.

    Blank lines are skipped.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        List[Any]: A list of data loaded from the JSONL file.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def save_jsonl(path: Path, data: List[Any]) -> None:
    """
    Saves data to a JSONL file at the specified path.

    Args:
        path (Path): A Path representing the file path.
        data (List[Any]): A list of data to be serialized and saved.
    """
    with open(path, 'w', encoding='utf-8') as f:
        for line in data:
            json.dump(line, f)
            f.write('\n')


def maybe_make_dir(path: str) -> Path:
    """
    Creates a directory at the specified path if it does not exist.

    Args:
        path (str): A string representing the directory path.

    Returns:
        Path: A Path object for the created or existing directory.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    dir_path = Path(path)
    # exist_ok avoids the race between an exists() check and mkdir().
    dir_path.mkdir(parents=True, exist_ok=True)
    return dir_path


def maybe_remove_dir(path: str) -> Path:
    """
    Removes a directory at the specified path if it exists.

    Removal is best-effort: failures are printed, not raised.

    Args:
        path (str): A string representing the directory path.

    Returns:
        Path: A Path object for the removed or existing directory.
    """
    path_obj = Path(path)
    if path_obj.is_dir():
        try:
            shutil.rmtree(path_obj)
            print(f"Directory {path} removed successfully.")
        except Exception as e:
            print(f"Error removing directory {path}: {e}")
    return path_obj


def extract_feature(
    paramfile: Path,
    data_path: Path,
    seg_path: Path,
    pid: str,
    seq: str,
    region: str = 'wt',
    tmpp: str = '.'
) -> Dict[str, Any]:
    """
    Extract features from a single MRI sequence.

    Runs the ``pyradiomics`` command-line tool on ``<data_path>/<pid><seq>.nii.gz``
    and renames the resulting ``original_*`` features. Keys that do not start
    with ``original_`` are dropped.

    Args:
        paramfile (Path): Path to the parameter file for feature extraction.
        data_path (Path): Path to the data directory.
        seg_path (Path): Path to the segmentation file.
        pid (str): Patient ID.
        seq (str): Sequence identifier.
        region (str, optional): BraTS Region of interest. Defaults to 'wt'.
        tmpp (str, optional): Temporary directory path. Defaults to '.'.

    Returns:
        Dict[str, Any]: Dictionary of extracted features.

    Raises:
        RuntimeError: If the pyradiomics command exits with a non-zero status.
    """
    img_path = data_path / f"{pid}{seq}.nii.gz"
    tmp_path = maybe_make_dir(tmpp) / f"{pid}{seq}.json"

    # An argument list (no shell) keeps paths with spaces or shell
    # metacharacters intact.
    cmd = [
        "pyradiomics", str(img_path), str(seg_path),
        "-o", str(tmp_path), "-f", "json", "--param", str(paramfile),
    ]
    try:
        completed = subprocess.run(cmd)
        if completed.returncode != 0:
            raise RuntimeError(
                f"pyradiomics exited with status {completed.returncode} for {img_path}"
            )
        dt = load_json(tmp_path)
    finally:
        # Remove the temporary file even when the command or parsing failed.
        if tmp_path.exists():
            tmp_path.unlink()

    new_dict: Dict[str, Any] = {'StudyID': pid}
    seq_tag = seq.replace("-", "_")
    old_dict = dt[0]
    for k, v in old_dict.items():
        if k.startswith('original_shape_'):
            # Shape features depend only on the mask, so they carry no sequence tag.
            new_dict[k.replace('original_shape_', f'{region}_shape_', 1)] = v
        elif k.startswith('original_'):
            new_dict[k.replace('original_', f'{region}{seq_tag}_', 1)] = v

    return new_dict


def extract_case(
    paramfile: Path,
    data_path: Path,
    seg_path: Path,
    pid: str,
    region: str = 'wt',
    tmpp: str = '.',
    sequences: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Extract features for a single case across multiple sequences.

    Args:
        paramfile (Path): Path to the parameter file for feature extraction.
        data_path (Path): Path to the data directory.
        seg_path (Path): Path to the segmentation file.
        pid (str): Patient ID.
        region (str, optional): BraTS Region of interest. Defaults to 'wt'.
        tmpp (str, optional): Temporary directory path. Defaults to '.'.
        sequences (List[str], optional): List of MRI sequences to process.
            Defaults to ['-t1n', '-t1c', '-t2w', '-t2f'].

    Returns:
        Dict[str, Any]: Dictionary of aggregated features for the case.
    """
    if sequences is None:
        sequences = list(_DEFAULT_SEQUENCES)

    # StudyID and shape features are taken from the first sequence only.
    shared_prefixes = ('StudyID', f'{region}_shape')
    new_dict: Dict[str, Any] = {}
    for i, seq in enumerate(sequences):
        feature = extract_feature(paramfile, data_path, seg_path, pid, seq, region, tmpp)
        if i == 0:
            new_dict.update(feature)
        else:
            new_dict.update(
                {k: v for k, v in feature.items() if not k.startswith(shared_prefixes)}
            )
    return new_dict


def create_dilation(
    seg_path: Path,
    out_path: Path,
    dilation_factor: int = 3,
    region: str = 'wt'
) -> None:
    """
    Create the segmentation mask used for feature extraction.

    The mask keeps only the largest 26-connected component of the voxels with
    a label greater than zero. NOTE: despite the function name, no dilation is
    applied; the dilation step was commented out in the original code, so
    ``dilation_factor`` is accepted but currently unused.

    Args:
        seg_path (Path): Path to the original segmentation file.
        out_path (Path): Path to save the resulting mask.
        dilation_factor (int, optional): Number of dilation iterations
            (currently unused). Defaults to 3.
        region (str, optional): BraTS Region of interest. Only 'wt' is
            supported; other values trigger a warning. Defaults to 'wt'.
    """
    img_obj = nib.load(seg_path)
    img_data = img_obj.get_fdata()

    # Only the whole-tumor region is implemented; other values fall back to it.
    if region != 'wt':
        warnings.warn(f"Invalid region: {region}. Computing whole tumor instead.")
    binary_seg = np.where(img_data > 0, 1, 0)

    labels_out, n = cc3d.connected_components(binary_seg, connectivity=26, return_N=True)

    if n == 0:
        # Without this guard the background label 0 would be selected and the
        # whole background written out as the mask.
        warnings.warn(f"No foreground voxels found in {seg_path}; writing an empty mask.")
        mask = np.zeros(labels_out.shape, dtype=np.int8)
    else:
        # Count voxels per component in one pass; ties keep the smallest label.
        component_ids, voxel_counts = np.unique(
            labels_out[labels_out > 0], return_counts=True
        )
        label_max = component_ids[np.argmax(voxel_counts)]
        mask = np.where(labels_out == label_max, 1, 0).astype(np.int8)

    seg_obj = nib.Nifti1Image(mask, img_obj.affine)
    nib.save(seg_obj, out_path)


def extract_all(
    paramfile: Path,
    data_path: Path,
    case_: str,
    seg_path: Path,
    dilation_factor: int = 3,
    region: str = 'wt',
    tmpp: str = '.',
    seg_suffix: str = '-seg',
    sequences: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Extract all features for a given case.

    Args:
        paramfile (Path): Path to the parameter file for feature extraction.
        data_path (Path): Path to the data directory.
        case_ (str): Case identifier (sub-directory of ``data_path``).
        seg_path (Path): Path to the segmentation file.
        dilation_factor (int, optional): Forwarded to ``create_dilation``. Defaults to 3.
        region (str, optional): BraTS Region of interest. Defaults to 'wt'.
        tmpp (str, optional): Temporary directory path. Defaults to '.'.
        seg_suffix (str, optional): Suffix for segmentation files. Accepted for
            interface compatibility; not used by this function. Defaults to '-seg'.
        sequences (List[str], optional): List of sequences to process.
            Defaults to ['-t1n', '-t1c', '-t2w', '-t2f'].

    Returns:
        pd.DataFrame: DataFrame containing all extracted features.
    """
    if sequences is None:
        sequences = list(_DEFAULT_SEQUENCES)

    cases = [data_path / case_]
    features: List[Dict[str, Any]] = []
    t0 = time.time()
    tmp_dir = maybe_make_dir(tmpp)

    for i, case in enumerate(cases):
        mask_path = tmp_dir / f"{case.name}_{region}_dilated.nii.gz"
        try:
            create_dilation(seg_path, mask_path, dilation_factor, region)
            # `case` already includes data_path; joining it again would
            # duplicate a relative prefix (data/data/<case>).
            features.append(
                extract_case(paramfile, case, mask_path, case.name, region, tmpp, sequences)
            )
        finally:
            # Remove the temporary mask even if extraction failed.
            if mask_path.exists():
                mask_path.unlink()

        t1 = (time.time() - t0) / 60.0
        print(f"{i + 1:04d} {case} extraction time: {t1:.1f} min")

    return pd.DataFrame(features)


def main() -> None:
    """Command-line entry point: extract features for one case."""
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input_dir", type=str, required=True, help="Input directory containing cases")
    parser.add_argument("-o", "--output", type=str, required=True, help="Output directory for extracted features")
    parser.add_argument("-d", "--dilation", type=int, default=3, help="Dilation factor")
    parser.add_argument("-r", "--region", type=str, default='wt', help="Region of interest")
    parser.add_argument("-p", "--param", type=str, required=True, help="Parameter file for feature extraction")
    parser.add_argument("-t", "--tmpp", default='./tmp', type=str, help="Temporary directory path")
    parser.add_argument("-s", "--seg-suffix", default='-seg', type=str, help="Segmentation file suffix")
    parser.add_argument("-c", "--case", default='Case1', type=str, help="Case identifier inside the input directory")
    parser.add_argument("-seq", "--sequences", nargs='+', default=list(_DEFAULT_SEQUENCES), help="MRI sequences to process")
    args = parser.parse_args()

    print(f"Arguments: {args}")

    # Ensure temporary directory exists
    maybe_make_dir(args.tmpp)

    try:
        # NOTE(review): --output is passed as the segmentation path although
        # its help text describes an output directory, and the returned
        # DataFrame is discarded. Both are kept as in the original; confirm
        # the intended behavior.
        extract_all(
            paramfile=Path(args.param),
            data_path=Path(args.input_dir),
            case_=args.case,
            seg_path=Path(args.output),
            dilation_factor=args.dilation,
            region=args.region,
            tmpp=args.tmpp,
            seg_suffix=args.seg_suffix,
            sequences=args.sequences
        )
    finally:
        # Remove temporary directory after processing, even on failure
        maybe_remove_dir(args.tmpp)


if __name__ == "__main__":
    main()
def load_json(path: Path) -> Any:
    """
    Loads a JSON file from the specified path.

    The file is decoded as UTF-8 (the encoding mandated for JSON) rather than
    with the platform's locale default.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        Any: The data loaded from the JSON file.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the content is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return json.load(f)
Loads a JSON file from the specified path.
Args: path (Path): A Path representing the file path.
Returns: Any: The data loaded from the JSON file.
def save_json(path: Path, data: Any) -> None:
    """
    Serialize ``data`` as JSON (4-space indentation) and write it to ``path``.

    An existing file at ``path`` is overwritten.

    Args:
        path (Path): Destination file path.
        data (Any): JSON-serializable object to store.
    """
    with open(path, mode='w') as handle:
        json.dump(obj=data, fp=handle, indent=4)
Saves data to a JSON file at the specified path.
Args: path (Path): A Path representing the file path. data (Any): The data to be serialized and saved.
def load_jsonl(path: Path) -> List[Any]:
    """
    Loads a JSONL file (JSON lines) from the specified path.

    Each non-blank line is parsed as one JSON document. Blank or
    whitespace-only lines (for example trailing empty lines) are skipped
    instead of causing a decode error. The file is decoded as UTF-8.

    Args:
        path (Path): A Path representing the file path.

    Returns:
        List[Any]: A list of data loaded from the JSONL file, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]
Loads a JSONL file (JSON lines) from the specified path.
Args: path (Path): A Path representing the file path.
Returns: List[Any]: A list of data loaded from the JSONL file.
def save_jsonl(path: Path, data: List[Any]) -> None:
    """
    Write ``data`` to ``path`` in JSON Lines format.

    Every element is serialized as a single JSON document followed by a
    newline. An existing file at ``path`` is overwritten.

    Args:
        path (Path): Destination file path.
        data (List[Any]): JSON-serializable records, one per output line.
    """
    with open(path, mode='w') as handle:
        for record in data:
            json.dump(record, handle)
            handle.write('\n')
Saves data to a JSONL file at the specified path.
Args: path (Path): A Path representing the file path. data (List[Any]): A list of data to be serialized and saved.
def maybe_make_dir(path: str) -> Path:
    """
    Creates a directory at the specified path if it does not exist.

    Missing parent directories are created as well. The call is idempotent
    and safe when another process creates the directory at the same time.

    Args:
        path (str): A string representing the directory path.

    Returns:
        Path: A Path object for the created or existing directory.

    Raises:
        FileExistsError: If the path exists but is not a directory.
    """
    dir_path = Path(path)
    # exist_ok=True replaces the separate exists() check, which could race
    # with a concurrent creator and then fail inside mkdir().
    dir_path.mkdir(parents=True, exist_ok=True)
    return dir_path
Creates a directory at the specified path if it does not exist.
Args: path (str): A string representing the directory path.
Returns: Path: A Path object for the created or existing directory.
def maybe_remove_dir(path: str) -> Path:
    """
    Delete the directory tree at ``path`` when it is an existing directory.

    Removal is best-effort: any error raised while deleting is reported on
    standard output and not propagated. Paths that do not exist or are not
    directories are left untouched.

    Args:
        path (str): Directory path to remove.

    Returns:
        Path: ``path`` converted to a Path object, whether or not anything
        was removed.
    """
    target = Path(path)

    # is_dir() is False for missing paths, so it also covers the exists() check.
    if not target.is_dir():
        return target

    try:
        shutil.rmtree(target)
        print(f"Directory {path} removed successfully.")
    except Exception as e:
        print(f"Error removing directory {path}: {e}")
    return target
Removes a directory at the specified path if it exists.
Args: path (str): A string representing the directory path.
Returns: Path: A Path object for the removed or existing directory.
def extract_feature(
    paramfile: Path,
    data_path: Path,
    seg_path: Path,
    pid: str,
    seq: str,
    region: str = 'wt',
    tmpp: str = '.'
) -> Dict[str, Any]:
    """
    Extract features from a single MRI sequence.

    Runs the ``pyradiomics`` command-line tool on ``<data_path>/<pid><seq>.nii.gz``
    with the given mask, reads the JSON result from a temporary file and
    renames the features:

    * ``original_shape_*`` becomes ``<region>_shape_*``
    * other ``original_*`` keys become ``<region><seq with '-' as '_'>_*``

    Keys that do not start with ``original_`` are dropped.

    Args:
        paramfile (Path): Path to the parameter file for feature extraction.
        data_path (Path): Path to the data directory.
        seg_path (Path): Path to the segmentation file.
        pid (str): Patient ID.
        seq (str): Sequence identifier.
        region (str, optional): BraTS Region of interest. Defaults to 'wt'.
        tmpp (str, optional): Temporary directory path. Defaults to '.'.

    Returns:
        Dict[str, Any]: Dictionary of extracted features, including 'StudyID'.

    Raises:
        RuntimeError: If the pyradiomics command exits with a non-zero status.
        FileNotFoundError: If the pyradiomics executable cannot be found.
    """
    # Imported locally so this function does not depend on a new file-level import.
    import subprocess

    img_path = data_path / f"{pid}{seq}.nii.gz"
    tmp_path = maybe_make_dir(tmpp) / f"{pid}{seq}.json"

    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids command injection through file names.
    cmd = [
        "pyradiomics", str(img_path), str(seg_path),
        "-o", str(tmp_path), "-f", "json", "--param", str(paramfile),
    ]
    try:
        completed = subprocess.run(cmd)
        if completed.returncode != 0:
            raise RuntimeError(
                f"pyradiomics exited with status {completed.returncode} for {img_path}"
            )
        dt = load_json(tmp_path)
    finally:
        # Remove the temporary file even when the command or parsing failed.
        if tmp_path.exists():
            tmp_path.unlink()

    new_dict: Dict[str, Any] = {'StudyID': pid}
    seq_tag = seq.replace("-", "_")
    # The first element of the JSON output holds the features for this image.
    old_dict = dt[0]
    for k, v in old_dict.items():
        if k.startswith('original_shape_'):
            # Shape features depend only on the mask, so they carry no sequence tag.
            new_dict[k.replace('original_shape_', f'{region}_shape_', 1)] = v
        elif k.startswith('original_'):
            new_dict[k.replace('original_', f'{region}{seq_tag}_', 1)] = v

    return new_dict
Extract features from a single MRI sequence.
Args: paramfile (Path): Path to the parameter file for feature extraction. data_path (Path): Path to the data directory. seg_path (Path): Path to the segmentation file. pid (str): Patient ID. seq (str): Sequence identifier. region (str, optional): BraTS Region of interest. Defaults to 'wt'. tmpp (str, optional): Temporary directory path. Defaults to '.'.
Returns: Dict[str, Any]: Dictionary of extracted features.
def extract_case(
    paramfile: Path,
    data_path: Path,
    seg_path: Path,
    pid: str,
    region: str = 'wt',
    tmpp: str = '.',
    sequences: Optional[List[str]] = None
) -> Dict[str, Any]:
    """
    Extract features for a single case across multiple sequences.

    Features of every sequence are merged into one dictionary. 'StudyID' and
    the '<region>_shape*' features are taken from the first sequence only.

    Args:
        paramfile (Path): Path to the parameter file for feature extraction.
        data_path (Path): Path to the data directory.
        seg_path (Path): Path to the segmentation file.
        pid (str): Patient ID.
        region (str, optional): BraTS Region of interest. Defaults to 'wt'.
        tmpp (str, optional): Temporary directory path. Defaults to '.'.
        sequences (List[str], optional): List of MRI sequences to process.
            Defaults to ['-t1n', '-t1c', '-t2w', '-t2f'] when None.

    Returns:
        Dict[str, Any]: Dictionary of aggregated features for the case;
        empty when ``sequences`` is empty.
    """
    # A None sentinel replaces the shared mutable list default.
    if sequences is None:
        sequences = ['-t1n', '-t1c', '-t2w', '-t2f']

    shared_prefixes = ('StudyID', f'{region}_shape')
    new_dict: Dict[str, Any] = {}
    for i, seq in enumerate(sequences):
        feature = extract_feature(paramfile, data_path, seg_path, pid, seq, region, tmpp)

        if i == 0:
            new_dict.update(feature)
        else:
            # Skip the keys already contributed by the first sequence.
            new_dict.update(
                {k: v for k, v in feature.items() if not k.startswith(shared_prefixes)}
            )
    return new_dict
Extract features for a single case across multiple sequences.
Args: paramfile (Path): Path to the parameter file for feature extraction. data_path (Path): Path to the data directory. seg_path (Path): Path to the segmentation file. pid (str): Patient ID. region (str, optional): BraTS Region of interest. Defaults to 'wt'. tmpp (str, optional): Temporary directory path. Defaults to '.'. sequences (List[str], optional): List of MRI sequences to process. Defaults to ['-t1n', '-t1c', '-t2w', '-t2f'].
Returns: Dict[str, Any]: Dictionary of aggregated features for the case.
def create_dilation(
    seg_path: Path,
    out_path: Path,
    dilation_factor: int = 3,
    region: str = 'wt'
) -> None:
    """
    Create the segmentation mask used for feature extraction.

    The mask keeps only the largest 26-connected component of the voxels with
    a label greater than zero and is saved as an int8 NIfTI image with the
    affine of the input. NOTE: despite the function name, no dilation is
    applied; the dilation step was commented out in the original code, so
    ``dilation_factor`` is accepted but currently unused.

    Args:
        seg_path (Path): Path to the original segmentation file.
        out_path (Path): Path to save the resulting mask.
        dilation_factor (int, optional): Number of dilation iterations
            (currently unused). Defaults to 3.
        region (str, optional): BraTS Region of interest. Only 'wt' is
            supported; any other value triggers a warning and falls back to
            the whole tumor. Defaults to 'wt'.
    """
    img_obj = nib.load(seg_path)
    img_data = img_obj.get_fdata()

    # Only the whole-tumor region is implemented; other values fall back to it.
    if region != 'wt':
        warnings.warn(f"Invalid region: {region}. Computing whole tumor instead.")
    binary_seg = np.where(img_data > 0, 1, 0)

    # Identify connected components and retain the largest one
    labels_out, n = cc3d.connected_components(binary_seg, connectivity=26, return_N=True)

    if n == 0:
        # Without this guard the background label 0 would be selected and the
        # whole background written out as the mask.
        warnings.warn(f"No foreground voxels found in {seg_path}; writing an empty mask.")
        mask = np.zeros(labels_out.shape, dtype=np.int8)
    else:
        # Count voxels per component in one pass instead of scanning the
        # volume once per label. Ties keep the smallest label, as before.
        component_ids, voxel_counts = np.unique(
            labels_out[labels_out > 0], return_counts=True
        )
        label_max = component_ids[np.argmax(voxel_counts)]
        mask = np.where(labels_out == label_max, 1, 0).astype(np.int8)

    seg_obj = nib.Nifti1Image(mask, img_obj.affine)
    nib.save(seg_obj, out_path)
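Create a segmentation mask that keeps only the largest connected component of the tumor region. Note: the dilation step is commented out in the source, so the saved mask is not actually dilated.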
Args: seg_path (Path): Path to the original segmentation file. out_path (Path): Path to save the dilated segmentation. dilation_factor (int, optional): Number of dilation iterations. Defaults to 3. region (str, optional): BraTS Region of interest. Defaults to 'wt'.
def extract_all(
    paramfile: Path,
    data_path: Path,
    case_: str,
    seg_path: Path,
    dilation_factor: int = 3,
    region: str = 'wt',
    tmpp: str = '.',
    seg_suffix: str = '-seg',
    sequences: Optional[List[str]] = None
) -> pd.DataFrame:
    """
    Extract all features for a given case.

    Builds a temporary mask from ``seg_path`` with ``create_dilation``, runs
    ``extract_case`` on the case directory ``data_path / case_`` and removes
    the temporary mask afterwards.

    Args:
        paramfile (Path): Path to the parameter file for feature extraction.
        data_path (Path): Path to the data directory.
        case_ (str): Case identifier (sub-directory of ``data_path``).
        seg_path (Path): Path to the segmentation file.
        dilation_factor (int, optional): Forwarded to ``create_dilation``. Defaults to 3.
        region (str, optional): BraTS Region of interest. Defaults to 'wt'.
        tmpp (str, optional): Temporary directory path. Defaults to '.'.
        seg_suffix (str, optional): Suffix for segmentation files. Accepted for
            interface compatibility; not used by this function. Defaults to '-seg'.
        sequences (List[str], optional): List of sequences to process.
            Defaults to ['-t1n', '-t1c', '-t2w', '-t2f'] when None.

    Returns:
        pd.DataFrame: DataFrame containing all extracted features (one row per case).
    """
    # Resolve the default here so a concrete list is always passed on.
    if sequences is None:
        sequences = ['-t1n', '-t1c', '-t2w', '-t2f']

    cases = [data_path / case_]
    features: List[Dict[str, Any]] = []
    t0 = time.time()
    tmp_dir = maybe_make_dir(tmpp)

    for i, case in enumerate(cases):
        mask_path = tmp_dir / f"{case.name}_{region}_dilated.nii.gz"
        try:
            create_dilation(seg_path, mask_path, dilation_factor, region)

            # `case` already includes data_path; joining it with data_path
            # again would duplicate a relative prefix (data/data/<case>).
            features.append(
                extract_case(paramfile, case, mask_path, case.name, region, tmpp, sequences)
            )
        finally:
            # Remove the temporary mask even if extraction failed.
            if mask_path.exists():
                mask_path.unlink()

        t1 = (time.time() - t0) / 60.0
        print(f"{i + 1:04d} {case} extraction time: {t1:.1f} min")

    return pd.DataFrame(features)
Extract all features for a given case.
Args: paramfile (Path): Path to the parameter file for feature extraction. data_path (Path): Path to the data directory. case_ (str): Case identifier. seg_path (Path): Path to the segmentation file. dilation_factor (int, optional): Number of dilation iterations. Defaults to 3. region (str, optional): BraTS Region of interest. Defaults to 'wt'. tmpp (str, optional): Temporary directory path. Defaults to '.'. seg_suffix (str, optional): Suffix for segmentation files. Defaults to '-seg'. sequences (List[str], optional): List of sequences to process. Defaults to ['-t1n', '-t1c', '-t2w', '-t2f'].
Returns: pd.DataFrame: DataFrame containing all extracted features.