Source code for WORC.plotting.plot_hyperparameters
#!/usr/bin/env python# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands## Licensed under the Apache License, Version 2.0 (the "License");# you may not use this file except in compliance with the License.# You may obtain a copy of the License at## http://www.apache.org/licenses/LICENSE-2.0## Unless required by applicable law or agreed to in writing, software# distributed under the License is distributed on an "AS IS" BASIS,# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.# See the License for the specific language governing permissions and# limitations under the License.importosimportpandasaspdimportWORC.addexceptionsasae
def plot_hyperparameters(prediction, label_type=None, estsize=50,
                         output=None, removeconstants=False, verbose=False):
    """Gather which hyperparameters have been used in the best workflows.

    Parameters
    ----------
    prediction: pandas dataframe or string, mandatory
        output of trainclassifier function, either a pandas dataframe
        or a HDF5 file

    label_type: string, default None
        Column of the prediction dataframe to use. If None, or if it
        contains several comma-separated labels (multiclass), the first
        column of the dataframe is used instead.

    estsize: integer, default 50
        Number of estimators that should be taken into account. For each
        classifier, at most the first ``estsize`` entries of
        ``cv_results_['params']`` are used; fewer are used when the
        classifier holds fewer entries.

    output: filename of csv, default None
        Output file to write to. If None, no output is written, but just
        returned as a variable.

    removeconstants: boolean, default False
        Determine whether to remove any hyperparameters which have the
        same value in all workflows. Values are compared by their string
        representation, and the bookkeeping columns (e.g. 'Metric') are
        subject to removal as well.

    verbose: boolean, default False
        Whether to show print messages or not.

    Returns
    -------
    data: dict
        Maps each column name (hyperparameter or statistic) to a list with
        one value per gathered workflow. A hyperparameter that is absent
        from a particular workflow is represented by None.

    Raises
    ------
    WORCIOError
        If prediction is not a dataframe and not an existing file.
    """
    # Load the prediction file
    if not isinstance(prediction, pd.DataFrame):
        if os.path.isfile(prediction):
            prediction = pd.read_hdf(prediction)
        else:
            raise ae.WORCIOError(f'{prediction} is not an existing file!')

    # Select the estimator from the pandas dataframe to use. For multiclass
    # (comma-separated) labels, just take the first prediction label.
    keys = prediction.keys()
    if label_type is None or len(label_type.split(',')) != 1:
        label_type = keys[0]

    prediction = prediction[label_type]

    # (output column, key in cv_results_) pairs, in output column order.
    # NOTE: while cv_results_ calls it test score, it is the score on the
    # validation dataset(s), hence the renaming.
    statistics = (
        ('mean_train_score', 'mean_train_score'),
        ('mean_fit_time', 'mean_fit_time'),
        ('std_train_score', 'std_train_score'),
        ('generalization_score', 'generalization_score'),
        ('rank_generalization_score', 'rank_generalization_score'),
        ('mean_validation_score', 'mean_test_score'),
        ('std_validation_score', 'std_test_score'),
    )

    # Loop over classifiers and gather one row (dict) per workflow
    rows = []
    classifiers = prediction.classifiers
    total = len(classifiers)
    for cnum, cls in enumerate(classifiers):
        if verbose:
            print(f'Extracting hyperparameters for iteration {cnum+1} / {total}.')

        cv_results = cls.cv_results_

        # Slicing caps the number of workflows at estsize without indexing
        # past the end when fewer workflows are available.
        for rank, params in enumerate(cv_results['params'][:estsize]):
            # Copy, so the classifier's own cv_results_ stay untouched
            row = dict(params)

            # Add which (cross-validation) iteration is used and the rank.
            # Rank is the position in cv_results_['params']; presumably
            # that list is ordered from best to worst -- verify upstream.
            row['Iteration'] = cnum + 1
            row['Rank'] = rank + 1

            # Add some statistics
            row['Metric'] = cls.scoring
            for column, source in statistics:
                row[column] = cv_results[source][rank]

            rows.append(row)

    # Collect the columns in first-seen order over all rows, so workflows
    # with differing hyperparameter sets do not break the table.
    columns = []
    seen = set()
    for row in rows:
        for key in row:
            if key not in seen:
                seen.add(key)
                columns.append(key)

    # Column-wise data object; None pads hyperparameters a row lacks, which
    # keeps all columns equally long.
    data = {key: [row.get(key) for row in rows] for key in columns}

    # Optionally, remove any hyperparameters which have the same
    # value in all workflows.
    n_parameters = len(data)
    if removeconstants:
        if verbose:
            print('Removing parameters with constant values.')

        for key in list(data):
            # Convert all values to strings so unhashable values can be
            # put in a set, then delete the column if only one is unique.
            if len({str(value) for value in data[key]}) == 1:
                if verbose:
                    print(f'\t Removing parameter {key}.')
                del data[key]

    # Write to csv if output name is provided
    if output is not None:
        if verbose:
            print(f'Writing output to {output}.')

        # First, specify order of columns for easy reading. Starters that
        # were removed as constants (or never gathered) are skipped.
        starters = [key for key in ('Iteration', 'Rank', 'Metric',
                                    'mean_validation_score',
                                    'mean_train_score', 'mean_fit_time')
                    if key in data]
        others = [key for key in data if key not in starters]

        # Write to dataframe
        df = pd.DataFrame(data)
        df.to_csv(output, index=False, columns=starters + others)

    # Display some information
    if verbose:
        print(f'Number of hyperparameters: {n_parameters}.')
        if removeconstants:
            n_parameters_unique = len(data)
            print(f'Number of hyperparameters with unique values: {n_parameters_unique}.')

    return data