#!/usr/bin/env python

# Copyright 2016-2020 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from sklearn.impute import SimpleImputer, KNNImputer
class Imputer(object):
    """Module for feature imputation."""

    def __init__(self, missing_values='nan', strategy='mean', n_neighbors=5):
        """Set up the imputation backend for the requested strategy.

        Parameters
        ----------
        missing_values : number, string, float NaN or None (default='nan')
            The placeholder for the missing values. All occurrences of
            `missing_values` will be imputed. The strings 'nan' / 'NaN'
            (any casing) are accepted as an alias for float NaN and are
            converted to it before being stored and passed on.

        strategy : string, optional (default="mean")
            The imputation strategy.

            Handled by ``SimpleImputer``:

            - If "mean", then replace missing values using the mean along
              each column. Can only be used with numeric data.
            - If "median", then replace missing values using the median
              along each column. Can only be used with numeric data.
            - If "most_frequent", then replace missing using the most
              frequent value along each column. Can be used with strings
              or numeric data.
            - If "constant", then replace missing values with a constant.
              No fill value is forwarded by this class, so the
              ``SimpleImputer`` default applies.

            Handled by ``KNNImputer``:

            - If "knn", then use a nearest neighbor search.

            Any other value leaves ``self.Imputer`` unset (more strategies
            are work in progress).

        n_neighbors : int, optional (default=5)
            Number of neighboring samples to use for imputation if the
            strategy is "knn".
        """
        # The scikit-learn imputers only recognise NaN when it is given as
        # a float; a string such as 'nan' or 'NaN' is treated as an
        # ordinary value, so actual NaN entries would not be imputed.
        # Translate the string alias once, for every strategy.
        if isinstance(missing_values, str) and missing_values.lower() == 'nan':
            missing_values = float('nan')

        # Set parameters to object
        self.missing_values = missing_values
        self.strategy = strategy
        self.n_neighbors = n_neighbors

        # Depending on the imputation strategy, use a specific estimator
        if strategy in ('mean', 'median', 'most_frequent', 'constant'):
            self.Imputer = SimpleImputer(missing_values=self.missing_values,
                                         strategy=self.strategy)
        elif strategy == 'knn':
            self.Imputer = KNNImputer(missing_values=self.missing_values,
                                      n_neighbors=self.n_neighbors)