/*
 * Decompiled with CFR 0.152.
 */
package weka.filters.unsupervised.attribute;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Enumeration;
import java.util.Vector;
import weka.core.AbstractInstance;
import weka.core.Attribute;
import weka.core.Capabilities;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;
import weka.core.Option;
import weka.core.Range;
import weka.core.RevisionUtils;
import weka.core.SparseInstance;
import weka.core.Utils;
import weka.core.WeightedInstancesHandler;
import weka.filters.UnsupervisedFilter;
import weka.filters.unsupervised.attribute.PotentialClassIgnorer;

public class Discretize
extends PotentialClassIgnorer
implements UnsupervisedFilter,
WeightedInstancesHandler {
    static final long serialVersionUID = -1358531742174527279L;
    protected Range m_DiscretizeCols = new Range();
    protected int m_NumBins = 10;
    protected double m_DesiredWeightOfInstancesPerInterval = -1.0;
    protected double[][] m_CutPoints = null;
    protected boolean m_MakeBinary = false;
    protected boolean m_UseBinNumbers = false;
    protected boolean m_FindNumBins = false;
    protected boolean m_UseEqualFrequency = false;
    protected String m_DefaultCols;

    public Discretize() {
        this.m_DefaultCols = "first-last";
        this.setAttributeIndices("first-last");
    }

    public Discretize(String cols) {
        this.m_DefaultCols = cols;
        this.setAttributeIndices(cols);
    }

    @Override
    public Enumeration<Option> listOptions() {
        Vector<Option> result = new Vector<Option>();
        result.addElement(new Option("\tSpecifies the (maximum) number of bins to divide numeric attributes into.\n\t(default = 10)", "B", 1, "-B <num>"));
        result.addElement(new Option("\tSpecifies the desired weight of instances per bin for\n\tequal-frequency binning. If this is set to a positive\n\tnumber then the -B option will be ignored.\n\t(default = -1)", "M", 1, "-M <num>"));
        result.addElement(new Option("\tUse equal-frequency instead of equal-width discretization.", "F", 0, "-F"));
        result.addElement(new Option("\tOptimize number of bins using leave-one-out estimate\n\tof estimated entropy (for equal-width discretization).\n\tIf this is set then the -B option will be ignored.", "O", 0, "-O"));
        result.addElement(new Option("\tSpecifies list of columns to Discretize. First and last are valid indexes.\n\t(default: first-last)", "R", 1, "-R <col1,col2-col4,...>"));
        result.addElement(new Option("\tInvert matching sense of column indexes.", "V", 0, "-V"));
        result.addElement(new Option("\tOutput binary attributes for discretized attributes.", "D", 0, "-D"));
        result.addElement(new Option("\tUse bin numbers rather than ranges for discretized attributes.", "Y", 0, "-Y"));
        result.addAll(Collections.list(super.listOptions()));
        return result.elements();
    }

    @Override
    public void setOptions(String[] options) throws Exception {
        this.setMakeBinary(Utils.getFlag('D', options));
        this.setUseBinNumbers(Utils.getFlag('Y', options));
        this.setUseEqualFrequency(Utils.getFlag('F', options));
        this.setFindNumBins(Utils.getFlag('O', options));
        this.setInvertSelection(Utils.getFlag('V', options));
        String weight = Utils.getOption('M', options);
        if (weight.length() != 0) {
            this.setDesiredWeightOfInstancesPerInterval(new Double(weight));
        } else {
            this.setDesiredWeightOfInstancesPerInterval(-1.0);
        }
        String numBins = Utils.getOption('B', options);
        if (numBins.length() != 0) {
            this.setBins(Integer.parseInt(numBins));
        } else {
            this.setBins(10);
        }
        String convertList = Utils.getOption('R', options);
        if (convertList.length() != 0) {
            this.setAttributeIndices(convertList);
        } else {
            this.setAttributeIndices(this.m_DefaultCols);
        }
        if (this.getInputFormat() != null) {
            this.setInputFormat(this.getInputFormat());
        }
        super.setOptions(options);
        Utils.checkForRemainingOptions(options);
    }

    @Override
    public String[] getOptions() {
        Vector<String> result = new Vector<String>();
        if (this.getMakeBinary()) {
            result.add("-D");
        }
        if (this.getUseBinNumbers()) {
            result.add("-Y");
        }
        if (this.getUseEqualFrequency()) {
            result.add("-F");
        }
        if (this.getFindNumBins()) {
            result.add("-O");
        }
        if (this.getInvertSelection()) {
            result.add("-V");
        }
        result.add("-B");
        result.add("" + this.getBins());
        result.add("-M");
        result.add("" + this.getDesiredWeightOfInstancesPerInterval());
        if (!this.getAttributeIndices().equals("")) {
            result.add("-R");
            result.add(this.getAttributeIndices());
        }
        Collections.addAll(result, super.getOptions());
        return result.toArray(new String[result.size()]);
    }

    @Override
    public Capabilities getCapabilities() {
        Capabilities result = super.getCapabilities();
        result.disableAll();
        result.enableAllAttributes();
        result.enable(Capabilities.Capability.MISSING_VALUES);
        result.enableAllClasses();
        result.enable(Capabilities.Capability.MISSING_CLASS_VALUES);
        if (!this.getMakeBinary()) {
            result.enable(Capabilities.Capability.NO_CLASS);
        }
        return result;
    }

    @Override
    public boolean setInputFormat(Instances instanceInfo) throws Exception {
        if (this.m_MakeBinary && this.m_IgnoreClass) {
            throw new IllegalArgumentException("Can't ignore class when changing the number of attributes!");
        }
        super.setInputFormat(instanceInfo);
        this.m_DiscretizeCols.setUpper(instanceInfo.numAttributes() - 1);
        this.m_CutPoints = null;
        if (this.getFindNumBins() && this.getUseEqualFrequency()) {
            throw new IllegalArgumentException("Bin number optimization in conjunction with equal-frequency binning not implemented.");
        }
        return false;
    }

    @Override
    public boolean input(Instance instance) {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_NewBatch) {
            this.resetQueue();
            this.m_NewBatch = false;
        }
        if (this.m_CutPoints != null) {
            this.convertInstance(instance);
            return true;
        }
        this.bufferInput(instance);
        return false;
    }

    @Override
    public boolean batchFinished() {
        if (this.getInputFormat() == null) {
            throw new IllegalStateException("No input instance format defined");
        }
        if (this.m_CutPoints == null) {
            this.calculateCutPoints();
            this.setOutputFormat();
            for (int i2 = 0; i2 < this.getInputFormat().numInstances(); ++i2) {
                this.convertInstance(this.getInputFormat().instance(i2));
            }
        }
        this.flushInput();
        this.m_NewBatch = true;
        return this.numPendingOutput() != 0;
    }

    public String globalInfo() {
        return "An instance filter that discretizes a range of numeric attributes in the dataset into nominal attributes. Discretization is by simple binning. Skips the class attribute if set.";
    }

    public String findNumBinsTipText() {
        return "Optimize number of equal-width bins using leave-one-out. Doesn't work for equal-frequency binning";
    }

    public boolean getFindNumBins() {
        return this.m_FindNumBins;
    }

    public void setFindNumBins(boolean newFindNumBins) {
        this.m_FindNumBins = newFindNumBins;
    }

    public String makeBinaryTipText() {
        return "Make resulting attributes binary.";
    }

    public boolean getMakeBinary() {
        return this.m_MakeBinary;
    }

    public void setMakeBinary(boolean makeBinary) {
        this.m_MakeBinary = makeBinary;
    }

    public String useBinNumbersTipText() {
        return "Use bin numbers (eg BXofY) rather than ranges for for discretized attributes";
    }

    public boolean getUseBinNumbers() {
        return this.m_UseBinNumbers;
    }

    public void setUseBinNumbers(boolean useBinNumbers) {
        this.m_UseBinNumbers = useBinNumbers;
    }

    public String desiredWeightOfInstancesPerIntervalTipText() {
        return "Sets the desired weight of instances per interval for equal-frequency binning.";
    }

    public double getDesiredWeightOfInstancesPerInterval() {
        return this.m_DesiredWeightOfInstancesPerInterval;
    }

    public void setDesiredWeightOfInstancesPerInterval(double newDesiredNumber) {
        this.m_DesiredWeightOfInstancesPerInterval = newDesiredNumber;
    }

    public String useEqualFrequencyTipText() {
        return "If set to true, equal-frequency binning will be used instead of equal-width binning.";
    }

    public boolean getUseEqualFrequency() {
        return this.m_UseEqualFrequency;
    }

    public void setUseEqualFrequency(boolean newUseEqualFrequency) {
        this.m_UseEqualFrequency = newUseEqualFrequency;
    }

    public String binsTipText() {
        return "Number of bins.";
    }

    public int getBins() {
        return this.m_NumBins;
    }

    public void setBins(int numBins) {
        this.m_NumBins = numBins;
    }

    public String invertSelectionTipText() {
        return "Set attribute selection mode. If false, only selected (numeric) attributes in the range will be discretized; if true, only non-selected attributes will be discretized.";
    }

    public boolean getInvertSelection() {
        return this.m_DiscretizeCols.getInvert();
    }

    public void setInvertSelection(boolean invert) {
        this.m_DiscretizeCols.setInvert(invert);
    }

    public String attributeIndicesTipText() {
        return "Specify range of attributes to act on. This is a comma separated list of attribute indices, with \"first\" and \"last\" valid values. Specify an inclusive range with \"-\". E.g: \"first-3,5,6-10,last\".";
    }

    public String getAttributeIndices() {
        return this.m_DiscretizeCols.getRanges();
    }

    public void setAttributeIndices(String rangeList) {
        this.m_DiscretizeCols.setRanges(rangeList);
    }

    public void setAttributeIndicesArray(int[] attributes) {
        this.setAttributeIndices(Range.indicesToRangeList(attributes));
    }

    public double[] getCutPoints(int attributeIndex) {
        if (this.m_CutPoints == null) {
            return null;
        }
        return this.m_CutPoints[attributeIndex];
    }

    public String getBinRangesString(int attributeIndex) {
        if (this.m_CutPoints == null) {
            return null;
        }
        double[] cutPoints = this.m_CutPoints[attributeIndex];
        if (cutPoints == null) {
            return "All";
        }
        StringBuilder sb = new StringBuilder();
        boolean first = true;
        int n = cutPoints.length;
        for (int j = 0; j <= n; ++j) {
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            sb.append(Discretize.binRangeString(cutPoints, j));
        }
        return sb.toString();
    }

    private static String binRangeString(double[] cutPoints, int j) {
        assert (cutPoints != null);
        int n = cutPoints.length;
        assert (0 <= j && j <= n);
        return j == 0 ? "(-inf-" + Utils.doubleToString(cutPoints[0], 6) + "]" : (j == n ? "(" + Utils.doubleToString(cutPoints[n - 1], 6) + "-inf)" : "(" + Utils.doubleToString(cutPoints[j - 1], 6) + "-" + Utils.doubleToString(cutPoints[j], 6) + "]");
    }

    protected void calculateCutPoints() {
        this.m_CutPoints = new double[this.getInputFormat().numAttributes()][];
        for (int i2 = this.getInputFormat().numAttributes() - 1; i2 >= 0; --i2) {
            if (!this.m_DiscretizeCols.isInRange(i2) || !this.getInputFormat().attribute(i2).isNumeric() || this.getInputFormat().classIndex() == i2) continue;
            if (this.m_FindNumBins) {
                this.findNumBins(i2);
                continue;
            }
            if (!this.m_UseEqualFrequency) {
                this.calculateCutPointsByEqualWidthBinning(i2);
                continue;
            }
            this.calculateCutPointsByEqualFrequencyBinning(i2);
        }
    }

    protected void calculateCutPointsByEqualWidthBinning(int index) {
        double max = 0.0;
        double min = 1.0;
        for (int i2 = 0; i2 < this.getInputFormat().numInstances(); ++i2) {
            Instance currentInstance = this.getInputFormat().instance(i2);
            if (currentInstance.isMissing(index)) continue;
            double currentVal = currentInstance.value(index);
            if (max < min) {
                max = min = currentVal;
            }
            if (currentVal > max) {
                max = currentVal;
            }
            if (!(currentVal < min)) continue;
            min = currentVal;
        }
        double binWidth = (max - min) / (double)this.m_NumBins;
        double[] cutPoints = null;
        if (this.m_NumBins > 1 && binWidth > 0.0) {
            cutPoints = new double[this.m_NumBins - 1];
            for (int i3 = 1; i3 < this.m_NumBins; ++i3) {
                cutPoints[i3 - 1] = min + binWidth * (double)i3;
            }
        }
        this.m_CutPoints[index] = cutPoints;
    }

    protected void calculateCutPointsByEqualFrequencyBinning(int index) {
        double freq;
        Instances data = new Instances(this.getInputFormat());
        data.sort(index);
        double sumOfWeights = 0.0;
        for (int i2 = 0; i2 < data.numInstances() && !data.instance(i2).isMissing(index); ++i2) {
            sumOfWeights += data.instance(i2).weight();
        }
        double[] cutPoints = new double[this.m_NumBins - 1];
        if (this.getDesiredWeightOfInstancesPerInterval() > 0.0) {
            freq = this.getDesiredWeightOfInstancesPerInterval();
            cutPoints = new double[(int)(sumOfWeights / freq)];
        } else {
            freq = sumOfWeights / (double)this.m_NumBins;
            cutPoints = new double[this.m_NumBins - 1];
        }
        double counter = 0.0;
        double last = 0.0;
        int cpindex = 0;
        int lastIndex = -1;
        for (int i3 = 0; i3 < data.numInstances() - 1 && !data.instance(i3).isMissing(index); ++i3) {
            counter += data.instance(i3).weight();
            sumOfWeights -= data.instance(i3).weight();
            if (!(data.instance(i3).value(index) < data.instance(i3 + 1).value(index))) continue;
            if (counter >= freq) {
                if (freq - last < counter - freq && lastIndex != -1) {
                    cutPoints[cpindex] = (data.instance(lastIndex).value(index) + data.instance(lastIndex + 1).value(index)) / 2.0;
                    last = counter -= last;
                    lastIndex = i3;
                } else {
                    cutPoints[cpindex] = (data.instance(i3).value(index) + data.instance(i3 + 1).value(index)) / 2.0;
                    counter = 0.0;
                    last = 0.0;
                    lastIndex = -1;
                }
                freq = (sumOfWeights + counter) / (double)(cutPoints.length + 1 - ++cpindex);
                continue;
            }
            lastIndex = i3;
            last = counter;
        }
        if (cpindex < cutPoints.length && lastIndex != -1) {
            cutPoints[cpindex] = (data.instance(lastIndex).value(index) + data.instance(lastIndex + 1).value(index)) / 2.0;
            ++cpindex;
        }
        if (cpindex == 0) {
            this.m_CutPoints[index] = null;
        } else {
            double[] cp = new double[cpindex];
            for (int i4 = 0; i4 < cpindex; ++i4) {
                cp[i4] = cutPoints[i4];
            }
            this.m_CutPoints[index] = cp;
        }
    }

    protected void findNumBins(int index) {
        Instance currentInstance;
        int i2;
        double min = Double.MAX_VALUE;
        double max = -1.7976931348623157E308;
        double binWidth = 0.0;
        double bestEntropy = Double.MAX_VALUE;
        int bestNumBins = 1;
        for (i2 = 0; i2 < this.getInputFormat().numInstances(); ++i2) {
            currentInstance = this.getInputFormat().instance(i2);
            if (currentInstance.isMissing(index)) continue;
            double currentVal = currentInstance.value(index);
            if (currentVal > max) {
                max = currentVal;
            }
            if (!(currentVal < min)) continue;
            min = currentVal;
        }
        for (i2 = 0; i2 < this.m_NumBins; ++i2) {
            double[] distribution = new double[i2 + 1];
            binWidth = (max - min) / (double)(i2 + 1);
            block2: for (int j = 0; j < this.getInputFormat().numInstances(); ++j) {
                currentInstance = this.getInputFormat().instance(j);
                if (currentInstance.isMissing(index)) continue;
                for (int k = 0; k < i2 + 1; ++k) {
                    if (!(currentInstance.value(index) <= min + ((double)k + 1.0) * binWidth)) continue;
                    int n = k;
                    distribution[n] = distribution[n] + currentInstance.weight();
                    continue block2;
                }
            }
            double entropy = 0.0;
            for (int k = 0; k < i2 + 1; ++k) {
                if (distribution[k] < 2.0) {
                    entropy = Double.MAX_VALUE;
                    break;
                }
                entropy -= distribution[k] * Math.log((distribution[k] - 1.0) / binWidth);
            }
            if (!(entropy < bestEntropy)) continue;
            bestEntropy = entropy;
            bestNumBins = i2 + 1;
        }
        double[] cutPoints = null;
        if (bestNumBins > 1 && binWidth > 0.0) {
            cutPoints = new double[bestNumBins - 1];
            for (int i3 = 1; i3 < bestNumBins; ++i3) {
                cutPoints[i3 - 1] = min + binWidth * (double)i3;
            }
        }
        this.m_CutPoints[index] = cutPoints;
    }

    protected void setOutputFormat() {
        if (this.m_CutPoints == null) {
            this.setOutputFormat(null);
            return;
        }
        ArrayList<Attribute> attributes = new ArrayList<Attribute>(this.getInputFormat().numAttributes());
        int classIndex = this.getInputFormat().classIndex();
        int m = this.getInputFormat().numAttributes();
        for (int i2 = 0; i2 < m; ++i2) {
            if (this.m_DiscretizeCols.isInRange(i2) && this.getInputFormat().attribute(i2).isNumeric() && this.getInputFormat().classIndex() != i2) {
                double[] cutPoints = this.m_CutPoints[i2];
                if (!this.m_MakeBinary) {
                    ArrayList<String> attribValues;
                    if (cutPoints == null) {
                        attribValues = new ArrayList<String>(1);
                        attribValues.add("'All'");
                    } else {
                        int n;
                        attribValues = new ArrayList(cutPoints.length + 1);
                        if (this.m_UseBinNumbers) {
                            n = cutPoints.length;
                            for (int j = 0; j <= n; ++j) {
                                attribValues.add("'B" + (j + 1) + "of" + (n + 1) + "'");
                            }
                        } else {
                            n = cutPoints.length;
                            for (int j = 0; j <= n; ++j) {
                                attribValues.add("'" + Discretize.binRangeString(cutPoints, j) + "'");
                            }
                        }
                    }
                    Attribute newAtt = new Attribute(this.getInputFormat().attribute(i2).name(), attribValues);
                    newAtt.setWeight(this.getInputFormat().attribute(i2).weight());
                    attributes.add(newAtt);
                    continue;
                }
                if (cutPoints == null) {
                    ArrayList<String> attribValues = new ArrayList<String>(1);
                    attribValues.add("'All'");
                    Attribute newAtt = new Attribute(this.getInputFormat().attribute(i2).name(), attribValues);
                    newAtt.setWeight(this.getInputFormat().attribute(i2).weight());
                    attributes.add(newAtt);
                    continue;
                }
                if (i2 < this.getInputFormat().classIndex()) {
                    classIndex += cutPoints.length - 1;
                }
                int n = cutPoints.length;
                for (int j = 0; j < n; ++j) {
                    ArrayList<String> attribValues = new ArrayList<String>(2);
                    if (this.m_UseBinNumbers) {
                        attribValues.add("'B1of2'");
                        attribValues.add("'B2of2'");
                    } else {
                        double[] binaryCutPoint = new double[]{cutPoints[j]};
                        attribValues.add("'" + Discretize.binRangeString(binaryCutPoint, 0) + "'");
                        attribValues.add("'" + Discretize.binRangeString(binaryCutPoint, 1) + "'");
                    }
                    Attribute newAtt = new Attribute(this.getInputFormat().attribute(i2).name() + "_" + (j + 1), attribValues);
                    newAtt.setWeight(this.getInputFormat().attribute(i2).weight());
                    attributes.add(newAtt);
                }
                continue;
            }
            attributes.add((Attribute)this.getInputFormat().attribute(i2).copy());
        }
        Instances outputFormat = new Instances(this.getInputFormat().relationName(), attributes, 0);
        outputFormat.setClassIndex(classIndex);
        this.setOutputFormat(outputFormat);
    }

    protected void convertInstance(Instance instance) {
        int index = 0;
        double[] vals = new double[this.outputFormatPeek().numAttributes()];
        for (int i2 = 0; i2 < this.getInputFormat().numAttributes(); ++i2) {
            if (this.m_DiscretizeCols.isInRange(i2) && this.getInputFormat().attribute(i2).isNumeric() && this.getInputFormat().classIndex() != i2) {
                int j;
                double currentVal = instance.value(i2);
                if (this.m_CutPoints[i2] == null) {
                    vals[index] = instance.isMissing(i2) ? Utils.missingValue() : 0.0;
                    ++index;
                    continue;
                }
                if (!this.m_MakeBinary) {
                    if (instance.isMissing(i2)) {
                        vals[index] = Utils.missingValue();
                    } else {
                        for (j = 0; j < this.m_CutPoints[i2].length && !(currentVal <= this.m_CutPoints[i2][j]); ++j) {
                        }
                        vals[index] = j;
                    }
                    ++index;
                    continue;
                }
                for (j = 0; j < this.m_CutPoints[i2].length; ++j) {
                    vals[index] = instance.isMissing(i2) ? Utils.missingValue() : (currentVal <= this.m_CutPoints[i2][j] ? 0.0 : 1.0);
                    ++index;
                }
                continue;
            }
            vals[index] = instance.value(i2);
            ++index;
        }
        AbstractInstance inst = null;
        inst = instance instanceof SparseInstance ? new SparseInstance(instance.weight(), vals) : new DenseInstance(instance.weight(), vals);
        this.copyValues(inst, false, instance.dataset(), this.outputFormatPeek());
        this.push(inst);
    }

    @Override
    public String getRevision() {
        return RevisionUtils.extract("$Revision: 12037 $");
    }

    public static void main(String[] argv) {
        Discretize.runFilter(new Discretize(), argv);
    }
}

