Example usage for weka.core Instances add

List of usage examples for weka.core Instances add

Introduction

On this page you can find example usages of the add method of weka.core.Instances.

Prototype

@Override
public boolean add(Instance instance) 

Source Link

Document

Adds one instance to the end of the set.

Usage

From source file:classifier.SentenceBasedTextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined by
 * a call to getStructure then method should do so before processing the
 * rest of the data set./*from  www  .  j ava2  s .c  om*/
 * 
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException
 *             if there is no source or parsing fails
 */
@Override
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    ArrayList<String> classes = new ArrayList<String>();
    ArrayList<String> filenames = new ArrayList<String>();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.add((String) enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    // each class is actually the filename - this is preserved around weka,
    // so its useful for tracking associations later and using as an "index"
    //

    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.get(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {

            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + files[j]);

                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                filenames.add(files[j]);
                BufferedInputStream is;
                is = new BufferedInputStream(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }

                // Here is my extension to Text Directory Loader.
                String regexSentenceSplit = "(\\n)";
                String rawtext = txtStr.toString();
                rawtext = rawtext.toLowerCase();
                rawtext.trim();

                // split the sentences
                String[] sentences = rawtext.split(regexSentenceSplit);
                for (String sentence : sentences) {
                    double[] newInst = null;
                    if (m_OutputFilename)
                        newInst = new double[3];
                    else
                        newInst = new double[2];

                    newInst[0] = (double) data.attribute(0).addStringValue(sentence + "\n");
                    if (m_OutputFilename)
                        newInst[1] = (double) data.attribute(1)
                                .addStringValue(subdirPath + File.separator + files[j]);
                    newInst[data.classIndex()] = (double) k;
                    data.add(new DenseInstance(1.0, newInst));
                    // }
                }

                writeFilenames(directoryPath, filenames);

            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[j]);
            }
        }
    }

    // this.m_structure.setClassIndex(-1);
    return data;
}

From source file:cn.edu.xjtu.dbmine.TextDirectoryLoader.java

License:Open Source License

/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then method should do so before processing
 * the rest of the data set./*from   www  . j ava  2 s.c  om*/
 *
 * @return the structure of the data set as an empty set of Instances
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.addElement(enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = null;
                if (m_OutputFilename)
                    newInst = new double[3];
                else
                    newInst = new double[2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);
                BufferedInputStream is;
                is = new BufferedInputStream(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                /*while ((c = is.read()) != -1) {
                  txtStr.append((char) c);
                }*/
                FileReader fr = new FileReader(txt);
                BufferedReader br = new BufferedReader(fr);
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line + "\n");
                }

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
                is.close();
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + subdirPath
                        + File.separator + files[j]);
            }
        }
    }

    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Builds a one-row data set from the numeric entries of the YAML file at
 * {@code yamlPath}.
 * <p>
 * Every entry whose value parses as a double becomes an attribute carrying
 * a {@code range} property: {@code [0.5*v, 1.5*v]} for a non-zero value
 * {@code v}, and {@code [0, 1]} for zero. Entries that cannot be turned
 * into an attribute are skipped. The single instance of the returned data
 * set holds the parsed values. As a side effect the {@code atts} field is
 * replaced by the list of attributes created here.
 *
 * @param Path currently ignored; the file named by {@code yamlPath} is read
 * @return a data set named "DefaultConfig" containing exactly one instance
 * @throws IllegalStateException if the YAML file cannot be opened or yields no map
 */
public Instances loadPropertiesAsInstancesPre(String Path) {
    HashMap<String, String> pmap = null;
    FileInputStream in = null;
    try {
        in = new FileInputStream(yamlPath);
        pmap = Yaml.loadType(in, HashMap.class);
    } catch (FileNotFoundException e) {
        // previously only a stack trace was printed and the method then
        // died with a NullPointerException; fail with the real cause instead
        throw new IllegalStateException("Cannot open configuration file: " + yamlPath, e);
    } finally {
        if (in != null) {
            try {
                in.close();
            } catch (java.io.IOException ignored) {
                // nothing useful can be done if closing fails
            }
        }
    }
    if (pmap == null)
        throw new IllegalStateException("No configuration entries found in: " + yamlPath);

    atts = new ArrayList<Attribute>();
    int pos = 0;
    double[] vals = new double[pmap.size()];
    for (Map.Entry<String, String> ent : pmap.entrySet()) {
        try {
            double val = Double.parseDouble(String.valueOf(ent.getValue()));

            // NOTE(review): for a negative value the computed "lower" is
            // larger than "upper"; presumably Attribute rejects such a range
            // and the entry is then skipped below - confirm whether intended
            double upper, lower;
            if (val != 0) {
                upper = val * (1. + 0.5);
                lower = val * (1. - 0.5);
            } else {
                lower = val;
                upper = 1;
            }

            Properties p1 = new Properties();
            p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
            ProtectedProperties prop1 = new ProtectedProperties(p1);

            atts.add(new Attribute(String.valueOf(ent.getKey()), prop1));
            // record the value only once the attribute exists, so that
            // vals[i] always belongs to atts.get(i)
            vals[pos] = val;
            pos++;
        } catch (NumberFormatException ignored) {
            // non-numeric settings are deliberately left out
        } catch (Exception e) {
            // keep the best-effort behaviour, but no longer silently
            System.err.println("Skipping property " + String.valueOf(ent.getKey()) + ": " + e);
        }
    }

    Instances dfProp = new Instances("DefaultConfig", atts, 1);
    // size the instance by the attributes actually created, and fill it
    // BEFORE adding: Instances.add stores a copy of the instance, so values
    // set afterwards would never reach the returned data set
    Instance dfIns = new DenseInstance(atts.size());
    for (int i = 0; i < pos; i++) {
        dfIns.setValue(atts.get(i), vals[i]);
    }
    dfProp.add(dfIns);

    return dfProp;
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Builds a one-row data set from the numeric entries of the YAML file at
 * {@code yamlPath}, using explicit ranges from {@code yamlPath + "_range"}
 * where available.
 * <p>
 * Every entry whose value parses as a double becomes an attribute carrying
 * a {@code range} property. If the range file has an entry for the key,
 * that text (trimmed) is used; it must contain '[' or '('. Otherwise the
 * range defaults to {@code [0.5*v, 1.5*v]} for a non-zero value {@code v}
 * and {@code [0, 1]} for zero. Entries that cannot be turned into an
 * attribute are skipped. As a side effect the {@code atts} field is
 * replaced by the list of attributes created here.
 *
 * @param Path currently ignored; the file named by {@code yamlPath} is read
 * @return a data set named "DefaultConfig" containing exactly one instance
 * @throws IllegalStateException if the main YAML file cannot be opened or yields no map
 */
public Instances loadPropertiesAsInstances(String Path) {
    HashMap<String, String> pmap;
    try {
        pmap = readYamlAsMap(yamlPath);
    } catch (FileNotFoundException e) {
        // previously only a stack trace was printed and the method then
        // died with a NullPointerException; fail with the real cause instead
        throw new IllegalStateException("Cannot open configuration file: " + yamlPath, e);
    }
    if (pmap == null)
        throw new IllegalStateException("No configuration entries found in: " + yamlPath);

    // A missing range file used to leave rangeMap null, which made every
    // entry fail inside the loop and produced a data set without attributes.
    // Treat it as "no explicit ranges" so the default ranges apply.
    HashMap rangeMap = null;
    try {
        rangeMap = readYamlAsMap(yamlPath + "_range");
    } catch (FileNotFoundException e) {
        System.err.println("Range file not found, using default ranges: " + yamlPath + "_range");
    }

    atts = new ArrayList<Attribute>();
    int pos = 0;
    double[] vals = new double[pmap.size()];
    for (Map.Entry<String, String> ent : pmap.entrySet()) {
        try {
            double val = Double.parseDouble(String.valueOf(ent.getValue()));

            Properties p1 = new Properties();
            Object range = rangeMap == null ? null : rangeMap.get(ent.getKey());
            if (range != null) {
                String list = (String) range;
                if (list.indexOf('[') == -1 && list.indexOf('(') == -1)
                    throw new IllegalArgumentException("malformed range \"" + list + "\"");
                p1.setProperty("range", list.trim());
            } else {
                // NOTE(review): for a negative value the computed "lower" is
                // larger than "upper"; presumably Attribute rejects such a
                // range and the entry is then skipped - confirm whether intended
                double upper, lower;
                if (val != 0) {
                    upper = val * (1. + 0.5);
                    lower = val * (1. - 0.5);
                } else {
                    lower = val;
                    upper = 1;
                }
                p1.setProperty("range", "[" + String.valueOf(lower) + "," + String.valueOf(upper) + "]");
            }

            ProtectedProperties prop1 = new ProtectedProperties(p1);

            atts.add(new Attribute(String.valueOf(ent.getKey()), prop1));
            // record the value only once the attribute exists, so that
            // vals[i] always belongs to atts.get(i)
            vals[pos] = val;
            pos++;
        } catch (NumberFormatException ignored) {
            // non-numeric settings are deliberately left out
        } catch (Exception e) {
            // keep the best-effort behaviour, but no longer silently
            System.err.println("Skipping property " + String.valueOf(ent.getKey()) + ": " + e);
        }
    }

    Instances dfProp = new Instances("DefaultConfig", atts, 1);
    // fill the instance before adding it: Instances.add stores a copy
    Instance dfIns = new DenseInstance(atts.size());
    for (int i = 0; i < pos; i++) {
        dfIns.setValue(atts.get(i), vals[i]);
    }
    dfProp.add(dfIns);

    return dfProp;
}

/**
 * Loads a YAML file into a map and always closes the underlying stream.
 *
 * @param path location of the YAML file
 * @return the map produced by the YAML parser (whatever it returns, possibly null)
 * @throws FileNotFoundException if the file cannot be opened
 */
private static HashMap readYamlAsMap(String path) throws FileNotFoundException {
    FileInputStream in = new FileInputStream(path);
    try {
        return Yaml.loadType(in, HashMap.class);
    } finally {
        try {
            in.close();
        } catch (java.io.IOException ignored) {
            // nothing useful can be done if closing fails
        }
    }
}

From source file:cn.ict.zyq.bestConf.bestConf.BestConf.java

License:Open Source License

/**
 * Manual experiment: trains a COMT2 model on a stored training set,
 * prints its evaluation summary, saves the instance the model reports as
 * having the possibly maximal class value, and finally saves a copy of the
 * training set whose class values are replaced by the model's predictions.
 *
 * @throws Exception if loading, training, evaluation or saving fails
 */
public static void testCOMT2() throws Exception {
    BestConf conf = new BestConf();

    // training data: the last attribute is the class
    Instances train = DataIOFile.loadDataFromArffFile("data/trainingBestConf0.arff");
    int classPos = train.numAttributes() - 1;
    train.setClassIndex(classPos);

    // unlabeled sample points, extended with the class attribute
    Instances samples = LHSInitializer.getMultiDimContinuous(conf.getAttributes(), InitialSampleSetSize,
            false);
    samples.insertAttributeAt(train.classAttribute(), samples.numAttributes());
    samples.setClassIndex(samples.numAttributes() - 1);

    COMT2 model = new COMT2(samples, COMT2Iteration);
    model.buildClassifier(train);

    Evaluation evaluation = new Evaluation(train);
    evaluation.evaluateModel(model, train);
    System.err.println(evaluation.toSummaryString());

    // store the best instance according to the model
    Instances bestOnly = new Instances(train, 2);
    bestOnly.add(model.getInstanceWithPossibleMaxY(samples.firstInstance()));
    DataIOFile.saveDataToXrffFile("data/trainingBestConf_COMT2.arff", bestOnly);

    // store the training set with each class value replaced by its prediction
    int total = train.numInstances();
    Instances predicted = new Instances(train, total);
    for (int i = 0; i < total; i++) {
        Instance current = train.instance(i);
        double[] row = current.toDoubleArray();
        row[row.length - 1] = model.classifyInstance(current);
        predicted.add(current.copy(row));
    }
    DataIOFile.saveDataToXrffFile("data/trainingBestConf0_predict.xrff", predicted);
}

From source file:cn.ict.zyq.bestConf.bestConf.RBSoDDSOptimization.java

License:Open Source License

/**
 * Runs the round-based search until {@code opParams.currentround} reaches
 * {@code RRSMaxRounds}, then reports and stores the best configuration found.
 * <p>
 * Each loop iteration obtains sample points and a training set (by sampling
 * and running experiments, by reusing restored state, or - in the very first
 * iteration only - by loading {@code preLoadDatasetPath}), picks the best
 * instance via {@code BestConf.findBestPerf} and compares its last attribute
 * value with {@code opParams.currentBest}. On improvement the properties are
 * narrowed with {@code ConfigSampler.scaleDownDetour} and the sub-round is
 * incremented; otherwise the sample points are discarded and the next round
 * starts. Intermediate state is saved after each step so a run can resume.
 *
 * @param preLoadDatasetPath ARFF file used to seed round 0 / sub-round 0, or
 *        {@code null} to sample instead; ignored when a previous run is resumed
 */
@Override
public void optimize(String preLoadDatasetPath) {
    ResumeParams rParams = resumePrepareTry();
    // true only during the first loop iteration of a resumed run
    boolean justAfterResume = rParams.isResuming;

    //detect whether we need to resume
    if (rParams.isResuming)
        preLoadDatasetPath = null;

    double tempBest;

    while (opParams.currentround < RRSMaxRounds) {
        //is it a global search
        if (samplePoints == null || rParams.propsRound < opParams.currentround) {
            props = bestconf.getAttributes();
            saveProps(props, opParams.currentround, opParams.subround);//for resumability
            opParams.saveToFile();
        }

        if (opParams.currentround != 0 || opParams.subround != 0) {
            // sample unless we just resumed and the restored sample points
            // already belong to this round/sub-round
            if (!justAfterResume || (justAfterResume && (rParams.samplePointRound < opParams.currentround
                    || rParams.samplePointSubRound < opParams.subround))) {
                //let's do the sampling
                ((DDSSampler) sampler).setCurrentRound(opParams.currentround);
                samplePoints = sampler.getMultiDimContinuous(props, InitialSampleSetSize, false, bestconf);
                saveSamplePoints(samplePoints, opParams.currentround, opParams.subround);
            }

            // same idea for the training set; note that && binds tighter than
            // ||, so this is "!resume || round-is-older || subround-is-older"
            if (!justAfterResume || (justAfterResume && rParams.trainingRound < opParams.currentround
                    || rParams.trainingSubRound < opParams.subround)) {
                //traverse the set and initiate the experiments
                trainingSet = bestconf.runExp(samplePoints, opParams.currentround,
                        "RRS" + String.valueOf(opParams.subround), justAfterResume);
                saveTrainingSet(trainingSet, opParams.currentround, opParams.subround);
            }
        } else {//(currentround==0 && subround==0)
            if (preLoadDatasetPath == null) {

                if (samplePoints == null) {
                    //let's do the sampling
                    ((DDSSampler) sampler).setCurrentRound(opParams.currentround);
                    samplePoints = sampler.getMultiDimContinuous(props, InitialSampleSetSize, false, bestconf);
                    // the default settings are put first in the initial sample set
                    samplePoints.add(0, bestconf.defltSettings.firstInstance());
                    saveSamplePoints(samplePoints, opParams.currentround, opParams.subround);
                }
                if (trainingSet == null) {
                    //traverse the set and initiate the experiments
                    trainingSet = bestconf.runExp(samplePoints, opParams.currentround,
                            "RRS" + String.valueOf(opParams.subround), justAfterResume);
                    saveTrainingSet(trainingSet, opParams.currentround, opParams.subround);
                }
            } else {
                // seed the first round from a previously stored data set;
                // its last attribute is used as the class
                try {
                    bestconf.allInstances = DataIOFile.loadDataFromArffFile(preLoadDatasetPath);
                    bestconf.allInstances.setClassIndex(bestconf.allInstances.numAttributes() - 1);
                    samplePoints = trainingSet = new Instances(bestconf.allInstances);

                    saveSamplePoints(samplePoints, opParams.currentround, opParams.subround);
                    saveTrainingSet(trainingSet, opParams.currentround, opParams.subround);
                } catch (IOException e) {
                    // NOTE(review): if loading fails, trainingSet may still be
                    // null when it is used right below - confirm
                    e.printStackTrace();
                }
            }
        }

        //get the point with the best performance
        Instance tempIns = BestConf.findBestPerf(trainingSet);
        tempBest = tempIns.value(trainingSet.numAttributes() - 1);
        // improvement, or a resumed run that stopped right after recording
        // this same best value but before the properties were narrowed
        if (tempBest > opParams.currentBest || (justAfterResume && tempBest == opParams.currentBest
                && (rParams.propsRound < opParams.currentround || rParams.propsSubRound < opParams.subround))) {
            System.err.println("Previous best is " + opParams.currentBest + "; Current best is " + tempBest);

            opParams.currentBest = tempBest;
            opParams.currentIns = tempIns;
            opParams.saveToFile();

            try {
                //output the best instance of this round
                Instances bestInstances = new Instances(samplePoints, 1);
                bestInstances.add(opParams.currentIns);
                DataIOFile.saveDataToArffFile("data/trainingBestConf_RRS_" + opParams.currentround + "_"
                        + opParams.subround + "_" + opParams.currentBest + ".arff", bestInstances);
            } catch (IOException e) {
                e.printStackTrace();
            }

            //let's search locally
            if (!justAfterResume || (justAfterResume && rParams.propsRound < opParams.currentround
                    || rParams.propsSubRound < opParams.subround)) {
                props = ConfigSampler.scaleDownDetour(trainingSet, tempIns);
                saveProps(props, opParams.currentround, opParams.subround);//for resumability
            }

            opParams.subround++;
            opParams.saveToFile();
        } else {//let's do the restart
            // dropping the sample points makes the next iteration reload the
            // full attribute set (see the check at the top of the loop)
            samplePoints = null;

            opParams.currentround++;
            opParams.subround = 0;
            opParams.saveToFile();

            System.err.println("Entering into round " + opParams.currentround);
            /*if(opParams.currentround>=RRSMaxRounds)
               break;*/
        }

        // the resume-specific checks apply to the first iteration only
        justAfterResume = false;
    } //RRS search

    System.err.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
    System.err.println("We are ending the optimization experiments!");
    System.err.println("Please wait and don't shutdown!");
    System.err.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");
    System.err.println(">>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>");

    //output the best
    Map<Attribute, Double> attsmap = BestConf.instanceToMap(opParams.currentIns);
    System.out.println(attsmap.toString());

    //set the best configuration to the cluster
    System.err.println("The best performance is : " + opParams.currentBest);

    System.out.println("=========================================");
    TxtFileOperation.writeToFile("bestConfOutput_RRS", attsmap.toString() + "\n");

    System.out.println("=========================================");

    //output the whole trainings dataset
    try {
        DataIOFile.saveDataToArffFile("data/trainingAllRSS.arff", bestconf.allInstances);
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.DDSSampler.java

License:Open Source License

/**
 * Produces {@code sampleSetSize} sample points over the given attributes.
 * In the current version all attributes are assumed to be numeric with bounds.
 * <p>
 * On the first call a pool of distinct candidate sample sets is generated
 * and the one with the largest minimum pairwise distance is moved to
 * position 0. Every call then turns the set at index
 * {@code sampleSetToGet} into concrete values by picking a point inside
 * the selected subdomain of each attribute.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of points to produce, and number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "SamplesByLHS" holding the generated points
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    final int dims = atts.size();

    //the pool of candidate sets is built only once
    if (sets == null) {
        //upper limit on how many candidate sets are generated
        double cap;
        if (dims > 2) {
            cap = Math.pow(sampleSetSize, dims - 1);
        } else if (dims > 1) {
            cap = sampleSetSize;
        } else {
            cap = 1;
        }
        int L = (int) Math.min(rounds, cap);

        //initialization
        dists = new long[L];
        sets = new ArrayList[L][];
        for (int idx = 0; idx < L; idx++) {
            dists[idx] = -1;
            sets[idx] = null;
        }

        long bestDist = -1;
        int bestPos = -1;
        //generate L distinct sets of sampleSetSize points
        for (int idx = 0; idx < L; idx++) {
            ArrayList<Integer>[] candidate;
            do {
                //regenerate until the candidate differs from all stored sets
                candidate = generateOneSampleSet(sampleSetSize, dims);
            } while (inAlready(sets, candidate));
            sets[idx] = candidate;

            //remember the set whose minimum pairwise distance is largest
            dists[idx] = minDistForSet(candidate);
            if (dists[idx] > bestDist) {
                bestPos = idx;
                bestDist = dists[idx];
            }
        }
        //now let the first sample set be the one with the max mindist
        positionSwitch(sets, dists, 0, bestPos);
    }
    ArrayList<Integer>[] chosen = sets[sampleSetToGet];

    //first, divide the domain of each attribute into sampleSetSize equal subdomains;
    //sampleSetSize+1 entries to include the lower and upper bounds
    double[][] bounds = new double[dims][sampleSetSize + 1];
    boolean[] roundToInt = new boolean[dims];
    for (int a = 0; a < bounds.length; a++) {
        uniBoundsGeneration(bounds[a], atts.get(a), sampleSetSize);

        //wide domains are sampled at integer positions
        roundToInt[a] = bounds[a][sampleSetSize] - bounds[a][0] > sampleSetSize;
    }

    //second, generate the points according to the chosen set
    Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
    for (int p = 0; p < sampleSetSize; p++) {
        double[] point = new double[dims];
        for (int a = 0; a < point.length; a++) {
            int slot = chosen[a].get(p);
            double lo = bounds[a][slot];
            double hi = bounds[a][slot + 1];
            if (useMid) {
                point[a] = (lo + hi) / 2;
            } else {
                point[a] = lo + ((hi - lo) * uniRand.nextDouble());
            }
            if (roundToInt[a]) {
                point[a] = (int) point[a];
            }
        }
        data.add(new DenseInstance(1.0, point));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Assumptions:(1)Numberic is continuous and has lower/upper bounds; (2) Nominals have domains permutable
 * /*from  w  w w. j  a v a  2  s .  co  m*/
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 */
private static Instances getMultiDim(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist
        if (crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    Iterator<Attribute> itr = atts.iterator();
    Attribute crntAttr;
    double pace;
    for (int i = 0; i < bounds.length; i++) {
        crntAttr = itr.next();

        if (crntAttr.isNumeric()) {
            bounds[i][0] = crntAttr.getLowerNumericBound();
            bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
            pace = (crntAttr.getUpperNumericBound() - crntAttr.getLowerNumericBound()) / sampleSetSize;
            for (int j = 1; j < sampleSetSize; j++) {
                bounds[i][j] = bounds[i][j - 1] + pace;
            }
        } else {//crntAttr.isNominal()
            if (crntAttr.numValues() >= sampleSetSize) {
                //randomly select among the set
                for (int j = 0; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());//the position of one of the nominal values
            } else {
                //first round-robin
                int lastPart = sampleSetSize % crntAttr.numValues();
                for (int j = 0; j < sampleSetSize - lastPart; j++)
                    bounds[i][j] = j % crntAttr.numValues();
                //then randomly select
                for (int j = sampleSetSize - lastPart; j <= sampleSetSize; j++)
                    bounds[i][j] = uniRand.nextInt(crntAttr.numValues());
            }
        } //nominal attribute
    } //get all subdomains

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            if (atts.get(j).isNumeric()) {
                vals[j] = useMid
                        ? (bounds[j][setWithMaxMinDist[j].get(i)] + bounds[j][setWithMaxMinDist[j].get(i) + 1])
                                / 2
                        : bounds[j][setWithMaxMinDist[j].get(i)] + ((bounds[j][setWithMaxMinDist[j].get(i) + 1]
                                - bounds[j][setWithMaxMinDist[j].get(i)]) * uniRand.nextDouble());
            } else {//isNominal()
                vals[j] = bounds[j][setWithMaxMinDist[j].get(i)];
            }
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Produces {@code sampleSetSize} sample points over the given attributes.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * <p>
 * Several candidate sample sets are generated and the one with the largest
 * minimum pairwise distance is kept. The domain of each attribute is cut
 * into subdomains by {@code uniBoundsGeneration} and a point is taken from
 * the selected subdomain. Attributes whose domain is wider than
 * {@code sampleSetSize} are truncated to integer values.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of points to produce, and number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "SamplesByLHS" holding the generated points
 */
public Instances sampleMultiDimContinuous(ArrayList<Attribute> atts, int sampleSetSize, boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist; the first candidate is always
        //accepted so that a set is available even when no candidate reaches a
        //strictly positive distance (this used to end in a NullPointerException)
        if (setWithMaxMinDist == null || crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    boolean[] roundToInt = new boolean[atts.size()];
    for (int i = 0; i < bounds.length; i++) {
        uniBoundsGeneration(bounds[i], atts.get(i), sampleSetSize);

        //wide domains are sampled at integer positions
        if (bounds[i][sampleSetSize] - bounds[i][0] > sampleSetSize)
            roundToInt[i] = true;
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("SamplesByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            //index of the subdomain chosen for sample i in dimension j
            int slot = setWithMaxMinDist[j].get(i);
            double lo = bounds[j][slot];
            double hi = bounds[j][slot + 1];
            vals[j] = useMid ? (lo + hi) / 2 : lo + ((hi - lo) * uniRand.nextDouble());
            if (roundToInt[j])
                vals[j] = (int) vals[j];
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}

From source file:cn.ict.zyq.bestConf.bestConf.sampler.LHSSampler.java

License:Open Source License

/**
 * Produces {@code sampleSetSize} sample points over the given attributes.
 * At current version, we assume all attributes are numeric attributes with bounds.
 * <p>
 * Let PACE be upper-lower DIVided by the sampleSetSize: the domain of each
 * attribute is cut into {@code sampleSetSize} subdomains of width PACE.
 * Several candidate sample sets are generated, the one with the largest
 * minimum pairwise distance is kept, and a point is taken from the
 * selected subdomain of each attribute.
 *
 * @param atts the attributes to sample over
 * @param sampleSetSize number of points to produce, and number of subdomains per attribute
 * @param useMid true if to use the middle point of a subdomain, false if to use a random point within a subdomain
 * @return a data set named "InitialSetByLHS" holding the generated points
 */
private static Instances getMultiDimContinuousDiv(ArrayList<Attribute> atts, int sampleSetSize,
        boolean useMid) {

    int L = Math.min(7, Math.max(sampleSetSize, atts.size()));//7 is chosen for no special reason
    double maxMinDist = 0, crntMinDist;//work as the threshold to select the sample set
    ArrayList<Integer>[] setWithMaxMinDist = null;
    //generate L sets of sampleSetSize points
    for (int i = 0; i < L; i++) {
        ArrayList<Integer>[] setPerm = generateOneSampleSet(sampleSetSize, atts.size());
        //compute the minimum distance minDist between any sample pair for each set
        crntMinDist = minDistForSet(setPerm);
        //select the set with the maximum minDist; the first candidate is always
        //accepted so that a set is available even when no candidate reaches a
        //strictly positive distance (this used to end in a NullPointerException)
        if (setWithMaxMinDist == null || crntMinDist > maxMinDist) {
            setWithMaxMinDist = setPerm;
            maxMinDist = crntMinDist;
        }
    }

    //generate and output the set with the maximum minDist as the result

    //first, divide the domain of each attribute into sampleSetSize equal subdomain
    double[][] bounds = new double[atts.size()][sampleSetSize + 1];//sampleSetSize+1 to include the lower and upper bounds
    for (int i = 0; i < bounds.length; i++) {
        Attribute crntAttr = atts.get(i);

        bounds[i][0] = crntAttr.getLowerNumericBound();
        bounds[i][sampleSetSize] = crntAttr.getUpperNumericBound();
        double pace = (bounds[i][sampleSetSize] - bounds[i][0]) / sampleSetSize;
        for (int j = 1; j < sampleSetSize; j++) {
            bounds[i][j] = bounds[i][j - 1] + pace;
        }
    }

    //second, generate the set according to setWithMaxMinDist
    Instances data = new Instances("InitialSetByLHS", atts, sampleSetSize);
    for (int i = 0; i < sampleSetSize; i++) {
        double[] vals = new double[atts.size()];
        for (int j = 0; j < vals.length; j++) {
            //index of the subdomain chosen for sample i in dimension j
            int slot = setWithMaxMinDist[j].get(i);
            double lo = bounds[j][slot];
            double hi = bounds[j][slot + 1];
            vals[j] = useMid ? (lo + hi) / 2 : lo + ((hi - lo) * uniRand.nextDouble());
        }
        data.add(new DenseInstance(1.0, vals));
    }

    //third, return the generated points
    return data;
}