List of usage examples for weka.core Instances add
@Override public boolean add(Instance instance)
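Before the per-project examples, a minimal, self-contained sketch of the call itself may help. It assumes the Weka 3.7+ API (ArrayList-based attribute lists and DenseInstance); the class and attribute names are illustrative. Note that add(Instance) shallow-copies the instance and appends the copy to the dataset.

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddDemo {
    public static void main(String[] args) {
        // Two numeric attributes plus a nominal class attribute.
        ArrayList<Attribute> atts = new ArrayList<>();
        atts.add(new Attribute("x"));
        atts.add(new Attribute("y"));
        ArrayList<String> classValues = new ArrayList<>();
        classValues.add("pos");
        classValues.add("neg");
        atts.add(new Attribute("class", classValues));

        Instances data = new Instances("demo", atts, 0);
        data.setClassIndex(data.numAttributes() - 1);

        // Build one instance, bind it to the dataset header, set values, add it.
        Instance inst = new DenseInstance(data.numAttributes());
        inst.setDataset(data);
        inst.setValue(0, 1.5);
        inst.setValue(1, -0.25);
        inst.setValue(2, "pos");
        data.add(inst); // shallow-copies inst and appends it; returns true

        System.out.println(data);
    }
}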
From source file:de.uni_koeln.spinfo.classification.zoneAnalysis.classifier.WekaClassifier.java
License:Open Source License
@Override
public Model buildModel(List<ClassifyUnit> cus, FeatureUnitConfiguration fuc,
        AbstractFeatureQuantifier fq, File trainingDataFile) {
    Instances trainingSet = initTrainingSet(cus);
    for (ClassifyUnit classifyUnit : cus) {
        trainingSet.add(instance(((ZoneClassifyUnit) classifyUnit), trainingSet));
    }
    // // remember that training has not finished yet ...
    // classifierBuilt = false;
    WekaModel model = new WekaModel();
    model.setTrainingData(trainingSet);
    model.setClassifierName(this.getClass().getSimpleName());
    model.setFQName(fq.getClass().getSimpleName());
    model.setDataFile(trainingDataFile);
    model.setFuc(fuc);
    model.setFUOrder(fq.getFeatureUnitOrder());
    return model;
}
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances trainingSetToInstances(List<double[]> trainingSet) {
    final double[] sample = trainingSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    // Declare the feature vector
    fvWekaAttributes.add(ClassAttribute);
    final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size());
    result.setClass(ClassAttribute);
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : trainingSet) {
        final double[] newInstance = Arrays.copyOf(instance, instance.length + 1);
        newInstance[newInstance.length - 1] = 0;
        final Instance wekaInstance = new DenseInstance(1, newInstance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}
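Assuming DatasetTransformationUtils is on the classpath, the helper above is a one-call conversion; a small hypothetical usage sketch with made-up sample rows:

import java.util.Arrays;
import java.util.List;
import weka.core.Instances;

public class TrainingSetDemo {
    public static void main(String[] args) {
        List<double[]> trainingSet = Arrays.asList(
                new double[] { 0.1, 0.2, 0.3 },
                new double[] { 0.4, 0.5, 0.6 });
        Instances data = DatasetTransformationUtils.trainingSetToInstances(trainingSet);
        // Each row gains a trailing "normal" class value, so there are 4 attributes here.
        System.out.println(data.numInstances() + " instances, " + data.numAttributes() + " attributes");
    }
}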
From source file:de.upb.timok.utils.DatasetTransformationUtils.java
License:Open Source License
public static Instances testSetToInstances(List<double[]> testSet) {
    if (testSet.size() == 0) {
        logger.warn("TestSet has size 0");
    }
    final double[] sample = testSet.get(0);
    final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length);
    for (int i = 0; i < sample.length; i++) {
        fvWekaAttributes.add(new Attribute(Integer.toString(i)));
    }
    final ArrayList<String> classStrings = new ArrayList<>();
    classStrings.add("normal");
    final Attribute ClassAttribute = new Attribute("class", classStrings);
    fvWekaAttributes.add(ClassAttribute);
    // Declare the feature vector
    final Instances result = new Instances("testSet", fvWekaAttributes, testSet.size());
    result.setClassIndex(fvWekaAttributes.size() - 1);
    for (final double[] instance : testSet) {
        final Instance wekaInstance = new DenseInstance(1, instance);
        wekaInstance.setDataset(result);
        result.add(wekaInstance);
    }
    return result;
}
From source file:detplagiasi.TextDirectoryToArff.java
License:Open Source License
public Instances createDataset(String directoryPath) throws Exception {
    FastVector atts = new FastVector(2);
    atts.addElement(new Attribute("filename", (FastVector) null));
    atts.addElement(new Attribute("contents", (FastVector) null));
    /*
    ArrayList atts = new ArrayList(2);
    atts.addElement(new Attribute("filename", (ArrayList) null));
    atts.addElement(new Attribute("contents", (ArrayList) null));
    */
    Instances data = new Instances("text_files_in_" + directoryPath, atts, 0);
    File dir = new File(directoryPath);
    String[] files = dir.list();
    // create a file that records the source file name of each instance
    // FileWriter fstream = new FileWriter(directoryPath + "\\cluster detail.txt");
    BufferedWriter out = null;
    out = new BufferedWriter(new FileWriter(directoryPath + "\\cluster detail.txt"));
    for (int i = 0; i < files.length; i++) {
        if (files[i].endsWith(".txt")) {
            out.write("file ke " + (i + 1) + ": " + files[i]);
            System.out.println("processed files:" + files[i]);
            fileName[i] = files[i];
            out.write("file ke " + (i + 1) + ": " + files[i]);
            try {
                double[] newInst = new double[2];
                newInst[0] = (double) data.attribute(0).addStringValue(files[i]);
                File txt = new File(directoryPath + File.separator + files[i]);
                System.out.println("TDTARFF: " + txt.getCanonicalPath());
                InputStreamReader is;
                is = new InputStreamReader(new FileInputStream(txt));
                StringBuffer txtStr = new StringBuffer();
                int c;
                while ((c = is.read()) != -1) {
                    txtStr.append((char) c);
                }
                is.close(); // close the reader once the file has been consumed
                newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());
                try {
                    out.write("file ke " + (i + 1) + ": " + files[i]);
                    System.out.println("success");
                } catch (Exception d) {
                    System.err.println(d.getLocalizedMessage());
                }
                // write the instance's file name into the detail file
                // data.add(new Instance(1.0, newInst));
                data.add(new Instance(1.0, newInst));
                // data.renameAttributeValue(data.attribute("att_name_in_data2"), "att_value_in_data2", "att_value_in_data1");
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator + files[i]);
            }
        }
    }
    out.close(); // flush and close the detail file before returning
    return data;
}
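The example above targets the pre-3.7 Weka API: FastVector attribute lists and the concrete weka.core.Instance constructor. Under Weka 3.7+, the same string-attribute pattern would look roughly like the sketch below (class, dataset, and file names are illustrative):

import java.util.ArrayList;
import weka.core.Attribute;
import weka.core.DenseInstance;
import weka.core.Instances;

public class TextRowDemo {
    public static void main(String[] args) {
        // Passing a null value list creates a string attribute.
        ArrayList<Attribute> atts = new ArrayList<>();
        atts.add(new Attribute("filename", (ArrayList<String>) null));
        atts.add(new Attribute("contents", (ArrayList<String>) null));
        Instances data = new Instances("text_files", atts, 0);

        double[] vals = new double[2];
        vals[0] = data.attribute(0).addStringValue("a.txt");
        vals[1] = data.attribute(1).addStringValue("file contents go here");
        data.add(new DenseInstance(1.0, vals)); // DenseInstance replaces new Instance(1.0, vals)
    }
}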
From source file:development.CrossValidateShapelets.java
public static Instances randomise(Instances train, int[] pos) {
    // Generate a random permutation into pos
    Random r = new Random();
    for (int i = 0; i < pos.length; i++)
        pos[i] = i;
    for (int i = 0; i < pos.length; i++) {
        int p1 = r.nextInt(pos.length);
        int p2 = r.nextInt(pos.length);
        int temp = pos[p1];
        pos[p1] = pos[p2];
        pos[p2] = temp;
    }
    Instances newD = new Instances(train, 0);
    for (int i = 0; i < pos.length; i++)
        newD.add(train.instance(pos[i]));
    return newD;
}
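The swap loop above produces a valid (if not perfectly uniform) shuffle; Weka also ships an in-place alternative. A minimal sketch, assuming the caller does not need the index permutation in pos:

import java.util.Random;
import weka.core.Instances;

public class ShuffleDemo {
    public static Instances shuffle(Instances train) {
        Instances shuffled = new Instances(train); // copies header and all instances
        shuffled.randomize(new Random());          // shuffles the copy in place
        return shuffled;
    }
}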
From source file:development.SpectralTransformComparison.java
public void run() {
    // Set up the experiment parameters
    int nosCases = 400;
    int[] nosCasesPerClass = { nosCases / 2, nosCases / 2 };
    int runs = 50;
    int minParas = 2;
    int maxParas = 10;
    ArrayList<String> names = new ArrayList<>();
    Random rand = new Random();
    c = ACFDomainClassification.setSingleClassifiers(names);
    int length = m;
    try {
        int nosTrans = 3;
        Instances[] train = new Instances[nosTrans];
        Instances[] test = new Instances[nosTrans];
        double[][] sum = new double[train.length][c.length];
        double[][] sumSq = new double[train.length][c.length];
        PowerSpectrum ps = new PowerSpectrum();
        PowerCepstrum pc = new PowerCepstrum();
        pc.useFFT();
        FFT fft = new FFT();
        OutFile of = new OutFile(path + "mean_" + m + ".csv");
        OutFile of2 = new OutFile(path + "sd_" + m + ".csv");
        System.out.println(" Running length =" + m);
        of.writeLine("classifier,PS,PC,FFT");
        of2.writeLine("classifier,PS,PC,FFT");
        for (int i = 0; i < runs; i++) {
            // Generate data AND SET NOISE LEVEL
            c = ACFDomainClassification.setSingleClassifiers(names);
            if (i % 10 == 0)
                System.out.println(" m =" + m + " performing run =" + i);
            train = new Instances[nosTrans];
            test = new Instances[nosTrans];
            // Change to simulate sin waves.
            Instances rawTrain = SimulatePowerSpectrum.generateFFTDataSet(minParas, maxParas, length,
                    nosCasesPerClass, true);
            rawTrain.randomize(rand);
            Instances rawTest = new Instances(rawTrain, 0);
            for (int k = 0; k < nosCases / 2; k++) {
                Instance r = rawTrain.remove(0);
                rawTest.add(r);
            }
            // Generate transforms
            train[0] = ps.process(rawTrain);
            train[1] = pc.process(rawTrain);
            train[2] = fft.process(rawTrain);
            test[0] = ps.process(rawTest);
            test[1] = pc.process(rawTest);
            test[2] = fft.process(rawTest);
            // Measure classification accuracy
            for (int j = 0; j < test.length; j++) {
                for (int k = 0; k < c.length; k++) {
                    double a = ClassifierTools.singleTrainTestSplitAccuracy(c[k], train[j], test[j]);
                    sum[j][k] += a;
                    sumSq[j][k] += a * a;
                }
            }
        }
        DecimalFormat df = new DecimalFormat("###.###");
        System.out.print("\n m=" + length);
        for (int j = 0; j < c.length; j++) {
            of.writeString(names.get(j) + ",");
            of2.writeString(names.get(j) + ",");
            for (int i = 0; i < test.length; i++) {
                sum[i][j] /= runs;
                sumSq[i][j] = sumSq[i][j] / runs - sum[i][j] * sum[i][j];
                System.out.print("," + df.format(sum[i][j]) + " (" + df.format(sumSq[i][j]) + ")");
                of.writeString(df.format(sum[i][j]) + ",");
                of2.writeString(df.format(sumSq[i][j]) + ",");
            }
            of.writeString("\n");
            of2.writeString("\n");
        }
    } catch (Exception e) {
        System.out.println(" Error =" + e);
        e.printStackTrace();
        System.exit(0);
    }
}
From source file:DiversifyQuery.DivTopK.java
public Instances transformData(Instances data) throws Exception {
    ArrayList<LegacyShapelet> shapelets = new ArrayList<>();
    for (int i = 5; i >= 1; i--) { // the original condition read "i <= 1", which never enters the loop
        if (DResultSet.get(i).result.size() == i) {
            shapelets.addAll(DResultSet.get(i).result);
        }
    }
    if (shapelets.size() < 1) {
        throw new Exception("Number of shapelets initialised incorrectly - please select value "
                + "of k greater than or equal to 1 (Usage: setNumberOfShapelets)");
    }
    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }
    Instances output = determineOutputFormat(data, shapelets);
    // for each instance, get the distance to each shapelet and create a new instance
    for (int i = 0; i < data.numInstances(); i++) {
        Instance toAdd = new Instance(shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : shapelets) {
            double dist = subsequenceDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}
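Note that toAdd is never bound to output with setDataset before the add call; that works because Instances.add shallow-copies the instance and gives the copy a reference to the receiving dataset. A small sketch of that contract (helper name illustrative, Weka 3.7+ API):

import weka.core.DenseInstance;
import weka.core.Instance;
import weka.core.Instances;

public class AddContractDemo {
    // Appends a row built from raw values; the stored copy, not 'row', belongs to 'output'.
    static void append(Instances output, double[] values) {
        Instance row = new DenseInstance(1.0, values);
        output.add(row); // copies 'row' and sets the copy's dataset to 'output'
        // mutating 'row' afterwards does not affect the dataset
    }
}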
From source file:DiversifyTopKShaepelet.DiversifyTopKShaepelet.java
@Override
public Instances process(Instances data) throws Exception {
    if (this.numShapelets < 1) {
        throw new Exception("Number of shapelets initialised incorrectly - please select value "
                + "of k greater than or equal to 1 (Usage: setNumberOfShapelets)");
    }
    int maxPossibleLength = data.instance(0).numAttributes() - 1;
    if (data.classIndex() < 0) {
        throw new Exception("Require that the class be set for the ShapeletTransform");
    }
    if (this.minShapeletLength < 1 || this.maxShapeletLength < 1
            || this.maxShapeletLength < this.minShapeletLength
            || this.maxShapeletLength > maxPossibleLength) {
        throw new Exception("Shapelet length parameters initialised incorrectly");
    }
    // Sort data in round robin order
    dataSourceIDs = new int[data.numInstances()];
    for (int i = 0; i < data.numInstances(); i++) {
        dataSourceIDs[i] = i;
    }
    // data = roundRobinData(data, dataSourceIDs);
    if (this.shapeletsTrained == false) {
        // shapelet discovery has not yet been carried out, so do so
        this.shapelets = findDiversityTopKShapelets(this.numShapelets, data, this.minShapeletLength,
                this.maxShapeletLength); // get k shapelets ATTENTION
        this.shapeletsTrained = true;
        if (!supressOutput) {
            System.out.println(shapelets.size() + " Shapelets have been generated");
        }
    }
    Instances output = determineOutputFormat(data);
    // for each instance, get the distance to each shapelet and create a new instance
    for (int i = 0; i < data.numInstances(); i++) {
        Instance toAdd = new Instance(this.shapelets.size() + 1);
        int shapeletNum = 0;
        for (LegacyShapelet s : this.shapelets) {
            double dist = subseqDistance(s.content, data.instance(i));
            toAdd.setValue(shapeletNum++, dist);
        }
        toAdd.setValue(this.shapelets.size(), data.instance(i).classValue());
        output.add(toAdd);
    }
    return output;
}
From source file:dkpro.similarity.experiments.rte.util.Evaluator.java
License:Open Source License
public static void runClassifierCV(WekaClassifier wekaClassifier, Dataset dataset) throws Exception {
    // Set parameters
    int folds = 10;
    Classifier baseClassifier = ClassifierSimilarityMeasure.getClassifier(wekaClassifier);

    // Set up the random number generator
    long seed = new Date().getTime();
    Random random = new Random(seed);

    // Add IDs to the instances
    AddID.main(new String[] { "-i", MODELS_DIR + "/" + dataset.toString() + ".arff", "-o",
            MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff" });
    Instances data = DataSource.read(MODELS_DIR + "/" + dataset.toString() + "-plusIDs.arff");
    data.setClassIndex(data.numAttributes() - 1);

    // Instantiate the Remove filter
    Remove removeIDFilter = new Remove();
    removeIDFilter.setAttributeIndices("first");

    // Randomize the data
    data.randomize(random);

    // Perform cross-validation
    Instances predictedData = null;
    Evaluation eval = new Evaluation(data);
    for (int n = 0; n < folds; n++) {
        Instances train = data.trainCV(folds, n, random);
        Instances test = data.testCV(folds, n);

        // Apply log filter
        // Filter logFilter = new LogFilter();
        // logFilter.setInputFormat(train);
        // train = Filter.useFilter(train, logFilter);
        // logFilter.setInputFormat(test);
        // test = Filter.useFilter(test, logFilter);

        // Copy the classifier
        Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

        // Instantiate the FilteredClassifier
        FilteredClassifier filteredClassifier = new FilteredClassifier();
        filteredClassifier.setFilter(removeIDFilter);
        filteredClassifier.setClassifier(classifier);

        // Build the classifier
        filteredClassifier.buildClassifier(train);

        // Evaluate
        eval.evaluateModel(filteredClassifier, test);

        // Add predictions
        AddClassification filter = new AddClassification();
        filter.setClassifier(classifier);
        filter.setOutputClassification(true);
        filter.setOutputDistribution(false);
        filter.setOutputErrorFlag(true);
        filter.setInputFormat(train);
        Filter.useFilter(train, filter); // trains the classifier
        Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
        if (predictedData == null)
            predictedData = new Instances(pred, 0);
        for (int j = 0; j < pred.numInstances(); j++)
            predictedData.add(pred.instance(j));
    }
    System.out.println(eval.toSummaryString());
    System.out.println(eval.toMatrixString());

    // Prepare output scores
    String[] scores = new String[predictedData.numInstances()];
    for (Instance predInst : predictedData) {
        int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
        int valueIdx = predictedData.numAttributes() - 2;
        String value = predInst.stringValue(predInst.attribute(valueIdx));
        scores[id] = value;
    }

    // Output classifications
    StringBuilder sb = new StringBuilder();
    for (String score : scores)
        sb.append(score + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".csv"), sb.toString());

    // Output prediction arff
    DataSink.write(OUTPUT_DIR + "/" + dataset.toString() + "/" + wekaClassifier.toString() + "/"
            + dataset.toString() + ".predicted.arff", predictedData);

    // Output meta information
    sb = new StringBuilder();
    sb.append(baseClassifier.toString() + LF);
    sb.append(eval.toSummaryString() + LF);
    sb.append(eval.toMatrixString() + LF);
    FileUtils.writeStringToFile(new File(OUTPUT_DIR + "/" + dataset.toString() + "/"
            + wekaClassifier.toString() + "/" + dataset.toString() + ".meta.txt"), sb.toString());
}
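The hand-rolled fold loop above exists because the method collects per-instance predictions and IDs; when only aggregate statistics are needed, Weka's Evaluation can drive the cross-validation itself. A minimal sketch (file path and classifier choice are illustrative):

import java.util.Random;
import weka.classifiers.Evaluation;
import weka.classifiers.trees.J48;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class CrossValidationDemo {
    public static void main(String[] args) throws Exception {
        Instances data = DataSource.read("dataset.arff");
        data.setClassIndex(data.numAttributes() - 1);
        Evaluation eval = new Evaluation(data);
        eval.crossValidateModel(new J48(), data, 10, new Random(1));
        System.out.println(eval.toSummaryString());
    }
}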
From source file:dkpro.similarity.experiments.sts2013.util.Evaluator.java
License:Open Source License
public static void runLinearRegressionCV(Mode mode, Dataset... datasets) throws Exception {
    for (Dataset dataset : datasets) {
        // Set parameters
        int folds = 10;
        Classifier baseClassifier = new LinearRegression();

        // Set up the random number generator
        long seed = new Date().getTime();
        Random random = new Random(seed);

        // Add IDs to the instances
        AddID.main(new String[] { "-i",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".arff", "-o",
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff" });
        Instances data = DataSource.read(
                MODELS_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + "-plusIDs.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Instantiate the Remove filter
        Remove removeIDFilter = new Remove();
        removeIDFilter.setAttributeIndices("first");

        // Randomize the data
        data.randomize(random);

        // Perform cross-validation
        Instances predictedData = null;
        Evaluation eval = new Evaluation(data);
        for (int n = 0; n < folds; n++) {
            Instances train = data.trainCV(folds, n, random);
            Instances test = data.testCV(folds, n);

            // Apply log filter
            Filter logFilter = new LogFilter();
            logFilter.setInputFormat(train);
            train = Filter.useFilter(train, logFilter);
            logFilter.setInputFormat(test);
            test = Filter.useFilter(test, logFilter);

            // Copy the classifier
            Classifier classifier = AbstractClassifier.makeCopy(baseClassifier);

            // Instantiate the FilteredClassifier
            FilteredClassifier filteredClassifier = new FilteredClassifier();
            filteredClassifier.setFilter(removeIDFilter);
            filteredClassifier.setClassifier(classifier);

            // Build the classifier
            filteredClassifier.buildClassifier(train);

            // Evaluate (the original passed the bare classifier here, which was only
            // ever trained on ID-stripped data inside the FilteredClassifier)
            eval.evaluateModel(filteredClassifier, test);

            // Add predictions
            AddClassification filter = new AddClassification();
            filter.setClassifier(classifier);
            filter.setOutputClassification(true);
            filter.setOutputDistribution(false);
            filter.setOutputErrorFlag(true);
            filter.setInputFormat(train);
            Filter.useFilter(train, filter); // trains the classifier
            Instances pred = Filter.useFilter(test, filter); // performs predictions on test set
            if (predictedData == null) {
                predictedData = new Instances(pred, 0);
            }
            for (int j = 0; j < pred.numInstances(); j++) {
                predictedData.add(pred.instance(j));
            }
        }

        // Prepare output scores
        double[] scores = new double[predictedData.numInstances()];
        for (Instance predInst : predictedData) {
            int id = new Double(predInst.value(predInst.attribute(0))).intValue() - 1;
            int valueIdx = predictedData.numAttributes() - 2;
            double value = predInst.value(predInst.attribute(valueIdx));
            scores[id] = value;

            // Limit to interval [0;5]
            if (scores[id] > 5.0) {
                scores[id] = 5.0;
            }
            if (scores[id] < 0.0) {
                scores[id] = 0.0;
            }
        }

        // Output
        StringBuilder sb = new StringBuilder();
        for (Double score : scores) {
            sb.append(score.toString() + LF);
        }
        FileUtils.writeStringToFile(
                new File(OUTPUT_DIR + "/" + mode.toString().toLowerCase() + "/" + dataset.toString() + ".csv"),
                sb.toString());
    }
}
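The two-branch clamp to [0;5] near the end of the method can be collapsed; a drop-in one-line equivalent for those if statements:

// equivalent to the two if statements above
scores[id] = Math.max(0.0, Math.min(5.0, value));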