List of usage examples for weka.core.Instances classIndex
public int classIndex()
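Before the examples, here is a minimal sketch of the usual pattern: classIndex() returns -1 until a class attribute has been assigned, so callers typically guard it with setClassIndex() before training or evaluating. The class name and ARFF path below are placeholders, not taken from any of the projects listed.

import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class ClassIndexDemo {
    public static void main(String[] args) throws Exception {
        // load any ARFF file; "data/sample.arff" is a placeholder path
        Instances data = new DataSource("data/sample.arff").getDataSet();

        // classIndex() returns -1 while no class attribute has been set
        if (data.classIndex() == -1) {
            data.setClassIndex(data.numAttributes() - 1); // convention: last attribute is the class
        }

        // the returned index can be used to look up the class attribute itself
        System.out.println("class attribute: " + data.attribute(data.classIndex()).name());
    }
}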
From source file: preprocess.TextDirectoryLoader.java
License: Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure, this method should do so before processing
 * the rest of the data set.
 *
 * @return the full data set
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();

    // collect the class values (one per subdirectory)
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.addElement(enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = m_OutputFilename ? new double[3] : new double[2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);

                // read the whole text file into a buffer
                StringBuffer txtStr = new StringBuffer();
                BufferedReader br = new BufferedReader(new FileReader(txt));
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line + "\n");
                }
                br.close();

                newInst[0] = (double) data.attribute(0).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[1] = (double) data.attribute(1)
                            .addStringValue(subdirPath + File.separator + files[j]);
                // classIndex() gives the position of the class attribute in the vector
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator
                        + subdirPath + File.separator + files[j]);
            }
        }
    }
    return data;
}
From source file: preprocess.TextDirectoryLoaderEX.java
License: Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure, this method should do so before processing
 * the rest of the data set.
 *
 * @return the full data set
 * @throws IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (getDirectory() == null)
        throw new IOException("No directory/source has been specified");

    String directoryPath = getDirectory().getAbsolutePath();

    // collect the class values (one per subdirectory)
    FastVector classes = new FastVector();
    Enumeration enm = getStructure().classAttribute().enumerateValues();
    while (enm.hasMoreElements())
        classes.addElement(enm.nextElement());

    Instances data = getStructure();
    int fileCount = 0;
    for (int k = 0; k < classes.size(); k++) {
        String subdirPath = (String) classes.elementAt(k);
        File subdir = new File(directoryPath + File.separator + subdirPath);
        String[] files = subdir.list();
        for (int j = 0; j < files.length; j++) {
            try {
                fileCount++;
                if (getDebug())
                    System.err.println("processing " + fileCount + " : " + subdirPath + " : " + files[j]);

                double[] newInst = m_OutputFilename ? new double[3] : new double[2];
                File txt = new File(directoryPath + File.separator + subdirPath + File.separator + files[j]);

                // read the whole text file into a buffer
                StringBuffer txtStr = new StringBuffer();
                BufferedReader br = new BufferedReader(new FileReader(txt));
                String line;
                while ((line = br.readLine()) != null) {
                    txtStr.append(line + "\n");
                }
                br.close();

                newInst[1] = (double) data.attribute(1).addStringValue(txtStr.toString());
                if (m_OutputFilename)
                    newInst[0] = Integer.valueOf(files[j].toString()); // file names are parsed as numeric ids
                // classIndex() gives the position of the class attribute in the vector
                newInst[data.classIndex()] = (double) k;
                data.add(new Instance(1.0, newInst));
            } catch (Exception e) {
                System.err.println("failed to convert file: " + directoryPath + File.separator
                        + subdirPath + File.separator + files[j]);
            }
        }
    }
    return data;
}
From source file: probcog.J48Reader.java
License: Open Source License
public static void main(String[] args) {
    try {
        String path = "./";
        String dbdir = path + args[1];
        J48 j48 = readJ48(dbdir);
        Instances instances = readDB(args[0]);

        for (int i = 0; i < instances.numInstances(); i++) {
            Instance inst = instances.instance(i);
            // print the class distribution for this instance, one line per class value
            double[] dist = j48.distributionForInstance(inst);
            int j = 0;
            for (double d : dist) {
                Attribute att = instances.attribute(instances.classIndex());
                String classification = att.value(j);
                System.out.println(d + " " + classification);
                j++;
            }
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: reactivetechnologies.sentigrade.dto.VectorRequestData.java
License: Apache License
public void setTextInstances(Instances texts, String domain) {
    // two-attribute dataset: whichever index the class attribute doesn't occupy holds the text
    int c = texts.classIndex();
    int t = c == 0 ? 1 : 0;
    for (Instance text : Collections.list(texts.enumerateInstances())) {
        getDataSet().add(new Tuple(text.stringValue(t), text.stringValue(c)));
    }
    getClasses().addAll(classAttrNominals(texts));
    setDomain(domain);
}
From source file: recognition.EasySketchRecognizer.java
License: BSD License
/**
 * Build the classifier.
 *
 * @throws Exception
 */
public RandomForest trainClassifier() throws Exception {
    DataSource source = null;
    Instances data = null;
    try {
        source = new DataSource("data/ToddlerAndMature.arff");
        data = source.getDataSet();
    } catch (Exception e) {
        e.printStackTrace();
    }

    // set the class attribute if it hasn't been set yet
    if (data.classIndex() == -1) {
        data.setClassIndex(data.numAttributes() - 1);
    }

    // build the random forest on the full data set
    forest.buildClassifier(data);
    return forest;
}
From source file: recsys.BuildModel.java
public static void main(String[] args) throws Exception {
    // open the training file
    int own_training = StaticVariables.own_training;
    DataSource sourceTrain;
    if (own_training == 1)
        sourceTrain = new DataSource("D://own_training//item//feature data//train_feature.arff");
    else
        sourceTrain = new DataSource("E://recsys//item//feature data//train_feature.arff");
    Instances train = sourceTrain.getDataSet();

    // remove attributes 1, 2 and 4
    String[] options = new String[2];
    options[0] = "-R"; // "range"
    options[1] = "1,2,4";
    Remove remove = new Remove();
    remove.setOptions(options);
    remove.setInputFormat(train); // inform filter about dataset **AFTER** setting options
    Instances newData = Filter.useFilter(train, remove);
    System.out.println("number of attributes " + newData.numAttributes());
    System.out.println(newData.firstInstance());

    // set the class attribute if it hasn't been set yet
    if (newData.classIndex() == -1) {
        newData.setClassIndex(newData.numAttributes() - 1);
    }

    // rebalance the training data towards a uniform class distribution
    Resample sampler = new Resample();
    sampler.setOptions(weka.core.Utils.splitOptions("-B 1.0"));
    sampler.setRandomSeed((int) System.currentTimeMillis());
    sampler.setInputFormat(newData);
    newData = Filter.useFilter(newData, sampler);

    RandomForest cls = new RandomForest();
    cls.buildClassifier(newData);

    // serialize the model
    ObjectOutputStream oos;
    if (own_training == 1)
        oos = new ObjectOutputStream(new FileOutputStream("D://own_training//item//model//train.model"));
    else
        oos = new ObjectOutputStream(new FileOutputStream("E://recsys//item//model//train.model"));
    oos.writeObject(cls);
    oos.flush();
    oos.close();
}
From source file: recsys.EvaluationMachineLearning.java
public static void main(String[] args) throws Exception {
    int own_training = 0;

    // open the test file
    DataSource sourceTest;
    if (own_training == 1) {
        sourceTest = new DataSource("D://own_training//item//feature data//test_feature.arff");
    } else {
        sourceTest = new DataSource("E://test_featureFile.arff");
    }
    Instances test = sourceTest.getDataSet();

    PrintFile solutionFile;
    if (own_training == 1) {
        solutionFile = new PrintFile(null, new File("D://own_training//item//solution//solution.dat"));
    } else {
        solutionFile = new PrintFile(null, new File("E://solution.dat"));
    }

    // set the class attribute if it hasn't been set yet
    if (test.classIndex() == -1) {
        test.setClassIndex(test.numAttributes() - 1);
    }

    // deserialize the trained classifier
    ObjectInputStream ois;
    if (own_training == 1) {
        ois = new ObjectInputStream(new FileInputStream("D://own_training//item//model//train.model"));
    } else {
        ois = new ObjectInputStream(new FileInputStream("E://naive_bayes.model"));
    }
    FilteredClassifier fc = (FilteredClassifier) ois.readObject();
    ois.close();

    int totalSessionCount = 0;
    int buySessionCount = 0;
    Integer tempSessionId = (int) test.instance(0).value(0);
    int sessionItemCount = (int) test.instance(0).value(4);
    ArrayList<Integer> buy = new ArrayList<>();
    String result = String.valueOf(tempSessionId) + ";";

    for (int i = 0; i < test.numInstances(); i++) {
        // a new session starts: decide whether to emit the buys of the previous one
        if ((int) test.instance(i).value(0) != tempSessionId) {
            totalSessionCount++;
            if (buy.size() > 0 && sessionItemCount != 1) {
                if (sessionItemCount >= 2 && sessionItemCount <= 3) {
                    if (buy.size() == 1) {
                        for (int j = 0; j < buy.size(); j++) {
                            result += buy.get(j) + ",";
                        }
                        solutionFile.writeFile(result.substring(0, result.length() - 1));
                        buySessionCount++;
                    }
                } else if (sessionItemCount >= 4) {
                    if (buy.size() >= 2) {
                        for (int j = 0; j < buy.size(); j++) {
                            result += buy.get(j) + ",";
                        }
                        solutionFile.writeFile(result.substring(0, result.length() - 1));
                        buySessionCount++;
                    }
                }
            }
            tempSessionId = (int) test.instance(i).value(0);
            sessionItemCount = (int) test.instance(i).value(4);
            result = String.valueOf(tempSessionId) + ";";
            buy.clear();
        }
        // collect the items this session is predicted to buy
        double pred = fc.classifyInstance(test.instance(i));
        if (test.classAttribute().value((int) pred).equals("buy")) {
            Integer item = (int) test.instance(i).value(1);
            buy.add(item);
        }
    }
    System.out.println(buySessionCount);
    System.out.println(totalSessionCount);
    if (buy.size() > 0) {
        solutionFile.writeFile(result.substring(0, result.length() - 1));
    }
    solutionFile.closeFile();
}
From source file: recsys.ResultProcessing.java
public static void main(String[] args) throws Exception {
    int own_training = StaticVariables.own_training;

    // open the test file
    DataSource sourceTest;
    if (own_training == 1) {
        sourceTest = new DataSource("D://own_training//item//feature data//test_feature.arff");
    } else {
        sourceTest = new DataSource("E://recsys//item//feature data//test_feature.arff");
    }
    Instances test = sourceTest.getDataSet();

    PrintFile solutionFile;
    if (own_training == 1) {
        solutionFile = new PrintFile(null, new File("D://own_training//item//solution//solution.dat"));
    } else {
        solutionFile = new PrintFile(null, new File("E://solution.dat"));
    }

    // set the class attribute if it hasn't been set yet
    if (test.classIndex() == -1) {
        test.setClassIndex(test.numAttributes() - 1);
    }

    // deserialize the trained classifier
    ObjectInputStream ois;
    if (own_training == 1) {
        ois = new ObjectInputStream(new FileInputStream("D://own_training//item//model//train.model"));
    } else {
        ois = new ObjectInputStream(new FileInputStream("E:\\recsys\\item\\model\\train.model"));
    }
    RandomForest cls = (RandomForest) ois.readObject();
    System.out.println(cls.toString());
    ois.close();

    // apply the same attribute filtering that was used for training
    String[] options = new String[2];
    options[0] = "-R"; // "range"
    options[1] = "1,2,4";
    Remove remove = new Remove();
    remove.setOptions(options);
    remove.setInputFormat(test); // inform filter about dataset **AFTER** setting options
    Instances newData = Filter.useFilter(test, remove);
    System.out.println(newData.firstInstance());

    int totalSessionCount = 0;
    int b = 0;

    // read the session-level solution and keep its session ids
    Scanner sc;
    if (own_training == 0)
        sc = new Scanner(new File("E:\\recsys\\session\\solution\\solution.dat"));
    else
        sc = new Scanner(new File("D:\\own_training\\session\\solution\\solution.dat"));
    HashMap<Integer, Integer> a = new HashMap<Integer, Integer>();
    while (sc.hasNext()) {
        String temp = sc.next();
        StringTokenizer st = new StringTokenizer(temp, ",;");
        a.put(Integer.parseInt(st.nextToken()), 1);
    }
    System.out.println("size " + a.size());

    Integer tempSessionId = (int) test.instance(0).value(0);
    ArrayList<Integer> buy = new ArrayList<>();
    String result = String.valueOf(tempSessionId) + ";";

    for (int i = 0; i < test.numInstances(); i++) {
        // a new session starts: emit the previous session's buys if it is in the solution set
        if ((int) test.instance(i).value(0) != tempSessionId) {
            if (a.containsKey(tempSessionId)) {
                totalSessionCount++;
                if (buy.size() > 0) {
                    for (int j = 0; j < buy.size(); j++) {
                        result += buy.get(j) + ",";
                    }
                    solutionFile.writeFile(result.substring(0, result.length() - 1));
                }
            }
            tempSessionId = (int) test.instance(i).value(0);
            result = String.valueOf(tempSessionId) + ";";
            buy.clear();
        }
        // classify the filtered instance, but read the session/item ids from the unfiltered one
        double pred = cls.classifyInstance(newData.instance(i));
        if (test.classAttribute().value((int) pred).equals("buy")) {
            b++;
            Integer item = (int) test.instance(i).value(1);
            buy.add(item);
        }
    }
    System.out.println(totalSessionCount);
    if (buy.size() > 0) {
        solutionFile.writeFile(result.substring(0, result.length() - 1));
    }
    solutionFile.closeFile();
}
From source file: sad_nb.SAD_NB.java
public static void main(String[] args) {
    // convert the training CSV to ARFF and load it
    GestorFichero.getGestorFichero().escribirFichero(
            GestorFichero.getGestorFichero().leerFichero("src/fichero/tweetSentiment.train.csv"),
            "train.arff");
    Instances dataTrain = GestorFichero.getGestorFichero().cargarInstancias("src/fichero/train.arff");

    Baseline b = new Baseline();

    // turn the string attributes 4 and 5 into nominal ones
    Instances temp = Preproceso.filterStringToNominal(dataTrain, "4,5");

    // assign the class attribute, then print its name and run the baseline
    GestorFichero.asignarClase(temp);
    System.out.println(temp.attribute(temp.classIndex()).name());
    Resultados.imprimirResultados(b.aplicarNaiveBayes(temp));
}
From source file: SEE.NSGAII.java
License: Open Source License
public int[][] search(WrapperSubsetEval ASEval, Instances data, String[] objs) throws Exception {
    m_stateName = objs;
    m_objects = objs.length;
    m_best = null;
    m_generationReports = new StringBuffer();
    m_hasClass = true;
    m_classIndex = data.classIndex();
    WrapperSubsetEval ASEvaluator = ASEval;
    m_numAttribs = data.numAttributes();
    m_startRange.setUpper(m_numAttribs - 1);
    if (!(getStartSet().equals(""))) {
        m_starting = m_startRange.getSelection();
    }

    // set up a random initial population
    m_lookupTable = new Hashtable<BitSet, GABitSet>(m_lookupTableSize);
    m_random = new Random(m_seed);
    m_population = new GABitSet[m_popSize * 2];
    initPopulation();
    evaluatePopulation(ASEvaluator);
    populationStatistics();
    scalePopulation();
    nonDominatedSort();

    // evolve for the configured number of generations
    for (int i = 1; i <= m_maxGenerations; i++) {
        generation();
        evaluatePopulation(ASEvaluator);
        populationStatistics();
        scalePopulation();
        nonDominatedSort();
    }

    // collect the attribute subsets on the non-dominated (rank 0) front
    int[][] ans;
    Set<int[]> ansSet = new TreeSet<int[]>(new IntComparator());
    for (GABitSet e : m_population) {
        if (e.rank != 0)
            break;
        ansSet.add(attributeList(e.getChromosome()));
    }
    int num = ansSet.size();
    ans = new int[num][];
    for (int[] e : ansSet) {
        ans[--num] = e;
    }
    return ans;
}