List of usage examples for weka.core Instances setClass
public void setClass(Attribute att)
From source file: de.ugoe.cs.cpdp.loader.DecentDataLoader.java
License: Apache License
/** * Loads the given decent file and tranform it from decent->arffx->arff * //from w w w . j a va 2s . c om * @return Instances in WEKA format */ @Override public Instances load(File file) { // Set attributeFilter setAttributeFilter(); // Register MetaModels try { registerMetaModels(); } catch (Exception e1) { Console.printerrln("Metamodels cannot be registered!"); e1.printStackTrace(); } // Set location of decent and arffx Model String decentModelLocation = file.getAbsolutePath(); String pathToDecentModelFolder = decentModelLocation.substring(0, decentModelLocation.lastIndexOf(File.separator)); String arffxModelLocation = pathToDecentModelFolder + "/model.arffx"; String logModelLocation = pathToDecentModelFolder + "/model.log"; String arffLocation = pathToDecentModelFolder + "/model.arff"; // If arff File exists, load from it! if (new File(arffLocation).exists()) { System.out.println("Loading arff File..."); BufferedReader reader; Instances data = null; try { reader = new BufferedReader(new FileReader(arffLocation)); data = new Instances(reader); reader.close(); } catch (FileNotFoundException e) { Console.printerrln("File with path: " + arffLocation + " was not found."); throw new RuntimeException(e); } catch (IOException e) { Console.printerrln("File with path: " + arffLocation + " cannot be read."); throw new RuntimeException(e); } // Set class attribute if not set if (data.classIndex() == -1) { Attribute classAttribute = data.attribute(classAttributeName); data.setClass(classAttribute); } return data; } // Location of EOL Scripts String preprocess = "./decent/epsilon/query/preprocess.eol"; String arffxToArffSource = "./decent/epsilon/query/addLabels.eol"; // Set Log Properties System.setProperty("epsilon.logLevel", logLevel); System.setProperty("epsilon.logToFile", logToFile); System.setProperty("epsilon.logFileAvailable", "false"); // Set decent2arffx Properties System.setProperty("epsilon.transformation.decent2arffx.skipSource", "false"); 
System.setProperty("epsilon.transformation.decent2arffx.type", "code"); // Preprocess Data, transform from decent2arffx try { IEolExecutableModule preProcessModule = loadModule(preprocess); IModel preProcessDecentModel = modelHandler.getDECENTModel(decentModelLocation, true, true); IModel preProcessArffxarffxModel = modelHandler.getARFFxModel(arffxModelLocation, false, true); preProcessModule.getContext().getModelRepository().addModel(preProcessDecentModel); preProcessModule.getContext().getModelRepository().addModel(preProcessArffxarffxModel); execute(preProcessModule, logModelLocation); preProcessDecentModel.dispose(); preProcessArffxarffxModel.dispose(); preProcessModule.reset(); } catch (URISyntaxException e) { Console.printerrln("URI Syntax for decent or arffx model is wrong."); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } // Transform to arff, for label and confidence attributes try { IEolExecutableModule arffxToArffModule = loadModule(arffxToArffSource); IModel arffxToArffArffxModel = modelHandler.getARFFxModel(arffxModelLocation, true, true); arffxToArffModule.getContext().getModelRepository().addModel(arffxToArffArffxModel); execute(arffxToArffModule, logModelLocation); arffxToArffArffxModel.dispose(); // can be stored and retained alternatively arffxToArffModule.reset(); } catch (URISyntaxException e) { Console.printerrln("URI Syntax for arffx model is wrong."); e.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } // Unregister MetaModels, otherwise cast will fail HashMap<String, Object> metaModelCache = new HashMap<>(); for (String key : EPackage.Registry.INSTANCE.keySet()) { metaModelCache.put(key, EPackage.Registry.INSTANCE.get(key)); } ; for (String key : metaModelCache.keySet()) { EPackage.Registry.INSTANCE.remove(key); } ; // Workaround to gernerate a usable URI. 
Absolute path is not // possible, therefore we need to construct a relative path URL location = DecentDataLoader.class.getProtectionDomain().getCodeSource().getLocation(); String basePath = location.getFile(); // Location is the bin folder, so we need to delete the last 4 characters basePath = basePath.substring(0, basePath.length() - 4); String relativePath = new File(basePath).toURI().relativize(new File(arffxModelLocation).toURI()).getPath(); // Loard arffx file and create WEKA Instances ARFFxResourceTool tool = new ARFFxResourceTool(); Resource resource = tool.loadResourceFromXMI(relativePath, "arffx"); Instances dataSet = null; for (EObject o : resource.getContents()) { Model m = (Model) o; dataSet = createWekaDataFormat(m); for (Instance i : m.getData()) { createWekaInstance(dataSet, i); } } // Set class attribute Attribute classAttribute = dataSet.attribute(classAttributeName); dataSet.setClass(classAttribute); // Save as ARFF save(dataSet, arffLocation); return dataSet; }
From source file: de.upb.timok.utils.DatasetTransformationUtils.java
License: Open Source License
public static Instances trainingSetToInstances(List<double[]> trainingSet) { final double[] sample = trainingSet.get(0); final ArrayList<Attribute> fvWekaAttributes = new ArrayList<>(sample.length + 1); for (int i = 0; i < sample.length; i++) { fvWekaAttributes.add(new Attribute(Integer.toString(i))); }/*w w w . j av a2s . co m*/ final ArrayList<String> classStrings = new ArrayList<>(); classStrings.add("normal"); final Attribute ClassAttribute = new Attribute("class", classStrings); // Declare the feature vector fvWekaAttributes.add(ClassAttribute); final Instances result = new Instances("trainingSet", fvWekaAttributes, trainingSet.size()); result.setClass(ClassAttribute); result.setClassIndex(fvWekaAttributes.size() - 1); for (final double[] instance : trainingSet) { final double[] newInstance = Arrays.copyOf(instance, instance.length + 1); newInstance[newInstance.length - 1] = 0; final Instance wekaInstance = new DenseInstance(1, newInstance); wekaInstance.setDataset(result); result.add(wekaInstance); } return result; }
From source file: elh.eus.absa.CLI.java
License: Open Source License
/**
 * Main access to the polarity detection training functionalities.
 * Reads a corpus, extracts features, trains a polarity classifier
 * ("polarityCat" class attribute), saves the model, and optionally
 * evaluates it via cross-validation and/or train/test split.
 *
 * @param inputStream stream with the training corpus
 * @throws IOException
 *             input output exception if problems with corpora
 */
public final void trainATP(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    String classes = parsedArguments.getString("classnum");
    int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);
    System.err.println("trainATP : Corpus read, creating features");
    Features atpTrain = new Features(reader, paramFile, classes);

    // Feature loading depends on corpus format; Conll loading is only used
    // for untagged tab corpora in Basque ("eu")
    Instances traindata;
    if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
        traindata = atpTrain.loadInstancesTAB(true, "atp");
    } else if (corpusFormat.equalsIgnoreCase("tabNotagged") && lang.equalsIgnoreCase("eu")) {
        traindata = atpTrain.loadInstancesConll(true, "atp");
    } else {
        traindata = atpTrain.loadInstances(true, "atp");
    }

    // setting class attribute (entCat|attCat|entAttCat|polarityCat)
    traindata.setClass(traindata.attribute("polarityCat"));

    WekaWrapper classify;
    try {
        // model output directory is taken from the params file
        Properties params = new Properties();
        params.load(new FileInputStream(paramFile));
        String modelPath = params.getProperty("fVectorDir");
        classify = new WekaWrapper(traindata, true);
        // the model is saved BEFORE validation, so it persists even if
        // evaluation below fails or is skipped
        classify.saveModel(modelPath + File.separator + "elixa-atp_" + lang + ".model");
        switch (validation) {
        case "cross":
            classify.crossValidate(foldNum);
            break;
        case "trainTest":
            classify.trainTest();
            break;
        case "both":
            classify.crossValidate(foldNum);
            classify.trainTest();
            break;
        default:
            System.out.println("train-atp: wrong validation option. Model saved but not tested");
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
From source file: elh.eus.absa.CLI.java
License: Open Source License
/**
 * Main access to the polarity tagging functionalities. Target based polarity.
 * Either applies a rule-based lexicon classifier or a previously trained ML
 * model ("polarityCat" class) to the corpus read from the input stream, and
 * writes the predictions to stdout as tab-separated lines.
 *
 * @param inputStream stream with the evaluation corpus
 * @throws IOException
 *             input output exception if problems with corpora
 * @throws JDOMException if the corpus XML cannot be parsed
 */
public final void evalATP(final InputStream inputStream) throws IOException, JDOMException {
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");
    boolean printPreds = parsedArguments.getBoolean("outputPredictions");

    // Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);

    // Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));
        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");

        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
         */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");
        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir,
                    posModelPath, lemmaModelPath);
            // process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            // strip the "_g" (gold) suffix from the sentence id for reporting
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    }
    // ML Classifier (default)
    else {
        Features atpTest = new Features(reader, paramFile, classnum, model);
        Instances testdata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            testdata = atpTest.loadInstancesTAB(true, "atp");
        } else {
            testdata = atpTest.loadInstances(true, "atp");
        }
        // setting class attribute (entCat|attCat|entAttCat|polarityCat)
        testdata.setClass(testdata.attribute("polarityCat"));
        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);
            System.err.println("evalAtp : going to test the model");
            classify.setTestdata(testdata);
            classify.testModel(model);
            if (printPreds) {
                for (String oId : reader.getOpinions().keySet()) {
                    // instance ids are 1-based in the opinion->instance map
                    int iId = atpTest.getOpinInst().get(oId);
                    Instance i = testdata.get(iId - 1);
                    double label = classify.getMLclass().classifyInstance(i);
                    String lblStr = i.classAttribute().value((int) label);
                    String actual = "?";
                    if (reader.getOpinion(oId).getPolarity() != null) {
                        actual = reader.getOpinion(oId).getPolarity();
                    }
                    String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                    String oSent = reader.getOpinionSentence(oId);
                    // for tab corpora keep only the first (token) column of each line
                    if (corpusFormat.startsWith("tab")) {
                        StringBuilder sb = new StringBuilder();
                        for (String kk : oSent.split("\n")) {
                            sb.append(kk.split("\\t")[0]);
                            sb.append(" ");
                        }
                        oSent = sb.toString();
                    }
                    System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                            + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                    reader.getOpinion(oId).setPolarity(lblStr);
                }
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
From source file: elh.eus.absa.CLI.java
License: Open Source License
/**
 * Main access to the polarity tagging functionalities. Target based polarity.
 * Tags every opinion in the corpus with a polarity label, either with the
 * rule-based lexicon classifier or with a previously trained ML model, and
 * prints each prediction to stdout as a tab-separated line.
 *
 * @param inputStream stream with the corpus to tag
 * @throws IOException
 *             input output exception if problems with corpora
 * @throws JDOMException if the corpus XML cannot be parsed
 */
public final void tagATP(final InputStream inputStream) throws IOException, JDOMException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String model = parsedArguments.getString("model");
    String lang = parsedArguments.getString("language");
    String classnum = parsedArguments.getString("classnum");
    boolean ruleBased = parsedArguments.getBoolean("ruleBasedClassifier");

    // Read corpus sentences
    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang);

    // Rule-based Classifier.
    if (ruleBased) {
        Properties params = new Properties();
        params.load(new FileInputStream(new File(paramFile)));
        String posModelPath = params.getProperty("pos-model");
        String lemmaModelPath = params.getProperty("lemma-model");
        String kafDir = params.getProperty("kafDir");

        /* polarity lexicon. Domain specific polarity lexicon is given priority.
         * If no domain lexicon is found it reverts to general polarity lexicon.
         * If no general polarity lexicon is found program exits with error message.
         */
        String lex = params.getProperty("polarLexiconDomain", "none");
        if (lex.equalsIgnoreCase("none")) {
            lex = params.getProperty("polarLexiconGeneral", "none");
            if (lex.equalsIgnoreCase("none")) {
                System.err.println("Elixa Error :: Rule-based classifier is selected but no polarity"
                        + " lexicon has been specified. Either specify one or choose ML classifier");
                System.exit(1);
            }
        }
        File lexFile = new File(lex);
        Evaluator evalDoc = new Evaluator(lexFile, "lemma");
        for (String oId : reader.getOpinions().keySet()) {
            // sentence posTagging
            String taggedKaf = reader.tagSentenceTab(reader.getOpinion(oId).getsId(), kafDir,
                    posModelPath, lemmaModelPath);
            // process the postagged sentence with the word count based polarity tagger
            Map<String, String> results = evalDoc.polarityScoreTab(taggedKaf, lexFile.getName());
            String lblStr = results.get("polarity");
            String actual = "?";
            if (reader.getOpinion(oId).getPolarity() != null) {
                actual = reader.getOpinion(oId).getPolarity();
            }
            // strip the "_g" (gold) suffix from the sentence id for reporting
            String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
            System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + reader.getOpinionSentence(oId));
            reader.getOpinion(oId).setPolarity(lblStr);
        }
    } else {
        Features atpTrain = new Features(reader, paramFile, classnum, model);
        // Feature loading depends on corpus format; Conll loading is used for
        // Basque ("eu") untagged-tab or "ireom" corpora
        Instances traindata;
        if (corpusFormat.startsWith("tab") && !corpusFormat.equalsIgnoreCase("tabNotagged")) {
            traindata = atpTrain.loadInstancesTAB(true, "atp");
        } else if (lang.equalsIgnoreCase("eu") && (corpusFormat.equalsIgnoreCase("tabNotagged")
                || corpusFormat.equalsIgnoreCase("ireom"))) {
            traindata = atpTrain.loadInstancesConll(true, "atp");
        } else {
            traindata = atpTrain.loadInstances(true, "atp");
        }
        // setting class attribute (entCat|attCat|entAttCat|polarityCat)
        traindata.setClass(traindata.attribute("polarityCat"));
        WekaWrapper classify;
        try {
            classify = new WekaWrapper(model);
            System.err.println();
            classify.setTestdata(traindata);
            classify.loadModel(model);
            for (String oId : reader.getOpinions().keySet()) {
                // instance ids are 1-based in the opinion->instance map
                int iId = atpTrain.getOpinInst().get(oId);
                Instance i = traindata.get(iId - 1);
                double label = classify.getMLclass().classifyInstance(i);
                String lblStr = i.classAttribute().value((int) label);
                String actual = "?";
                if (reader.getOpinion(oId).getPolarity() != null) {
                    actual = reader.getOpinion(oId).getPolarity();
                }
                String rId = reader.getOpinion(oId).getsId().replaceFirst("_g$", "");
                String oSent = reader.getOpinionSentence(oId);
                // for tab corpora keep only the first (token) column of each line
                if (corpusFormat.startsWith("tab")) {
                    StringBuilder sb = new StringBuilder();
                    for (String kk : oSent.split("\n")) {
                        sb.append(kk.split("\\t")[0]);
                        sb.append(" ");
                    }
                    oSent = sb.toString();
                }
                System.out.println(rId + "\t" + actual + "\t" + lblStr + "\t" + oSent + "\t"
                        + reader.getOpinionSentence(oId).replaceAll("\n", " ").replaceAll("\\t", ":::"));
                reader.getOpinion(oId).setPolarity(lblStr);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
From source file: elh.eus.absa.CLI.java
License: Open Source License
/**
 * Main access to the train-atc functionalities. Train ATC using a double one vs. all classifier
 * (E and A) for E#A aspect categories. First a one-vs-all classifier predicts entity
 * categories ("entCat"); instances are duplicated for every entity prediction above the
 * threshold. Then a second one-vs-all classifier predicts attribute categories ("attCat"),
 * and the combined E#A labels are written back into the corpus reader as opinions.
 * Results are saved in SemEval-2015 format.
 *
 * @param inputStream stream with the training (or, with --testOnly, test) corpus
 * @throws IOException if problems with corpora
 */
public final void trainATC2(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String paramFile2 = parsedArguments.getString("params2");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    String lang = parsedArguments.getString("language");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    // acceptance thresholds for the one-vs-all class probabilities
    double threshold = 0.5;
    double threshold2 = 0.5;
    // NOTE(review): hard-coded, user-specific path — should be a CLI/params option
    String modelsPath = "/home/inaki/elixa-atp/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    // in test-only mode the "traindata" is actually reloaded from the test file
    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }

    WekaWrapper onevsall;
    try {
        // --- first pass: one-vs-all over entity categories (entCat) ---
        // copy the data and drop the other two label attributes
        Instances entdata = new Instances(traindata);
        entdata.deleteAttributeAt(entdata.attribute("attCat").index());
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        entdata.setClassIndex(entdata.attribute("entCat").index());
        onevsall = new WekaWrapper(entdata, true);

        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(entdata);
        HashMap<Integer, HashMap<String, Double>> ovsaRes =
                onevsall.predictOneVsAll(modelsPath, paramFile + "entCat");
        System.out.println("trainATC: one vs all predictions ready");

        // reverse map: instance id -> opinion id
        HashMap<Integer, String> instOps = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            instOps.put(atcTrain.getOpinInst().get(oId), oId);
        }

        // --- second feature extraction with params2 for the attribute pass ---
        atcTrain = new Features(reader, paramFile2, "3");
        entdata = atcTrain.loadInstances(true, "attTrain2_data");
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());

        Attribute insAtt = entdata.attribute("instanceId");
        // highest instanceId currently in the data; new duplicated instances get ids above it
        double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1);
        System.err.println("last instance has index: " + maxInstId);

        // duplicate each instance once per entity category predicted above the threshold
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins" + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                if (ovsaRes.get(i).get(cl) > threshold) {
                    // additional predictions beyond the first become new instances
                    if (oSubId >= 1) {
                        Instance newIns = new SparseInstance(currentInst);
                        newIns.setDataset(entdata);
                        entdata.add(newIns);
                        newIns.setValue(insAtt, maxInstId + oSubId);
                        newIns.setClassValue(cl);
                        instOps.put((int) maxInstId + oSubId, oId);
                    }
                    // the first prediction updates the current instance in place
                    else {
                        currentInst.setClassValue(cl);
                    }
                    oSubId++;
                }
            } // finished updating instances data
        }

        // --- second pass: one-vs-all over attribute categories (attCat) ---
        entdata.setClass(entdata.attribute("attCat"));
        onevsall = new WekaWrapper(entdata, true);
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "attCat");
            System.out.println("trainATC: one vs all attcat models ready");
        }
        // NOTE(review): models were trained under paramFile + "attCat" but prediction
        // uses paramFile + "entAttCat" — looks like a copy-paste slip; confirm
        ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat");

        insAtt = entdata.attribute("instanceId");
        // NOTE(review): the first pass used numDistinctValues(insAtt) - 1 here;
        // insAtt.numValues() may differ (and is 0 for numeric attributes) — confirm
        maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues());
        System.err.println("last instance has index: " + maxInstId);

        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins: " + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                if (ovsaRes.get(i).get(cl) > threshold2) {
                    // NOTE(review): this inner check is redundant while
                    // threshold == threshold2 (both 0.5)
                    if (ovsaRes.get(i).get(cl) > threshold) {
                        // additional predictions create extra opinions
                        if (oSubId >= 1) {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            // trgt, offsetFrom, offsetTo, polarity, cat, sId
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        // the first prediction replaces the original opinion
                        else {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            reader.removeOpinion(oId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        oSubId++;
                    }
                }
            } // finished updating instances data
        }
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}
From source file: elh.eus.absa.CLI.java
License: Open Source License
/** * Main access to the train functionalities. * @throws Exception /* w w w .ja v a2 s. c om*/ */ public final void tagATC(final InputStream inputStream) throws IOException { // load training parameters file String paramFile = parsedArguments.getString("params"); String corpusFormat = parsedArguments.getString("corpusFormat"); //String validation = parsedArguments.getString("validation"); String lang = parsedArguments.getString("language"); int foldNum = Integer.parseInt(parsedArguments.getString("foldNum")); //boolean printPreds = parsedArguments.getBoolean("printPreds"); CorpusReader reader = new CorpusReader(inputStream, corpusFormat, lang); Features atcTrain = new Features(reader, paramFile, "3"); Instances traindata = atcTrain.loadInstances(true, "atc"); //setting class attribute (entCat|attCat|entAttCat|polarityCat) //HashMap<String, Integer> opInst = atcTrain.getOpinInst(); WekaWrapper classify; try { //train first classifier (entities) traindata.setClass(traindata.attribute("entCat")); classify = new WekaWrapper(traindata, true); classify.crossValidate(foldNum); //Classifier entityCl = classify.getMLclass().; //train second classifier (attributtes) traindata.setClass(traindata.attribute("attCat")); classify.setTraindata(traindata); classify.crossValidate(foldNum); //Classifier attCl = classify.getMLclass(); classify.printMultilabelPredictions(classify.multiLabelPrediction()); } catch (Exception e) { e.printStackTrace(); } traindata.setClass(traindata.attribute("entAttCat")); System.err.println("DONE CLI train-atc"); }
From source file: epsi.i5.datamining.Weka.java
public void excutionAlgo() throws FileNotFoundException, IOException, Exception { BufferedReader reader = new BufferedReader(new FileReader("src/epsi/i5/data/" + fileOne + ".arff")); Instances data = new Instances(reader); reader.close();// w w w . j a v a2 s. c om //System.out.println(data.attribute(0)); data.setClass(data.attribute(0)); NaiveBayes NB = new NaiveBayes(); NB.buildClassifier(data); Evaluation naiveBayes = new Evaluation(data); naiveBayes.crossValidateModel(NB, data, 10, new Random(1)); naiveBayes.evaluateModel(NB, data); //System.out.println(test.confusionMatrix() + "1"); //System.out.println(test.correct() + "2"); System.out.println("*****************************"); System.out.println("******** Naive Bayes ********"); System.out.println(naiveBayes.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(naiveBayes.pctCorrect()); System.out.println(""); J48 j = new J48(); j.buildClassifier(data); Evaluation jeval = new Evaluation(data); jeval.crossValidateModel(j, data, 10, new Random(1)); jeval.evaluateModel(j, data); System.out.println("*****************************"); System.out.println("************ J48 ************"); System.out.println(jeval.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(jeval.pctCorrect()); System.out.println(""); DecisionTable DT = new DecisionTable(); DT.buildClassifier(data); Evaluation decisionTable = new Evaluation(data); decisionTable.crossValidateModel(DT, data, 10, new Random(1)); decisionTable.evaluateModel(DT, data); System.out.println("*****************************"); System.out.println("******* DecisionTable *******"); System.out.println(decisionTable.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(decisionTable.pctCorrect()); 
System.out.println(""); OneR OR = new OneR(); OR.buildClassifier(data); Evaluation oneR = new Evaluation(data); oneR.crossValidateModel(OR, data, 10, new Random(1)); oneR.evaluateModel(OR, data); System.out.println("*****************************"); System.out.println("************ OneR ***********"); System.out.println(oneR.toMatrixString()); System.out.println("*****************************"); System.out.println("**** Pourcentage Correct ****"); System.out.println(oneR.pctCorrect()); //Polarit data.setClass(data.attribute(1)); System.out.println(""); M5Rules MR = new M5Rules(); MR.buildClassifier(data); Evaluation m5rules = new Evaluation(data); m5rules.crossValidateModel(MR, data, 10, new Random(1)); m5rules.evaluateModel(MR, data); System.out.println("*****************************"); System.out.println("********** M5Rules **********"); System.out.println(m5rules.correlationCoefficient()); System.out.println(""); LinearRegression LR = new LinearRegression(); LR.buildClassifier(data); Evaluation linearR = new Evaluation(data); linearR.crossValidateModel(LR, data, 10, new Random(1)); linearR.evaluateModel(LR, data); System.out.println("*****************************"); System.out.println("********** linearR **********"); System.out.println(linearR.correlationCoefficient()); }
From source file: gate.plugin.learningframework.data.CorpusRepresentationWeka.java
/**
 * Create a Weka dataset from just the meta-information of the Mallet representation.
 * This creates an empty Instances object that has all the attributes constructed from
 * the information we have in the Mallet representation.
 * The dataset will always have a class attribute defined: if there is a mallet target alphabet,
 * a nominal (class) attribute, otherwise a numeric (regression) attribute.
 *
 * @param cr the Mallet corpus representation; must be a CorpusRepresentationMalletTarget
 * @return an empty weka Instances object with the class attribute set
 * @throws GateRuntimeException if {@code cr} is not a CorpusRepresentationMalletTarget
 */
public static Instances emptyDatasetFromMallet(CorpusRepresentationMallet cr) {
    if (!(cr instanceof CorpusRepresentationMalletTarget)) {
        throw new GateRuntimeException("Conversion to weka not implemented yet: " + cr.getClass());
    }
    InstanceList malletInstances = cr.getRepresentationMallet();
    Alphabet dataAlph = malletInstances.getDataAlphabet();
    Pipe pipe = malletInstances.getPipe();

    // The pipe should always be an instance of LFPipe, but we allow this to be used for
    // instance lists which have been created in a different way and contain some other
    // type of Pipe. If we do have a LFPipe, we create a map that can be used to figure
    // out which of the mallet features are either boolean or nominal with a numeric
    // coding. Otherwise, we regard all features as numeric.
    // This maps from the mallet feature name to the alphabet for a nominal feature we
    // have stored in our attribute, or to a placeholder alphabet containing true/false
    // if we have a boolean feature.
    Alphabet booleanAlph = new Alphabet();
    booleanAlph.lookupIndex("false");
    booleanAlph.lookupIndex("true");
    Map<String, Alphabet> name2lfalph = new HashMap<String, Alphabet>();
    if (pipe instanceof LFPipe) {
        LFPipe lfpipe = (LFPipe) pipe;
        // go through all the entries in the instances data alphabet and try to figure out
        // which of the features are either boolean or nominals coded as number
        for (int i = 0; i < dataAlph.size(); i++) {
            String malletFeatureName = (String) dataAlph.lookupObject(i);
            gate.plugin.learningframework.features.Attribute lfatt = FeatureExtraction
                    .lookupAttributeForFeatureName(lfpipe.getFeatureInfo().getAttributes(),
                            malletFeatureName);
            Alphabet alphToUse = null;
            // the LF attribute may be an AttributeList or a SimpleAttribute; the
            // datatype/codeas checks are the same for both
            if (lfatt instanceof gate.plugin.learningframework.features.AttributeList) {
                if (((gate.plugin.learningframework.features.AttributeList) lfatt).datatype == Datatype.bool) {
                    alphToUse = booleanAlph;
                } else {
                    if (((gate.plugin.learningframework.features.AttributeList) lfatt).datatype == Datatype.nominal
                            && ((gate.plugin.learningframework.features.AttributeList) lfatt).codeas == CodeAs.number) {
                        alphToUse = ((gate.plugin.learningframework.features.AttributeList) lfatt).alphabet;
                    }
                }
            } else if (lfatt instanceof gate.plugin.learningframework.features.SimpleAttribute) {
                if (((gate.plugin.learningframework.features.SimpleAttribute) lfatt).datatype == Datatype.bool) {
                    alphToUse = booleanAlph;
                } else {
                    if (((gate.plugin.learningframework.features.SimpleAttribute) lfatt).datatype == Datatype.nominal
                            && ((gate.plugin.learningframework.features.SimpleAttribute) lfatt).codeas == CodeAs.number) {
                        alphToUse = ((gate.plugin.learningframework.features.SimpleAttribute) lfatt).alphabet;
                    }
                }
            }
            // if alphToUse is not null, add it to the map
            if (alphToUse != null) {
                name2lfalph.put(malletFeatureName, alphToUse);
            }
        }
    }

    // This is the information weka needs about the attributes
    ArrayList<Attribute> wekaAttributes = new ArrayList<Attribute>();
    // now go through the data alphabet again and add one weka attribute to the
    // attributes list for each mallet feature. If we know an alphabet for the mallet
    // feature, create the weka attribute as a nominal otherwise as a numeric weka attribute.
    for (int i = 0; i < pipe.getDataAlphabet().size(); i++) {
        String malletFeatureName = (String) pipe.getDataAlphabet().lookupObject(i);
        Alphabet lfalph = name2lfalph.get(malletFeatureName);
        if (lfalph == null) {
            wekaAttributes.add(new Attribute(malletFeatureName));
        } else {
            List<String> nomVals = new ArrayList<String>(lfalph.size());
            for (int j = 0; j < lfalph.size(); j++) {
                nomVals.add((String) lfalph.lookupObject(j));
            }
            wekaAttributes.add(new Attribute(malletFeatureName, nomVals));
        }
    }

    // now add the class attribute, if necessary: if there is a target alphabet, the class
    // must be nominal, so create a nominal weka attribute, otherwise, create a numeric one
    weka.core.Attribute targetAttr = null;
    if (pipe.getTargetAlphabet() != null) {
        Alphabet talph = pipe.getTargetAlphabet();
        // create the values for the target from the target alphabet
        List<String> classVals = new ArrayList<String>();
        for (int i = 0; i < talph.size(); i++) {
            classVals.add((String) talph.lookupObject(i));
        }
        targetAttr = new Attribute("class", classVals);
        wekaAttributes.add(targetAttr);
        System.err.println("LF: created an empty weka dataset for classification");
    } else {
        targetAttr = new Attribute("target");
        wekaAttributes.add(targetAttr);
        System.err.println("LF: created an empty weka dataset for regression");
    }

    // create the weka dataset
    Instances insts = new weka.core.Instances("GATELearningFramework", wekaAttributes,
            malletInstances.size());
    insts.setClass(targetAttr);
    return insts;
}
From source file: lu.lippmann.cdb.common.gui.dataset.InstancesLoaderDialogFactory.java
License: Open Source License
private static Instances showDialog(final Component parent, final boolean setClass) throws Exception { final Preferences prefs = Preferences.userRoot().node("CadralDecisionBuild"); final String path = prefs.get(REG_KEY, WekaDataAccessUtil.DEFAULT_SAMPLE_DIR); final JFileChooser fc = new JFileChooser(); fc.setCurrentDirectory(new File(path)); final int returnVal = fc.showOpenDialog(parent); if (returnVal == JFileChooser.APPROVE_OPTION) { final File file = fc.getSelectedFile(); if (file != null) { prefs.put(REG_KEY, file.getPath()); final Instances ds = WekaDataAccessUtil.loadInstancesFromARFFOrCSVFile(file); final Attribute defaultClassAttr = ds.classIndex() >= 0 ? ds.classAttribute() : ds.attribute(0); ds.setClassIndex(-1);//from w w w. j a v a2 s .c o m ds.setRelationName(file.getPath()); final List<String> attributesNames = new ArrayList<String>(); final Enumeration<?> e = ds.enumerateAttributes(); while (e.hasMoreElements()) { final Attribute attr = (Attribute) e.nextElement(); attributesNames.add(attr.name()); } if (setClass) { final String s = (String) JOptionPane.showInputDialog(parent, "Select the class attribute for '" + file.getName() + "' (default:'" + defaultClassAttr.name() + "'): ", "Class selection", JOptionPane.QUESTION_MESSAGE, null, // icon attributesNames.toArray(), attributesNames.get(attributesNames.size() - 1)); if (s != null) { ds.setClass(ds.attribute(s)); } else { //Otherwise no class defined and CACHE attributeClass => No class index defined after cancel + retry ds.setClass(defaultClassAttr); return null; } } else { ds.setClass(defaultClassAttr); } return ds; } else throw new Exception(); } else return null; }