List of usage examples for weka.core Instances numDistinctValues
public int numDistinctValues(Attribute att)
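The method is also available with an int attribute index, numDistinctValues(int attIndex), and several of the examples below use that overload. Before the project examples, here is a minimal self-contained sketch; the file name "iris.arff" and the class name NumDistinctValuesExample are placeholders and not part of any of the projects listed below.

import weka.core.Attribute;
import weka.core.Instances;
import weka.core.converters.ConverterUtils.DataSource;

public class NumDistinctValuesExample {
    public static void main(String[] args) throws Exception {
        // Load a data set (placeholder file name) and set the class attribute.
        Instances data = DataSource.read("iris.arff");
        data.setClassIndex(data.numAttributes() - 1);

        // Overload taking an attribute index.
        for (int i = 0; i < data.numAttributes(); i++) {
            System.out.println(data.attribute(i).name() + ": "
                    + data.numDistinctValues(i) + " distinct value(s)");
        }

        // Overload taking an Attribute object, here applied to the class attribute.
        Attribute classAtt = data.classAttribute();
        System.out.println("class: " + data.numDistinctValues(classAtt));
    }
}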
From source file:adams.data.instancesanalysis.PCA.java
License:Open Source License
/**
 * Performs the actual analysis.
 *
 * @param data	the data to analyze
 * @return		null if successful, otherwise error message
 * @throws Exception	if analysis fails
 */
@Override
protected String doAnalyze(Instances data) throws Exception {
    String result;
    Remove remove;
    PublicPrincipalComponents pca;
    int i;
    Capabilities caps;
    PartitionedMultiFilter2 part;
    Range rangeUnsupported;
    Range rangeSupported;
    TIntList listNominal;
    Range rangeNominal;
    ArrayList<ArrayList<Double>> coeff;
    Instances filtered;
    SpreadSheet transformed;
    WekaInstancesToSpreadSheet conv;
    String colName;

    result = null;
    m_Loadings = null;
    m_Scores = null;

    if (!m_AttributeRange.isAllRange()) {
        if (isLoggingEnabled())
            getLogger().info("Filtering attribute range: " + m_AttributeRange.getRange());
        remove = new Remove();
        remove.setAttributeIndicesArray(m_AttributeRange.getIntIndices());
        remove.setInvertSelection(true);
        remove.setInputFormat(data);
        data = Filter.useFilter(data, remove);
    }
    if (isLoggingEnabled())
        getLogger().info("Performing PCA...");

    listNominal = new TIntArrayList();
    if (m_SkipNominal) {
        for (i = 0; i < data.numAttributes(); i++) {
            if (i == data.classIndex())
                continue;
            if (data.attribute(i).isNominal())
                listNominal.add(i);
        }
    }

    // check for unsupported attributes
    caps = new PublicPrincipalComponents().getCapabilities();
    m_Supported = new TIntArrayList();
    m_Unsupported = new TIntArrayList();
    for (i = 0; i < data.numAttributes(); i++) {
        if (!caps.test(data.attribute(i)) || (i == data.classIndex()) || (listNominal.contains(i)))
            m_Unsupported.add(i);
        else
            m_Supported.add(i);
    }
    data.setClassIndex(-1);

    m_NumAttributes = m_Supported.size();

    // the principal components will delete the attributes without any distinct values.
    // this checks which instances will be kept.
    m_Kept = new ArrayList<>();
    for (i = 0; i < m_Supported.size(); i++) {
        if (data.numDistinctValues(m_Supported.get(i)) > 1)
            m_Kept.add(m_Supported.get(i));
    }

    // build a model using the PublicPrincipalComponents
    pca = new PublicPrincipalComponents();
    pca.setMaximumAttributes(m_MaxAttributes);
    pca.setVarianceCovered(m_Variance);
    pca.setMaximumAttributeNames(m_MaxAttributeNames);
    part = null;
    if (m_Unsupported.size() > 0) {
        rangeUnsupported = new Range();
        rangeUnsupported.setMax(data.numAttributes());
        rangeUnsupported.setIndices(m_Unsupported.toArray());
        rangeSupported = new Range();
        rangeSupported.setMax(data.numAttributes());
        rangeSupported.setIndices(m_Supported.toArray());
        part = new PartitionedMultiFilter2();
        part.setFilters(new Filter[] { pca, new AllFilter(), });
        part.setRanges(new weka.core.Range[] { new weka.core.Range(rangeSupported.getRange()),
                new weka.core.Range(rangeUnsupported.getRange()), });
    }
    try {
        if (part != null)
            part.setInputFormat(data);
        else
            pca.setInputFormat(data);
    } catch (Exception e) {
        result = Utils.handleException(this, "Failed to set data format", e);
    }

    transformed = null;
    if (result == null) {
        try {
            if (part != null)
                filtered = weka.filters.Filter.useFilter(data, part);
            else
                filtered = weka.filters.Filter.useFilter(data, pca);
        } catch (Exception e) {
            result = Utils.handleException(this, "Failed to apply filter", e);
            filtered = null;
        }
        if (filtered != null) {
            conv = new WekaInstancesToSpreadSheet();
            conv.setInput(filtered);
            result = conv.convert();
            if (result == null) {
                transformed = (SpreadSheet) conv.getOutput();
                // shorten column names again
                if (part != null) {
                    for (i = 0; i < transformed.getColumnCount(); i++) {
                        colName = transformed.getColumnName(i);
                        colName = colName.replaceFirst("filtered-[0-9]*-", "");
                        transformed.getHeaderRow().getCell(i).setContentAsString(colName);
                    }
                }
            }
        }
    }

    if (result == null) {
        // get the coefficients from the filter
        m_Scores = transformed;
        coeff = pca.getCoefficients();
        m_Loadings = extractLoadings(data, coeff);
        m_Loadings.setName("Loadings for " + data.relationName());
    }

    return result;
}
From source file:cezeri.utils.FactoryInstance.java
public static String[] getDefaultClasses(Instances data) {
    // String[] str = getOriginalClasses(data);
    int n = data.numDistinctValues(data.classAttribute());
    // int n = data.numClasses();
    String[] ret = new String[n];
    for (int i = 0; i < n; i++) {
        ret[i] = i + "";
    }
    return ret;
}
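Note: for a nominal attribute, numDistinctValues counts the declared labels rather than the values actually occurring in the data, so with a nominal class attribute the call above normally returns the same value as the commented-out data.numClasses() alternative; only for a numeric attribute does it count the distinct non-missing values present in the data set.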
From source file:com.mycompany.neuralnetwork.NeuralNetworkClassifier.java
@Override
public void buildClassifier(Instances instances) throws Exception {
    int inputCount = instances.numAttributes() - 1;
    List<Integer> nodesPerLayer = new ArrayList<>();
    for (int i = 0; i < layers - 1; i++) {
        nodesPerLayer.add(inputCount);
    }
    nodesPerLayer.add(instances.numDistinctValues(instances.classIndex()));
    network = new Network(inputCount, nodesPerLayer);
    ArrayList<Double> errorsPerIteration = new ArrayList<>();
    for (int j = 0; j < iterations; j++) {
        double errorsPer = 0;
        for (int k = 0; k < instances.numInstances(); k++) {
            Instance instance = instances.instance(k);
            List<Double> input = new ArrayList<>();
            for (int i = 0; i < instance.numAttributes(); i++) {
                if (Double.isNaN(instance.value(i)) && i != instance.classIndex())
                    input.add(0.0);
                else if (i != instance.classIndex())
                    input.add(instance.value(i));
            }
            errorsPer += network.train(input, instance.value(instance.classIndex()), learningFactor);
        }
        errorsPerIteration.add(errorsPer);
    }
    // Display errors; this is used to collect the data for the graph
    //for (Double d : errorsPerIteration) {
    //    System.out.println(d);
    //}
}
From source file:decisiontree.MyID3.java
private void makeTree(Instances data) {
    // Check if no instances have reached this node.
    if (data.numInstances() == 0) {
        splitAttr = null;
        leafValue = Double.NaN;
        leafDist = new double[data.numClasses()];
        return;
    }

    if (data.numDistinctValues(data.classIndex()) == 1) {
        leafValue = data.firstInstance().classValue();
        return;
    }

    // Compute attribute with maximum information gain.
    double[] infoGains = new double[data.numAttributes()];
    Enumeration attEnum = data.enumerateAttributes();
    while (attEnum.hasMoreElements()) {
        Attribute att = (Attribute) attEnum.nextElement();
        infoGains[att.index()] = computeInfoGain(data, att);
    }
    splitAttr = data.attribute(maxIndex(infoGains));

    // Make leaf if information gain is zero.
    // Otherwise create successors.
    if (Utils.eq(infoGains[splitAttr.index()], 0)) {
        splitAttr = null;
        leafDist = new double[data.numClasses()];
        Enumeration instEnum = data.enumerateInstances();
        while (instEnum.hasMoreElements()) {
            Instance inst = (Instance) instEnum.nextElement();
            leafDist[(int) inst.classValue()]++;
        }
        normalize(leafDist);
        leafValue = Utils.maxIndex(leafDist);
        classAttr = data.classAttribute();
    } else {
        Instances[] splitData = splitData(data, splitAttr);
        child = new MyID3[splitAttr.numValues()];
        for (int j = 0; j < splitAttr.numValues(); j++) {
            child[j] = new MyID3();
            child[j].makeTree(splitData[j]);
        }
    }
}
From source file:elh.eus.absa.CLI.java
License:Open Source License
/**
 * Main access to the train-atc functionalities. Train ATC using a double one vs. all classifier
 * (E and A) for E#A aspect categories.
 * @throws Exception
 */
public final void trainATC2(final InputStream inputStream) throws IOException {
    // load training parameters file
    String paramFile = parsedArguments.getString("params");
    String testFile = parsedArguments.getString("testset");
    String paramFile2 = parsedArguments.getString("params2");
    String corpusFormat = parsedArguments.getString("corpusFormat");
    //String validation = parsedArguments.getString("validation");
    String lang = parsedArguments.getString("language");
    //int foldNum = Integer.parseInt(parsedArguments.getString("foldNum"));
    //boolean printPreds = parsedArguments.getBoolean("printPreds");
    boolean nullSentenceOpinions = parsedArguments.getBoolean("nullSentences");
    boolean onlyTest = parsedArguments.getBoolean("testOnly");
    double threshold = 0.5;
    double threshold2 = 0.5;
    String modelsPath = "/home/inaki/elixa-atp/ovsaModels";

    CorpusReader reader = new CorpusReader(inputStream, corpusFormat, nullSentenceOpinions, lang);
    Features atcTrain = new Features(reader, paramFile, "3");
    Instances traindata = atcTrain.loadInstances(true, "atc");

    if (onlyTest) {
        if (FileUtilsElh.checkFile(testFile)) {
            System.err.println("read from test file");
            reader = new CorpusReader(new FileInputStream(new File(testFile)), corpusFormat,
                    nullSentenceOpinions, lang);
            atcTrain.setCorpus(reader);
            traindata = atcTrain.loadInstances(true, "atc");
        }
    }

    //setting class attribute (entCat|attCat|entAttCat|polarityCat)

    //HashMap<String, Integer> opInst = atcTrain.getOpinInst();
    //WekaWrapper classifyAtts;
    WekaWrapper onevsall;
    try {
        //classify.printMultilabelPredictions(classify.multiLabelPrediction()); */

        //onevsall
        Instances entdata = new Instances(traindata);
        entdata.deleteAttributeAt(entdata.attribute("attCat").index());
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        entdata.setClassIndex(entdata.attribute("entCat").index());

        onevsall = new WekaWrapper(entdata, true);

        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "entCat");
            System.out.println("trainATC: one vs all models ready");
        }
        onevsall.setTestdata(entdata);
        HashMap<Integer, HashMap<String, Double>> ovsaRes = onevsall.predictOneVsAll(modelsPath,
                paramFile + "entCat");
        System.out.println("trainATC: one vs all predictions ready");
        HashMap<Integer, String> instOps = new HashMap<Integer, String>();
        for (String oId : atcTrain.getOpinInst().keySet()) {
            instOps.put(atcTrain.getOpinInst().get(oId), oId);
        }

        atcTrain = new Features(reader, paramFile2, "3");
        entdata = atcTrain.loadInstances(true, "attTrain2_data");
        entdata.deleteAttributeAt(entdata.attribute("entAttCat").index());
        //entdata.setClassIndex(entdata.attribute("entCat").index());

        Attribute insAtt = entdata.attribute("instanceId");
        double maxInstId = entdata.kthSmallestValue(insAtt, entdata.numDistinctValues(insAtt) - 1);
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins" + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold) {
                    //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                    // for the first one update the instances
                    if (oSubId >= 1) {
                        Instance newIns = new SparseInstance(currentInst);
                        newIns.setDataset(entdata);
                        entdata.add(newIns);
                        newIns.setValue(insAtt, maxInstId + oSubId);
                        newIns.setClassValue(cl);
                        instOps.put((int) maxInstId + oSubId, oId);
                    }
                    // if there are more, create new instances
                    else {
                        currentInst.setClassValue(cl);
                        //create and add opinion to the structure
                        // trgt, offsetFrom, offsetTo, polarity, cat, sId);
                        //Opinion op = new Opinion(instOps.get(i)+"_"+oSubId, "", 0, 0, "", cl, sId);
                        //reader.addOpinion(op);
                    }
                    oSubId++;
                }
            } //finished updating instances data
        }

        entdata.setClass(entdata.attribute("attCat"));
        onevsall = new WekaWrapper(entdata, true);

        /**
         * Second classifier ("Bigarren sailkatzailea")
         */
        if (!onlyTest) {
            onevsall.trainOneVsAll(modelsPath, paramFile + "attCat");
            System.out.println("trainATC: one vs all attcat models ready");
        }

        ovsaRes = onevsall.predictOneVsAll(modelsPath, paramFile + "entAttCat");

        insAtt = entdata.attribute("instanceId");
        maxInstId = entdata.kthSmallestValue(insAtt, insAtt.numValues());
        System.err.println("last instance has index: " + maxInstId);
        for (int ins = 0; ins < entdata.numInstances(); ins++) {
            System.err.println("ins: " + ins);
            int i = (int) entdata.instance(ins).value(insAtt);
            Instance currentInst = entdata.instance(ins);
            //System.err.println("instance "+i+" oid "+kk.get(i+1)+"kk contains key i?"+kk.containsKey(i));
            String sId = reader.getOpinion(instOps.get(i)).getsId();
            String oId = instOps.get(i);
            reader.removeSentenceOpinions(sId);
            int oSubId = 0;
            for (String cl : ovsaRes.get(i).keySet()) {
                //System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                if (ovsaRes.get(i).get(cl) > threshold2) {
                    ///System.err.println("instance: "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                    if (ovsaRes.get(i).get(cl) > threshold) {
                        //System.err.println("one got through ! instance "+i+" class "+cl+" value: "+ovsaRes.get(i).get(cl));
                        // for the first one update the instances
                        if (oSubId >= 1) {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            // trgt, offsetFrom, offsetTo, polarity, cat, sId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        // if there are more, create new instances
                        else {
                            String label = currentInst.stringValue(entdata.attribute("entAtt")) + "#" + cl;
                            //create and add opinion to the structure
                            // trgt, offsetFrom, offsetTo, polarity, cat, sId);
                            reader.removeOpinion(oId);
                            Opinion op = new Opinion(oId + "_" + oSubId, "", 0, 0, "", label, sId);
                            reader.addOpinion(op);
                        }
                        oSubId++;
                    }
                } //finished updating instances data
            }
        }
        reader.print2Semeval2015format(paramFile + "entAttCat.xml");
    } catch (Exception e) {
        e.printStackTrace();
    }
    //traindata.setClass(traindata.attribute("entAttCat"));
    System.err.println("DONE CLI train-atc2 (oneVsAll)");
}
From source file:explorer.ChordalysisModelling.java
License:Open Source License
public void buildModelNoExplore(Instances dataset) {
    this.nbInstances = dataset.numInstances();
    this.dataset = dataset;

    int[] variables = new int[dataset.numAttributes()];
    int[] nbValuesForAttribute = new int[variables.length];
    for (int i = 0; i < variables.length; i++) {
        variables[i] = i;
        nbValuesForAttribute[i] = dataset.numDistinctValues(i);
    }

    this.lattice = new Lattice(dataset);
    this.entropyComputer = new EntropyComputer(dataset.numInstances(), this.lattice);
    this.scorer = new GraphActionScorerPValue(nbInstances, entropyComputer);
    this.bestModel = new DecomposableModel(variables, nbValuesForAttribute);
    this.pq = new MyPriorityQueue(variables.length, bestModel, scorer);
    for (int i = 0; i < variables.length; i++) {
        for (int j = i + 1; j < variables.length; j++) {
            pq.enableEdge(i, j);
        }
    }
}
From source file:explorer.ChordalysisModelling.java
License:Open Source License
public void buildModelNoExplore(Instances dataset, ArffReader loader) throws IOException {
    this.dataset = dataset;

    int[] variables = new int[dataset.numAttributes()];
    int[] nbValuesForAttribute = new int[variables.length];
    for (int i = 0; i < variables.length; i++) {
        variables[i] = i;
        nbValuesForAttribute[i] = dataset.numDistinctValues(i);
    }

    this.lattice = new Lattice(dataset, loader);
    this.nbInstances = this.lattice.getNbInstances();
    this.entropyComputer = new EntropyComputer(nbInstances, this.lattice);
    this.scorer = new GraphActionScorerPValue(nbInstances, entropyComputer);
    this.bestModel = new DecomposableModel(variables, nbValuesForAttribute);
    this.pq = new MyPriorityQueue(variables.length, bestModel, scorer);
    for (int i = 0; i < variables.length; i++) {
        for (int j = i + 1; j < variables.length; j++) {
            pq.enableEdge(i, j);
        }
    }
}
From source file:explorer.ChordalysisModellingMML.java
License:Open Source License
public void buildModelNoExplore(Instances dataset) {
    this.nbInstances = dataset.numInstances();
    this.dataset = dataset;

    int[] variables = new int[dataset.numAttributes()];
    int[] nbValuesForAttribute = new int[variables.length];
    for (int i = 0; i < variables.length; i++) {
        variables[i] = i;
        nbValuesForAttribute[i] = dataset.numDistinctValues(i);
    }

    this.lattice = new Lattice(dataset);
    this.computer = new MessageLengthFactorialComputer(dataset.numInstances(), this.lattice);
    this.scorer = new GraphActionScorerMML(nbInstances, computer);
    this.bestModel = new DecomposableModel(variables, nbValuesForAttribute);
    this.pq = new MyPriorityQueue(variables.length, bestModel, scorer);
    for (int i = 0; i < variables.length; i++) {
        for (int j = i + 1; j < variables.length; j++) {
            pq.enableEdge(i, j);
        }
    }
}
From source file:explorer.ChordalysisModellingMML.java
License:Open Source License
public void buildModelNoExplore(Instances dataset, ArffReader loader) throws IOException {
    this.dataset = dataset;

    int[] variables = new int[dataset.numAttributes()];
    int[] nbValuesForAttribute = new int[variables.length];
    for (int i = 0; i < variables.length; i++) {
        variables[i] = i;
        nbValuesForAttribute[i] = dataset.numDistinctValues(i);
    }

    this.lattice = new Lattice(dataset, loader);
    this.nbInstances = this.lattice.getNbInstances();
    this.computer = new MessageLengthFactorialComputer(nbInstances, this.lattice);
    this.scorer = new GraphActionScorerMML(nbInstances, computer);
    this.bestModel = new DecomposableModel(variables, nbValuesForAttribute);
    this.pq = new MyPriorityQueue(variables.length, bestModel, scorer);
    for (int i = 0; i < variables.length; i++) {
        for (int j = i + 1; j < variables.length; j++) {
            pq.enableEdge(i, j);
        }
    }
}
From source file:ffnn.FFNNTubesAI.java
@Override
public void buildClassifier(Instances i) throws Exception {
    Instance temp_instance = null;
    RealMatrix error_output;
    RealMatrix error_hidden;
    RealMatrix input_matrix;
    RealMatrix hidden_matrix;
    RealMatrix output_matrix;
    Instances temp_instances;
    int r = 0;
    Scanner scan = new Scanner(System.in);

    output_layer = i.numDistinctValues(i.classIndex()); //3
    temp_instances = filterNominalNumeric(i);

    if (output_layer == 2) {
        Add filter = new Add();
        filter.setAttributeIndex("last");
        filter.setAttributeName("dummy");
        filter.setInputFormat(temp_instances);
        temp_instances = Filter.useFilter(temp_instances, filter);
        // System.out.println(temp_instances);
        for (int j = 0; j < temp_instances.numInstances(); j++) {
            if (temp_instances.instance(j).value(temp_instances.numAttributes() - 2) == 0) {
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 2, 1);
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 1, 0);
            } else {
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 2, 0);
                temp_instances.instance(j).setValue(temp_instances.numAttributes() - 1, 1);
            }
        }
    }

    //temp_instances.randomize(temp_instances.getRandomNumberGenerator(1));
    //System.out.println(temp_instances);

    input_layer = temp_instances.numAttributes() - output_layer; //4

    hidden_layer = 0;
    while (hidden_layer < 1) {
        System.out.print("Hidden layer : ");
        hidden_layer = scan.nextInt();
    }
    int init_hidden = hidden_layer;
    error_hidden = new BlockRealMatrix(1, hidden_layer);
    error_output = new BlockRealMatrix(1, output_layer);
    input_matrix = new BlockRealMatrix(1, input_layer + 1); // add the bias

    buildWeight(input_layer, hidden_layer, output_layer);

    long last_time = System.nanoTime();
    double last_error_rate = 1;
    double best_error_rate = 1;

    double last_update = System.nanoTime();

    // number of iterations
    // for (long itr = 0; last_error_rate > 0.001; ++itr) {
    for (long itr = 0; itr < 50000; ++itr) {
        if (r == 10) {
            break;
        }
        long time = System.nanoTime();
        if (time - last_time > 2000000000) {
            Evaluation eval = new Evaluation(i);
            eval.evaluateModel(this, i);

            double accry = eval.correct() / eval.numInstances();
            if (eval.errorRate() < last_error_rate) {
                last_update = System.nanoTime();
                if (eval.errorRate() < best_error_rate)
                    SerializationHelper.write(accry + "-" + time + ".model", this);
            }

            if (accry > 0)
                last_error_rate = eval.errorRate();

            // restart after too long without improvement
            if (time - last_update > 30000000000L) {
                last_update = System.nanoTime();
                learning_rate = random() * 0.05;
                hidden_layer = (int) (10 + floor(random() * 15));
                hidden_layer = (int) floor((hidden_layer / 25) * init_hidden);
                if (hidden_layer == 0) {
                    hidden_layer = 1;
                }
                itr = 0;
                System.out.println("RESTART " + learning_rate + " " + hidden_layer);
                buildWeight(input_layer, hidden_layer, output_layer);
                r++;
            }

            System.out.println(accry + " " + itr);
            last_time = time;
        }

        for (int j = 0; j < temp_instances.numInstances(); j++) {
            // forward pass
            temp_instance = temp_instances.instance(j);

            for (int k = 0; k < input_layer; k++) {
                input_matrix.setEntry(0, k, temp_instance.value(k));
            }
            input_matrix.setEntry(0, input_layer, 1.0); // bias

            hidden_matrix = input_matrix.multiply(weight1);
            for (int y = 0; y < hidden_layer; ++y) {
                hidden_matrix.setEntry(0, y, sig(hidden_matrix.getEntry(0, y)));
            }

            output_matrix = hidden_matrix.multiply(weight2).add(bias2);
            for (int y = 0; y < output_layer; ++y) {
                output_matrix.setEntry(0, y, sig(output_matrix.getEntry(0, y)));
            }

            // backward pass
            // error of layer 2
            double total_err = 0;
            for (int k = 0; k < output_layer; k++) {
                double o = output_matrix.getEntry(0, k);
                double t = temp_instance.value(input_layer + k);
                double err = o * (1 - o) * (t - o);
                total_err += err * err;
                error_output.setEntry(0, k, err);
            }

            // back propagation for layer 2
            for (int y = 0; y < hidden_layer; y++) {
                for (int x = 0; x < output_layer; ++x) {
                    double wold = weight2.getEntry(y, x);
                    double correction = learning_rate * error_output.getEntry(0, x) * hidden_matrix.getEntry(0, y);
                    weight2.setEntry(y, x, wold + correction);
                }
            }

            for (int x = 0; x < output_layer; ++x) {
                double correction = learning_rate * error_output.getEntry(0, x); // treat the bias input as 1
                bias2.setEntry(0, x, bias2.getEntry(0, x) + correction);
            }

            // error of layer 1
            for (int k = 0; k < hidden_layer; ++k) {
                double o = hidden_matrix.getEntry(0, k);
                double t = 0;
                for (int x = 0; x < output_layer; ++x) {
                    t += error_output.getEntry(0, x) * weight2.getEntry(k, x);
                }
                double err = o * (1 - o) * t;
                error_hidden.setEntry(0, k, err);
            }

            // back propagation for layer 1
            for (int y = 0; y < input_layer + 1; ++y) {
                for (int x = 0; x < hidden_layer; ++x) {
                    double wold = weight1.getEntry(y, x);
                    double correction = learning_rate * error_hidden.getEntry(0, x) * input_matrix.getEntry(0, y);
                    weight1.setEntry(y, x, wold + correction);
                }
            }
        }
    }
}