List of usage examples for weka.core Attribute name
public final String name()
From source file:DocClassifier.java
public Instance createInstance(File file) { BufferedReader reader = null; try {// w ww .j ava 2 s. co m Instance inst = new Instance(this.attrList.size()); reader = new BufferedReader(new FileReader(file)); Map<Attribute, Double> tFreqMap = new HashMap<Attribute, Double>(); while (reader.ready()) { String line = reader.readLine(); String[] words = line.split(" "); for (int i = 0; i < attrList.size() - 1; ++i) { Attribute attr = (Attribute) attrList.elementAt(i); int tFreq = termFreq(attr.name(), words); Double prevFreq = tFreqMap.get(attr); tFreqMap.put(attr, ((prevFreq != null) ? (prevFreq + tFreq) : tFreq)); } } normalizeVector(tFreqMap); // System.err.println("\nInstance:"); for (Attribute attr : tFreqMap.keySet()) { inst.setValue(attr, tFreqMap.get(attr) * idfMap.get(attr.name())); //System.err.print(attr.name()+":"+inst.value(attr)); } inst.setValue((Attribute) attrList.lastElement(), file.getName().substring(0, 3).toLowerCase()); return inst; } catch (FileNotFoundException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } finally { try { if (reader != null) { reader.close(); } } catch (IOException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } } return null; }
From source file:MPCKMeans.java
License:Open Source License
/**
 * Actual KMeans function: runs the metric-pairwise-constrained k-means loop
 * (E-step assignment, M-step centroid update, optional internal metric training)
 * until {@link #convergenceCheck} reports convergence.
 * <p>
 * Progress and per-term objective components (variance, cannot-link, must-link,
 * normalizer, regularizer) are printed each iteration. If
 * {@code m_ConstraintIncoherenceFile} is set, constraint incoherence is logged
 * there per iteration.
 *
 * @throws Exception if assignment, centroid update, or metric training fails
 */
protected void runKMeans() throws Exception {
    boolean converged = false;
    m_Iterations = 0;
    m_numBlankIterations = 0;
    m_Objective = Double.POSITIVE_INFINITY;

    if (!m_isOfflineMetric) {
        // reset the learned metric(s) and their learners before clustering starts
        if (m_useMultipleMetrics) {
            for (int i = 0; i < m_metrics.length; i++) {
                m_metrics[i].resetMetric();
                m_metricLearners[i].resetLearner();
            }
        } else {
            m_metric.resetMetric();
            m_metricLearner.resetLearner();
        }
        // initialize max CL penalties
        if (m_ConstraintsHash.size() > 0) {
            m_maxCLPenalties = calculateMaxCLPenalties();
        }
    }

    // initialize m_ClusterAssignments: -1 marks "not yet assigned"
    for (int i = 0; i < m_NumClusters; i++) {
        m_ClusterAssignments[i] = -1;
    }

    PrintStream fincoh = null;
    if (m_ConstraintIncoherenceFile != null) {
        fincoh = new PrintStream(new FileOutputStream(m_ConstraintIncoherenceFile));
    }

    while (!converged) {
        System.out.println("\n" + m_Iterations + ". Objective function: " + ((float) m_Objective));
        m_OldObjective = m_Objective;

        // E-step: reassign points; count of moved points drives the blank-iteration counter
        int numMovedPoints = findBestAssignments();
        m_numBlankIterations = (numMovedPoints == 0) ? m_numBlankIterations + 1 : 0;
        // calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after point assignment(CALC)");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM="
                + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                + ((float) m_objRegularizer));

        // M-step: recompute centroids from the new assignments
        updateClusterCentroids();
        // calculateObjectiveFunction(false);
        System.out.println((float) m_Objective + " - Objective function after centroid estimation");
        System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks) + "\tM="
                + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                + ((float) m_objRegularizer));

        // optional metric-learning step (only when trained internally and not offline)
        if (m_Trainable == TRAINING_INTERNAL && !m_isOfflineMetric) {
            updateMetricWeights();
            if (m_verbose) {
                calculateObjectiveFunction(true);
                System.out.println((float) m_Objective + " - Objective function after metric update");
                System.out.println("\tvar=" + ((float) m_objVariance) + "\tC=" + ((float) m_objCannotLinks)
                        + "\tM=" + ((float) m_objMustLinks) + "\tLOG=" + ((float) m_objNormalizer) + "\tREG="
                        + ((float) m_objRegularizer));
            }
            // metric changed, so the cannot-link penalty ceiling must be recomputed
            if (m_ConstraintsHash.size() > 0) {
                m_maxCLPenalties = calculateMaxCLPenalties();
            }
        }
        if (fincoh != null) {
            printConstraintIncoherence(fincoh);
        }
        converged = convergenceCheck(m_OldObjective, m_Objective);
        m_Iterations++;
    }
    if (fincoh != null) {
        fincoh.close();
    }
    System.out.println("Converged!");
    System.err.print("Its\t" + m_Iterations + "\t");

    if (m_verbose) {
        System.out.println("Done clustering; top cluster features: ");
        for (int i = 0; i < m_NumClusters; i++) {
            System.out.println("Centroid " + i);
            // reverse-ordered TreeMap: highest attribute values first.
            // NOTE(review): keyed by value, so attributes with equal values collide
            // and only one survives — confirm this is acceptable for display purposes.
            TreeMap map = new TreeMap(Collections.reverseOrder());
            Instance centroid = m_ClusterCentroids.instance(i);
            for (int j = 0; j < centroid.numValues(); j++) {
                Attribute attr = centroid.attributeSparse(j);
                map.put(new Double(centroid.value(attr)), attr.name());
            }
            // print the top 5 features of this centroid
            Iterator it = map.entrySet().iterator();
            for (int j = 0; j < 5 && it.hasNext(); j++) {
                Map.Entry entry = (Map.Entry) it.next();
                System.out.println("\t" + entry.getKey() + "\t" + entry.getValue());
            }
        }
    }
}
From source file:adams.data.instance.Instance.java
License:Open Source License
/**
 * Clears the container and adds the data from the weka.core.Instance
 * (internal values). Uses only the attributes specified in the range.
 *
 * @param inst the instance to use
 * @param index the row index in the original dataset, use -1 to ignore
 * @param additional the indices of the additional attribute values to
 *          store in the report
 * @param range the range of attributes to limit the instance to
 * @param attTypes whether to restrict to attributes types, null or
 *          zero-length array means no restriction
 * @see Attribute
 */
public void set(weka.core.Instance inst, int index, int[] additional, Range range, HashSet<Integer> attTypes) {
    ArrayList<InstancePoint> list;
    int i;
    Attribute att;
    String fieldStr;

    clear();

    // keep reference to header (0-row copy, attributes only)
    m_DatasetHeader = new Instances(inst.dataset(), 0);

    // collect the data points, skipping the class attribute, out-of-range
    // attributes and (optionally) disallowed attribute types
    range.setMax(inst.numAttributes());
    list = new ArrayList<InstancePoint>();
    for (i = 0; i < inst.numAttributes(); i++) {
        if (i == inst.classIndex())
            continue;
        if (!range.isInRange(i))
            continue;
        if ((attTypes != null) && (!attTypes.contains(inst.attribute(i).type())))
            continue;
        list.add(new InstancePoint(i, inst.value(i)));
    }
    addAll(list);

    // create artificial report
    m_Report.addParameter(REPORT_DATASET, m_DatasetHeader.relationName());
    att = m_DatasetHeader.attribute(ArffUtils.getDBIDName());
    if (att != null) {
        m_Report.addParameter(REPORT_DB_ID, new Double(inst.value(att)));
        m_Report.setDatabaseID((int) inst.value(att));
    }
    att = m_DatasetHeader.attribute(ArffUtils.getIDName());
    if (att != null)
        m_Report.addParameter(REPORT_ID, new Double(inst.value(att)));

    // class
    if (inst.classIndex() > -1) {
        if (inst.classAttribute().isNumeric()) {
            // NOTE(review): the field is added as NUMERIC here and then added
            // again (STRING or NUMERIC) in both branches below — the first
            // addField looks redundant; confirm Report.addField is idempotent.
            m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC));
            if (inst.classIsMissing()) {
                // missing numeric class is stored as the string "?"
                m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
                m_Report.addParameter(REPORT_CLASS, "?");
            } else {
                m_Report.addField(new Field(REPORT_CLASS, DataType.NUMERIC));
                m_Report.addParameter(REPORT_CLASS, Double.toString(inst.classValue()));
            }
        } else {
            m_Report.addField(new Field(REPORT_CLASS, DataType.STRING));
            if (inst.classIsMissing())
                m_Report.addParameter(REPORT_CLASS, "?");
            else
                m_Report.addParameter(REPORT_CLASS, inst.stringValue(inst.classIndex()));
        }
    }

    // row (stored 1-based)
    if (index != -1) {
        m_Report.addField(new Field(REPORT_ROW, DataType.NUMERIC));
        m_Report.addParameter(REPORT_ROW, new Double(index + 1));
    }

    // additional attributes, stored as "<prefix><1-based index>-<name>"
    for (i = 0; i < additional.length; i++) {
        att = inst.attribute(additional[i]);
        fieldStr = REPORT_ADDITIONAL_PREFIX + (additional[i] + 1) + "-" + att.name();
        if (att.isNumeric()) {
            m_Report.addField(new Field(fieldStr, DataType.NUMERIC));
            m_Report.addParameter(fieldStr, inst.value(additional[i]));
        } else {
            m_Report.addField(new Field(fieldStr, DataType.STRING));
            m_Report.addParameter(fieldStr, inst.stringValue(additional[i]));
        }
    }

    // display ID (hashcode of string representation of Instance)
    if (getID().length() == 0)
        setID("" + inst.toString().hashCode());
}
From source file:adams.data.instance.InstancePoint.java
License:Open Source License
/** * Returns a string representation of the point. * * @return the string representation */// w w w. j a va 2s.c om @Override public String toString() { String result; Attribute att; if (getParent() != null) { att = ((Instance) getParent()).getDatasetHeader().attribute(getX()); result = att.name(); result += "="; if (att.isNominal()) result += att.value(getY().intValue()); else result += getY(); } else { result = getX() + "," + getY(); } return result; }
From source file:adams.flow.sink.WekaCostBenefitAnalysis.java
License:Open Source License
/**
 * Plots the token (the panel and dialog have already been created at
 * this stage).
 *
 * @param token the token to display; payload is either an
 *          {@code Evaluation} or a {@code WekaEvaluationContainer}
 */
@Override
protected void display(Token token) {
    Evaluation eval;
    Attribute classAtt;
    Attribute classAttToUse;
    int classValue;
    ThresholdCurve tc;
    Instances result;
    ArrayList<String> newNames;
    CostBenefitAnalysis cbAnalysis;  // NOTE(review): declared but never used
    PlotData2D tempd;
    boolean[] cp;
    int n;

    try {
        // unwrap the evaluation from the container, if necessary
        if (token.getPayload() instanceof WekaEvaluationContainer)
            eval = (Evaluation) ((WekaEvaluationContainer) token.getPayload())
                    .getValue(WekaEvaluationContainer.VALUE_EVALUATION);
        else
            eval = (Evaluation) token.getPayload();
        if (eval.predictions() == null) {
            getLogger().severe("No predictions available from Evaluation object!");
            return;
        }
        classAtt = eval.getHeader().classAttribute();
        m_ClassIndex.setData(classAtt);
        classValue = m_ClassIndex.getIntIndex();
        tc = new ThresholdCurve();
        result = tc.getCurve(eval.predictions(), classValue);
        // Create a dummy class attribute with the chosen
        // class value as index 0 (if necessary).
        classAttToUse = eval.getHeader().classAttribute();
        if (classValue != 0) {
            newNames = new ArrayList<>();
            newNames.add(classAtt.value(classValue));
            for (int k = 0; k < classAtt.numValues(); k++) {
                if (k != classValue)
                    newNames.add(classAtt.value(k));
            }
            classAttToUse = new Attribute(classAtt.name(), newNames);
        }
        // assemble plot data
        tempd = new PlotData2D(result);
        tempd.setPlotName(result.relationName());
        tempd.m_alwaysDisplayPointsOfThisSize = 10;
        // specify which points are connected (all but the first)
        cp = new boolean[result.numInstances()];
        for (n = 1; n < cp.length; n++)
            cp[n] = true;
        tempd.setConnectPoints(cp);
        // add plot
        m_CostBenefitPanel.setCurveData(tempd, classAttToUse);
    } catch (Exception e) {
        handleException("Failed to display token: " + token, e);
    }
}
From source file:adams.flow.sink.WekaCostBenefitAnalysis.java
License:Open Source License
/**
 * Creates a new panel for the token.
 *
 * @param token the token to display in a new panel, can be null
 * @return the generated panel
 */
public AbstractDisplayPanel createDisplayPanel(Token token) {
    AbstractDisplayPanel result;
    String name;

    // panel title includes the relation name when a token is available
    if (token != null)
        name = "Cost curve (" + getEvaluation(token).getHeader().relationName() + ")";
    else
        name = "Cost curve";

    result = new AbstractComponentDisplayPanel(name) {
        private static final long serialVersionUID = -3513994354297811163L;

        protected CostBenefitAnalysis m_VisualizePanel;

        @Override
        protected void initGUI() {
            super.initGUI();
            setLayout(new BorderLayout());
            m_VisualizePanel = new CostBenefitAnalysis();
            add(m_VisualizePanel, BorderLayout.CENTER);
        }

        @Override
        public void display(Token token) {
            try {
                Evaluation eval = getEvaluation(token);
                Attribute classAtt = eval.getHeader().classAttribute();
                m_ClassIndex.setData(classAtt);
                int classValue = m_ClassIndex.getIntIndex();
                ThresholdCurve tc = new ThresholdCurve();
                Instances result = tc.getCurve(eval.predictions(), classValue);
                // Create a dummy class attribute with the chosen
                // class value as index 0 (if necessary).
                Attribute classAttToUse = eval.getHeader().classAttribute();
                if (classValue != 0) {
                    ArrayList<String> newNames = new ArrayList<>();
                    newNames.add(classAtt.value(classValue));
                    for (int k = 0; k < classAtt.numValues(); k++) {
                        if (k != classValue)
                            newNames.add(classAtt.value(k));
                    }
                    classAttToUse = new Attribute(classAtt.name(), newNames);
                }
                // assemble plot data
                PlotData2D tempd = new PlotData2D(result);
                tempd.setPlotName(result.relationName());
                tempd.m_alwaysDisplayPointsOfThisSize = 10;
                // specify which points are connected (all but the first)
                boolean[] cp = new boolean[result.numInstances()];
                for (int n = 1; n < cp.length; n++)
                    cp[n] = true;
                tempd.setConnectPoints(cp);
                // add plot
                m_VisualizePanel.setCurveData(tempd, classAttToUse);
            } catch (Exception e) {
                getLogger().log(Level.SEVERE, "Failed to display token: " + token, e);
            }
        }

        @Override
        public JComponent supplyComponent() {
            return m_VisualizePanel;
        }

        @Override
        public void clearPanel() {
        }

        public void cleanUp() {
        }
    };

    // display immediately when a token was supplied
    if (token != null)
        result.display(token);

    return result;
}
From source file:adams.flow.transformer.WekaReorderAttributesToReference.java
License:Open Source License
/**
 * Executes the flow item: reorders the attributes of the incoming
 * Instance/Instances to match the reference dataset, adding any attributes
 * that are missing from the input (via Add filters) before reordering.
 *
 * @return null if everything is fine, otherwise error message
 */
@Override
protected String doExecute() {
    String result;
    Instances dataOld;
    Instance instOld;
    Instances dataNew;
    Instance instNew;
    Attribute att;
    int i;
    StringBuilder order;
    List<Add> adds;
    Add add;
    int index;
    StringBuilder labels;
    int n;
    List<Filter> filters;
    Reorder reorder;

    result = null;

    // lazily load the reference dataset, if configured on-the-fly
    if (m_OnTheFly && (m_Reference == null)) {
        result = setUpReference();
        if (result != null)
            return result;
    }

    dataNew = null;
    instNew = null;

    // get input data: either a single Instance or a full dataset
    if (m_InputToken.getPayload() instanceof Instance) {
        instOld = (Instance) m_InputToken.getPayload();
        dataOld = instOld.dataset();
    } else {
        instOld = null;
        dataOld = (Instances) m_InputToken.getPayload();
    }

    // do we need to initialize filter?
    if (m_InitializeOnce || (m_Reorder == null)) {
        // check incoming data: in strict mode, every reference attribute must exist
        if (!m_Lenient) {
            for (i = 0; i < m_Reference.numAttributes(); i++) {
                att = m_Reference.attribute(i);
                if (dataOld.attribute(att.name()) == null) {
                    if (result == null)
                        result = "Missing attribute(s) in incoming data: " + att.name();
                    else
                        result += ", " + att.name();
                }
            }
            if (result != null)
                getLogger().severe(result);
        }

        if (result == null) {
            try {
                // determine indices: for each reference attribute, either find it in
                // the input or queue an Add filter that appends it at the end
                order = new StringBuilder();
                adds = new ArrayList<Add>();
                for (i = 0; i < m_Reference.numAttributes(); i++) {
                    att = m_Reference.attribute(i);
                    if (dataOld.attribute(att.name()) == null) {
                        // index the attribute will have after all pending Adds run
                        index = dataOld.numAttributes() + adds.size();
                        add = new Add();
                        add.setAttributeIndex("last");
                        add.setAttributeName(att.name());
                        add.setAttributeType(new SelectedTag(att.type(), Add.TAGS_TYPE));
                        if (att.isNominal()) {
                            // replicate the reference attribute's label list
                            labels = new StringBuilder();
                            for (n = 0; n < att.numValues(); n++) {
                                if (labels.length() > 0)
                                    labels.append(",");
                                labels.append(att.value(n));
                            }
                            add.setNominalLabels(labels.toString());
                        }
                        adds.add(add);
                    } else {
                        index = dataOld.attribute(att.name()).index();
                    }
                    // Reorder uses 1-based indices
                    if (order.length() > 0)
                        order.append(",");
                    order.append((index + 1));
                }

                // build reorder filter
                reorder = new Reorder();
                reorder.setAttributeIndices(order.toString());

                // build multifilter: Adds first, then the Reorder
                filters = new ArrayList<Filter>();
                filters.addAll(adds);
                filters.add(reorder);
                m_Reorder = new MultiFilter();
                m_Reorder.setFilters(filters.toArray(new Filter[filters.size()]));

                // initialize filter
                m_Reorder.setInputFormat(dataOld);
            } catch (Exception e) {
                result = handleException("Failed to initialize reorder filter!", e);
            }
        }
    }

    // reorder data
    if (result == null) {
        try {
            if (instOld != null) {
                // single-instance mode
                m_Reorder.input(instOld);
                m_Reorder.batchFinished();
                instNew = m_Reorder.output();
                if (m_KeepRelationName)
                    instNew.dataset().setRelationName(dataOld.relationName());
            } else {
                // batch mode
                dataNew = Filter.useFilter(dataOld, m_Reorder);
                if (m_KeepRelationName)
                    dataNew.setRelationName(dataOld.relationName());
            }
        } catch (Exception e) {
            result = handleException("Failed to reorder data!", e);
            instNew = null;
            dataNew = null;
        }
    }

    if (instNew != null)
        m_OutputToken = new Token(instNew);
    else if (dataNew != null)
        m_OutputToken = new Token(dataNew);

    return result;
}
From source file:adams.gui.visualization.instance.InstanceTableModel.java
License:Open Source License
/** * Returns the value at the given position. * * @param row the row in the table/* w ww . ja v a2s.c o m*/ * @param column the column in the table * @return the value */ public Object getValueAt(int row, int column) { Attribute att; att = getAttribute(column); if (column == 0) { return row + 1; } else if (att == null) { return ""; } else if (m_Data.instance(row).isMissing(att)) { return null; } else if (att.name().equals(ArffUtils.getDBIDName())) { return (int) m_Data.instance(row).value(att); } else if (att.name().equals(ArffUtils.getIDName())) { return m_Data.instance(row).stringValue(att).replaceAll("\'", ""); } else { switch (att.type()) { case Attribute.NUMERIC: return m_Data.instance(row).value(att); case Attribute.DATE: case Attribute.NOMINAL: case Attribute.STRING: case Attribute.RELATIONAL: return m_Data.instance(row).stringValue(att); default: return "???"; } } }
From source file:adams.gui.visualization.instance.InstanceTableModel.java
License:Open Source License
/** * Returns the class for the column.//from w w w.ja v a 2s. c o m * * @param column the column to retrieve the class for * @return the class */ public Class getColumnClass(int column) { Attribute att; att = getAttribute(column); if (column == 0) { return Integer.class; } else if (att == null) { return String.class; } else if (att.name().equals(ArffUtils.getDBIDName())) { // special case return Integer.class; } else { switch (att.type()) { case Attribute.NUMERIC: return Double.class; case Attribute.DATE: case Attribute.NOMINAL: case Attribute.STRING: case Attribute.RELATIONAL: return String.class; default: return String.class; } } }
From source file:affective.core.ArffLexiconEvaluator.java
License:Open Source License
/** * Processes all the dictionary files.// w ww. ja v a 2 s .c o m * @throws IOException an IOException will be raised if an invalid file is supplied */ public void processDict() throws IOException { BufferedReader reader = new BufferedReader(new FileReader(this.m_lexiconFile)); Instances lexInstances = new Instances(reader); // set upper value for word index lexiconWordIndex.setUpper(lexInstances.numAttributes() - 1); List<Attribute> numericAttributes = new ArrayList<Attribute>(); List<Attribute> nominalAttributes = new ArrayList<Attribute>(); // checks all numeric and nominal attributes and discards the word attribute for (int i = 0; i < lexInstances.numAttributes(); i++) { if (i != this.lexiconWordIndex.getIndex()) { if (lexInstances.attribute(i).isNumeric()) { numericAttributes.add(lexInstances.attribute(i)); // adds the attribute name to the message-level features to be calculated this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name()); } else if (lexInstances.attribute(i).isNominal()) { nominalAttributes.add(lexInstances.attribute(i)); // adds the attribute name together with the nominal value to the message-level features to be calculated int numValues = lexInstances.attribute(i).numValues(); for (int j = 0; j < numValues; j++) this.featureNames.add(this.lexiconName + "-" + lexInstances.attribute(i).name() + "-" + lexInstances.attribute(i).value(j)); } } } // Maps all words with their affective scores discarding missing values for (Instance inst : lexInstances) { if (inst.attribute(this.lexiconWordIndex.getIndex()).isString()) { String word = inst.stringValue(this.lexiconWordIndex.getIndex()); // stems the word word = this.m_stemmer.stem(word); // map numeric scores if (!numericAttributes.isEmpty()) { Map<String, Double> wordVals = new HashMap<String, Double>(); for (Attribute na : numericAttributes) { if (!weka.core.Utils.isMissingValue(inst.value(na))) wordVals.put(na.name(), inst.value(na)); } this.numDict.put(word, wordVals); } 
// map nominal associations if (!nominalAttributes.isEmpty()) { Map<String, String> wordCounts = new HashMap<String, String>(); for (Attribute no : nominalAttributes) { if (!weka.core.Utils.isMissingValue(inst.value(no))) { wordCounts.put(no.name(), no.value((int) inst.value(no))); } this.nomDict.put(word, wordCounts); } } } } }