List of usage examples for the weka.core.Attribute constructor Attribute
public Attribute(String attributeName)
From source file:DocClassifier.java
private FastVector createTerms(File[] files) { try {//from w ww . ja v a2s. c o m Set<String> termSet = new HashSet<String>(); for (File file : files) { BufferedReader reader = new BufferedReader(new FileReader(file)); Set<String> docTermSet = new HashSet<String>(); while (reader.ready()) { String line = reader.readLine(); String[] words = line.split(" "); for (String word : words) { Kelime[] kelimeler = this.zemberek.kelimeCozumle(word); if (kelimeler.length > 0) { String kok = kelimeler[0].kok().icerik(); docTermSet.add(kok); termSet.add(kok); } } } // DF for a doc for (String t : docTermSet) { Double freq = this.idfMap.get(t); this.idfMap.put(t, ((freq != null) ? (freq + 1) : 1)); } reader.close(); } //Remove some words like ve,veya,de,da,in from set termSet = PreProcesser.filterTermSet(termSet); //IDF Calculation for (String t : termSet) { Double df = this.idfMap.get(t); if (df != null) { this.idfMap.put(t, Math.log(files.length / df) / Math.log(2)); } else { this.idfMap.put(t, 0.0); } //System.out.println(t + ": " + df); } // Attribute creation //System.err.println("\nAttribute:"); FastVector terms = new FastVector(); for (String term : termSet) { terms.addElement(new Attribute(term)); // System.err.println(term + "-"); } // Class values are created Set<String> classSet = new HashSet<String>(); for (File file : files) { classSet.add(file.getName().substring(0, 3).toLowerCase()); } //System.err.println("\nClass:"); this.classValues = new FastVector(); for (String category : classSet) { this.classValues.addElement(category); // System.out.print(category + "-"); } terms.addElement(new Attribute(CLASS_ATTR_NAME, classValues)); return terms; } catch (FileNotFoundException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } catch (IOException ex) { Logger.getLogger(DocClassifier.class.getName()).log(Level.SEVERE, null, ex); } return null; }
From source file:PrincipalComponents.java
License:Open Source License
/** * Set up the header for the PC->original space dataset * * @return the output format/* w w w . ja v a2 s . com*/ * @throws Exception if something goes wrong */ private Instances setOutputFormatOriginal() throws Exception { ArrayList<Attribute> attributes = new ArrayList<Attribute>(); for (int i = 0; i < m_numAttribs; i++) { String att = m_trainInstances.attribute(i).name(); attributes.add(new Attribute(att)); } if (m_hasClass) { attributes.add((Attribute) m_trainHeader.classAttribute().copy()); } Instances outputFormat = new Instances(m_trainHeader.relationName() + "->PC->original space", attributes, 0); // set the class to be the last attribute if necessary if (m_hasClass) { outputFormat.setClassIndex(outputFormat.numAttributes() - 1); } return outputFormat; }
From source file:PrincipalComponents.java
License:Open Source License
/**
 * Set the format for the transformed data.
 *
 * <p>Components are visited from the last entry of {@code m_sortedEigens} to
 * the first, and one attribute is emitted per component until the accumulated
 * share of {@code m_sumOfEigenValues} reaches {@code m_coverVariance}. Each
 * attribute name is the linear combination of original attribute names with
 * their eigenvector coefficients.
 *
 * <p>When {@code m_maxAttrsInName > 0} only that many terms are written,
 * chosen by decreasing coefficient magnitude, and the name ends in
 * {@code "..."} if terms were omitted. Otherwise all terms are written in the
 * original attribute order. (The ordering decision previously tested
 * {@code m_numAttribs > 0}, which is always true inside the loop and made the
 * original-order branch unreachable.)
 *
 * @return a set of empty Instances (header only) in the new format, or
 *         {@code null} if no eigenvalues have been computed yet
 * @throws Exception if the output format can't be set
 */
private Instances setOutputFormat() throws Exception {
    if (m_eigenvalues == null) {
        return null;
    }

    double cumulative = 0.0;
    ArrayList<Attribute> attributes = new ArrayList<Attribute>();
    for (int i = m_numAttribs - 1; i >= 0; i--) {
        StringBuilder attName = new StringBuilder();

        // build array of coefficients; negated so an ascending sort yields
        // decreasing magnitude
        double[] coeff_mags = new double[m_numAttribs];
        for (int j = 0; j < m_numAttribs; j++) {
            coeff_mags[j] = -Math.abs(m_eigenvectors[j][m_sortedEigens[i]]);
        }
        int num_attrs = (m_maxAttrsInName > 0) ? Math.min(m_numAttribs, m_maxAttrsInName) : m_numAttribs;

        // this array contains the sorted indices of the coefficients
        int[] coeff_inds;
        if (m_maxAttrsInName > 0) {
            // if m_maxAttrsInName > 0, sort coefficients by decreasing magnitude
            coeff_inds = Utils.sort(coeff_mags);
        } else {
            // if m_maxAttrsInName <= 0, use all coeffs in original order
            coeff_inds = new int[m_numAttribs];
            for (int j = 0; j < m_numAttribs; j++) {
                coeff_inds[j] = j;
            }
        }

        // build final attName string
        for (int j = 0; j < num_attrs; j++) {
            double coeff_value = m_eigenvectors[coeff_inds[j]][m_sortedEigens[i]];
            if (j > 0 && coeff_value >= 0) {
                attName.append("+");
            }
            attName.append(
                    Utils.doubleToString(coeff_value, 5, 3) + m_trainInstances.attribute(coeff_inds[j]).name());
        }
        if (num_attrs < m_numAttribs) {
            attName.append("...");
        }

        attributes.add(new Attribute(attName.toString()));
        cumulative += m_eigenvalues[m_sortedEigens[i]];

        // stop once the requested proportion of variance is covered
        if ((cumulative / m_sumOfEigenValues) >= m_coverVariance) {
            break;
        }
    }

    if (m_hasClass) {
        attributes.add((Attribute) m_trainHeader.classAttribute().copy());
    }

    Instances outputFormat = new Instances(m_trainInstances.relationName() + "_principal components",
            attributes, 0);

    // set the class to be the last attribute if necessary
    if (m_hasClass) {
        outputFormat.setClassIndex(outputFormat.numAttributes() - 1);
    }

    m_outputNumAtts = outputFormat.numAttributes();
    return outputFormat;
}
From source file:ArrayLoader.java
License:Open Source License
/**
 * Return the full data set. If the structure hasn't yet been determined
 * by a call to getStructure then this method does so before processing
 * the rest of the data set.
 *
 * <p>The rows of {@code m_data} are first converted with {@code getInstance}
 * and collected in {@code m_cumulativeInstances}. The final attribute types
 * are then derived from the per-attribute hashtables in
 * {@code m_cumulativeStructure} (presumably filled by {@code getInstance};
 * verify there): an empty table yields a numeric attribute, a non-empty table
 * yields a string attribute when the index is in {@code m_StringAttributes},
 * and a nominal attribute otherwise. On return {@code m_structure} holds the
 * header of the new data set and {@code m_cumulativeStructure} is cleared.
 *
 * @return the full data set, including all converted instances
 * @exception IOException if there is no source or parsing fails
 */
public Instances getDataSet() throws IOException {
    if (m_data == null) {
        throw new IOException("No source has been specified");
    }
    if (m_structure == null) {
        getStructure();
    }

    // one (initially empty) value -> index table per attribute
    m_cumulativeStructure = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        m_cumulativeStructure.addElement(new Hashtable());
    }

    // convert every input row; each result is a FastVector of cell values
    m_cumulativeInstances = new FastVector();
    FastVector current;
    for (int i = 0; i < m_data.length; i++) {
        current = getInstance(m_data[i]);
        m_cumulativeInstances.addElement(current);
    }

    // decide the final type of every attribute
    FastVector atts = new FastVector(m_structure.numAttributes());
    for (int i = 0; i < m_structure.numAttributes(); i++) {
        String attname = m_structure.attribute(i).name();
        Hashtable tempHash = ((Hashtable) m_cumulativeStructure.elementAt(i));
        if (tempHash.size() == 0) {
            // no values recorded in the table: numeric attribute
            atts.addElement(new Attribute(attname));
        } else {
            if (m_StringAttributes.isInRange(i)) {
                // a null value list creates a string attribute
                atts.addElement(new Attribute(attname, (FastVector) null));
            } else {
                FastVector values = new FastVector(tempHash.size());
                // add dummy objects in order to make the FastVector's size == capacity
                for (int z = 0; z < tempHash.size(); z++) {
                    values.addElement("dummy");
                }
                // place every label at the index stored for it in the table
                Enumeration e = tempHash.keys();
                while (e.hasMoreElements()) {
                    Object ob = e.nextElement();
                    // if (ob instanceof Double) {
                    int index = ((Integer) tempHash.get(ob)).intValue();
                    String s = ob.toString();
                    // drop the first and last character when the label starts with a quote
                    if (s.startsWith("'") || s.startsWith("\""))
                        s = s.substring(1, s.length() - 1);
                    values.setElementAt(new String(s), index);
                    // }
                }
                atts.addElement(new Attribute(attname, values));
            }
        }
    }

    // make the instances
    String relationName;
    relationName = "ArrayData";
    Instances dataSet = new Instances(relationName, atts, m_cumulativeInstances.size());

    for (int i = 0; i < m_cumulativeInstances.size(); i++) {
        current = ((FastVector) m_cumulativeInstances.elementAt(i));
        double[] vals = new double[dataSet.numAttributes()];
        for (int j = 0; j < current.size(); j++) {
            Object cval = current.elementAt(j);
            if (cval instanceof String) {
                if (((String) cval).compareTo(m_MissingValue) == 0) {
                    // the cell equals the configured missing-value marker
                    vals[j] = Instance.missingValue();
                } else {
                    if (dataSet.attribute(j).isString()) {
                        vals[j] = dataSet.attribute(j).addStringValue((String) cval);
                    } else if (dataSet.attribute(j).isNominal()) {
                        // find correct index
                        Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                        int index = ((Integer) lookup.get(cval)).intValue();
                        vals[j] = index;
                    } else {
                        // a String cell in an attribute that is neither string nor nominal;
                        // note: the reported position is the 1-based instance index i, not j
                        throw new IllegalStateException("Wrong attribute type at position " + (i + 1) + "!!!");
                    }
                }
            } else if (dataSet.attribute(j).isNominal()) {
                // find correct index
                Hashtable lookup = (Hashtable) m_cumulativeStructure.elementAt(j);
                int index = ((Integer) lookup.get(cval)).intValue();
                vals[j] = index;
            } else if (dataSet.attribute(j).isString()) {
                // non-String cell in a string attribute: store its string form
                vals[j] = dataSet.attribute(j).addStringValue("" + cval);
            } else {
                // remaining case: the cell is cast to Double and stored as-is
                vals[j] = ((Double) cval).doubleValue();
            }
        }
        dataSet.add(new Instance(1.0, vals));
    }

    // keep only the header as the new structure
    m_structure = new Instances(dataSet, 0);
    m_cumulativeStructure = null; // conserve memory
    return dataSet;
}
From source file:ArrayLoader.java
License:Open Source License
/** * Assumes the first line of the file contains the attribute names. * Assumes all attributes are real (Reading the full data set with * getDataSet will establish the true structure). * *///from w w w . ja va 2 s.c o m private void readHeader(String[] column) throws IOException { FastVector attribNames = new FastVector(); // Assume first row of data are the column titles for (int i = 0; i < column.length; i++) { attribNames.addElement(new Attribute(column[i])); } m_structure = new Instances("DataArray", attribNames, 0); }
From source file:CJWeka.java
License:Open Source License
public Object addInstance(Object args) throws Exception { if (!(args instanceof String)) { throw new RuntimeException("Invalid type for execute"); }/* w ww .j a v a2 s. co m*/ StringBuffer retbuf = new StringBuffer(""); // function code goes in here String floatstring = (String) args; // convert floatstring to float/double array to instance String[] flostr = floatstring.split(" "); int nvalues = flostr.length; // add instance to ii if (my_attributes.isEmpty()) { // create attributes for all instances for (int j = 0; j < nvalues - 1; j++) { Attribute a = new Attribute(Integer.toString(j)); my_attributes.add(a); } classvals.add("0"); classvals.add("1"); /* classvals.add("2"); classvals.add("3"); classvals.add("4"); classvals.add("5"); classvals.add("6"); classvals.add("7");*/ Attribute cls = new Attribute("class", classvals); my_attributes.add(cls); ii = new Instances("my_instances", my_attributes, 0); } ii.setClassIndex(nvalues - 1); Instance inst = this.floatstringToInst(floatstring, ii, true); ii.add(inst); retbuf.append(ii.numInstances()); // return number of Instances in ii return retbuf.toString(); }
From source file:aaa.util.test.CreateArff.java
License:Open Source License
/** * Generates the Instances object and outputs it in ARFF format to stdout. * * @param args ignored/*from w w w . ja v a 2 s . c o m*/ * @throws Exception if generation of instances fails */ public static void main(String[] args) throws Exception { ArrayList<Attribute> atts; ArrayList<Attribute> attsRel; ArrayList<String> attVals; ArrayList<String> attValsRel; Instances data; Instances dataRel; double[] vals; double[] valsRel; int i; // 1. set up attributes atts = new ArrayList<Attribute>(); // - numeric atts.add(new Attribute("att1")); // - nominal attVals = new ArrayList<String>(); for (i = 0; i < 5; i++) attVals.add("val" + (i + 1)); atts.add(new Attribute("att2", attVals)); // - string atts.add(new Attribute("att3", (ArrayList<String>) null)); // - date atts.add(new Attribute("att4", "yyyy-MM-dd")); // - relational attsRel = new ArrayList<Attribute>(); // -- numeric attsRel.add(new Attribute("att5.1")); // -- nominal attValsRel = new ArrayList<String>(); for (i = 0; i < 5; i++) attValsRel.add("val5." + (i + 1)); attsRel.add(new Attribute("att5.2", attValsRel)); dataRel = new Instances("att5", attsRel, 0); atts.add(new Attribute("att5", dataRel, 0)); // 2. create Instances object data = new Instances("MyRelation", atts, 0); // 3. 
fill with data // first instance vals = new double[data.numAttributes()]; // - numeric vals[0] = Math.PI; // - nominal vals[1] = attVals.indexOf("val3"); // - string vals[2] = data.attribute(2).addStringValue("This is a string!"); // - date vals[3] = data.attribute(3).parseDate("2001-11-09"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.PI + 1; valsRel[1] = attValsRel.indexOf("val5.3"); dataRel.add(new DenseInstance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.PI + 2; valsRel[1] = attValsRel.indexOf("val5.2"); dataRel.add(new DenseInstance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new DenseInstance(1.0, vals)); // second instance vals = new double[data.numAttributes()]; // important: needs NEW array! // - numeric vals[0] = Math.E; // - nominal vals[1] = attVals.indexOf("val1"); // - string vals[2] = data.attribute(2).addStringValue("And another one!"); // - date vals[3] = data.attribute(3).parseDate("2000-12-01"); // - relational dataRel = new Instances(data.attribute(4).relation(), 0); // -- first instance valsRel = new double[2]; valsRel[0] = Math.E + 1; valsRel[1] = attValsRel.indexOf("val5.4"); dataRel.add(new DenseInstance(1.0, valsRel)); // -- second instance valsRel = new double[2]; valsRel[0] = Math.E + 2; valsRel[1] = attValsRel.indexOf("val5.1"); dataRel.add(new DenseInstance(1.0, valsRel)); vals[4] = data.attribute(4).addRelation(dataRel); // add data.add(new DenseInstance(1.0, vals)); // 4. output data System.out.println(data); }
From source file:activeSegmentation.feature.FeatureExtraction.java
License:Open Source License
/**
 * Creates one attribute per feature reported by {@code filterManager}.
 *
 * <p>Feature labels are requested with 1-based indices, from 1 up to and
 * including {@code filterManager.getNumOfFeatures()}.
 *
 * @return the attributes, in feature order
 */
private ArrayList<Attribute> createFeatureHeader() {
    ArrayList<Attribute> header = new ArrayList<Attribute>();

    int featureIndex = 1;
    while (featureIndex <= filterManager.getNumOfFeatures()) {
        header.add(new Attribute(filterManager.getLabel(featureIndex)));
        featureIndex++;
    }

    return header;
}
From source file:adams.data.conversion.ReportToWekaInstance.java
License:Open Source License
/** * Performs the actual conversion./*from w w w . j a va2 s . co m*/ * * @return the converted data * @throws Exception if something goes wrong with the conversion */ protected Object doConvert() throws Exception { Report report; Instance result; ArrayList atts; ArrayList attValues; int i; double[] values; report = (Report) m_Input; // generate header if (m_Header == null) { atts = new ArrayList(); for (i = 0; i < m_Fields.length; i++) { switch (m_Fields[i].getDataType()) { case NUMERIC: atts.add(new Attribute(m_Fields[i].getName())); break; case BOOLEAN: attValues = new ArrayList(); attValues.add("false"); attValues.add("true"); atts.add(new Attribute(m_Fields[i].getName(), attValues)); break; default: atts.add(new Attribute(m_Fields[i].getName(), (List) null)); break; } } m_Header = new Instances(getClass().getName(), atts, 0); } // generate instance values = new double[m_Header.numAttributes()]; for (i = 0; i < m_Fields.length; i++) { if (report.hasValue(m_Fields[i])) { switch (m_Fields[i].getDataType()) { case NUMERIC: values[i] = report.getDoubleValue(m_Fields[i]); break; case BOOLEAN: if (report.getBooleanValue(m_Fields[i])) values[i] = 1; else values[i] = 0; break; default: values[i] = m_Header.attribute(i).addStringValue("" + report.getValue(m_Fields[i])); break; } } else { values[i] = weka.core.Utils.missingValue(); } } result = new DenseInstance(1.0, values); result.setDataset(m_Header); return result; }
From source file:adams.data.conversion.SpreadSheetToWekaInstances.java
License:Open Source License
/**
 * Converts the input spreadsheet into a data set.
 *
 * <p>Column types are derived from the content types found in each column,
 * with LONG folded into DOUBLE. A column whose only type is DOUBLE becomes a
 * numeric attribute; DATE and TIME columns become date attributes using
 * {@code Constants.TIMESTAMP_FORMAT} and {@code Constants.TIME_FORMAT}. Any
 * other column becomes a nominal attribute with its sorted distinct values,
 * or a string attribute if there are more than {@code m_MaxLabels} distinct
 * values or {@code m_MaxLabels} is smaller than 1.
 *
 * <p>If the sheet is a {@code Dataset} with class attributes, the first class
 * attribute index is used as class index.
 *
 * @return the converted data
 * @throws Exception if something goes wrong with the conversion
 */
@Override
protected Object doConvert() throws Exception {
    SpreadSheet sheet = (SpreadSheet) m_Input;

    // create header
    ArrayList<Attribute> atts = new ArrayList<>();
    for (int col = 0; col < sheet.getColumnCount(); col++) {
        Collection<ContentType> types = sheet.getContentTypes(col);
        // treat LONG as DOUBLE
        if (types.contains(ContentType.DOUBLE))
            types.remove(ContentType.LONG);
        if (types.contains(ContentType.LONG)) {
            types.add(ContentType.DOUBLE);
            types.remove(ContentType.LONG);
        }

        String name = sheet.getHeaderRow().getCell(col).getContent();
        Attribute att = null;

        if (types.size() == 1) {
            ContentType type = (ContentType) types.toArray()[0];
            if (type == ContentType.DOUBLE)
                att = new Attribute(name);
            else if (type == ContentType.DATE)
                att = new Attribute(name, Constants.TIMESTAMP_FORMAT);
            else if (type == ContentType.TIME)
                att = new Attribute(name, Constants.TIME_FORMAT);
        }

        if (att == null) {
            // mixed or other content: collect the distinct non-missing cell contents
            HashSet<String> unique = new HashSet<>();
            for (int r = 0; r < sheet.getRowCount(); r++) {
                Cell cell = sheet.getRow(r).getCell(col);
                if ((cell != null) && !cell.isMissing())
                    unique.add(cell.getContent());
            }
            if ((unique.size() > m_MaxLabels) || (m_MaxLabels < 1)) {
                // too many labels (or nominal disabled): string attribute
                att = new Attribute(name, (FastVector) null);
            } else {
                ArrayList<String> labels = new ArrayList<>(unique);
                Collections.sort(labels);
                att = new Attribute(name, labels);
            }
        }

        atts.add(att);
    }

    Instances result = new Instances(Environment.getInstance().getProject(), atts, sheet.getRowCount());
    if (sheet.hasName())
        result.setRelationName(sheet.getName());

    // add data
    for (int r = 0; r < sheet.getRowCount(); r++) {
        Row row = sheet.getRow(r);
        double[] values = new double[result.numAttributes()];
        for (int col = 0; col < result.numAttributes(); col++) {
            Cell cell = row.getCell(col);
            values[col] = weka.core.Utils.missingValue();
            if ((cell == null) || cell.isMissing())
                continue;

            Attribute att = result.attribute(col);
            if (att.type() == Attribute.DATE) {
                values[col] = cell.isTime() ? cell.toTime().getTime() : cell.toDate().getTime();
            } else if (att.isNumeric()) {
                values[col] = Utils.toDouble(cell.getContent());
            } else if (att.isString()) {
                values[col] = att.addStringValue(cell.getContent());
            } else {
                // nominal: store the label's index
                values[col] = att.indexOfValue(cell.getContent());
            }
        }
        result.add(new DenseInstance(1.0, values));
    }

    if (sheet instanceof Dataset) {
        int[] classIndices = ((Dataset) sheet).getClassAttributeIndices();
        if (classIndices.length > 0)
            result.setClassIndex(classIndices[0]);
    }

    return result;
}