List of usage examples for weka.core Utils quote
public static String quote(String string)
From source file:csv.CSVParser.java
public void processCSV(boolean doPosTag) { try {/*from w w w. j av a2 s . c om*/ Set<String> uniqueWords = new HashSet<>(); int wordCount = 0; reader = new BufferedReader(new FileReader(file)); writer = new FileWriter(outFile); writer.append(fileHeader); while ((line = reader.readLine()) != null) { String[] parts = line.split(separator); String processed = (textPreprocessing.preprocess(parts[1], doPosTag)); // String[] tokenized = openNlp.tokenize(processed, false); wordCount += tokenized.length; for (String string : tokenized) { uniqueWords.add(string); } writer.append(parts[0].substring(1, parts[0].length() - 1)); writer.append(','); //Sredi ovo, ne moze ovako glupavo.. if (processed.charAt(processed.length() - 1) == ' ') { processed = processed.substring(0, processed.length() - 1); } if (processed.charAt(0) == ' ') { processed = processed.substring(1); } String escaped = Utils.quote(processed); writer.append(escaped); writer.append("\n"); } System.out.println("Words total: " + wordCount); System.out.println("Unique words: " + uniqueWords.size()); } catch (FileNotFoundException ex) { ex.printStackTrace(); } catch (IOException ex) { ex.printStackTrace(); } finally { if (reader != null) { try { reader.close(); } catch (IOException ex) { ex.printStackTrace(); } } if (writer != null) { try { writer.flush(); writer.close(); } catch (IOException ex) { ex.printStackTrace(); } } } }
From source file:edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java
License:Open Source License
public void saveInstances(String s) throws IOException { if (Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) != null) { BufferedWriter bw = new BufferedWriter(new FileWriter( Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION) + "_" + s));/* w w w. j av a2 s.com*/ System.out.println("Saving: " + s); //bw.write(getInstances().toString()); Instances inst = getInstances(); StringBuffer text = new StringBuffer(); text.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n"); for (int i = 0; i < inst.numAttributes(); i++) { text.append(inst.attribute(i)).append("\n"); } text.append("\n").append("@data").append("\n"); bw.write(text.toString()); for (int i = 0; i < inst.numInstances(); i++) { text = new StringBuffer(); text.append(inst.instance(i)); if (i < inst.numInstances() - 1) { text.append('\n'); } bw.write(text.toString()); } bw.flush(); bw.close(); System.out.println("Done."); } }
From source file:milk.core.Exemplars.java
License:Open Source License
/** * Returns the exemplars as a string. //from w w w . ja v a 2 s . c o m * It only shows each exemplar's ID value, class value and weight * as well as the ARFF header of the dataset * * @return the set of exemplars as a string */ public final String toString() { StringBuffer text = new StringBuffer(); text.append("@relation " + Utils.quote(m_RelationName) + "\n\n"); for (int i = 0; i < m_Attributes.length; i++) { text.append(m_Attributes[i]); if (i == m_IdIndex) text.append(" (ID Attribute)"); else if (i == m_ClassIndex) text.append(" (Class Attribute)"); //text.append("\n"); } Attribute id = idAttribute(); Attribute cl = classAttribute(); text.append("\n@Exemplars: \nID(" + id.name() + "); Class(" + cl.name() + "); Weight; sumOfInstances'Weights\n"); double[] weights = sumsOfWeights(); for (int j = 0; j < m_Exemplars.size(); j++) { Exemplar eg = (Exemplar) m_Exemplars.elementAt(j); text.append(id.value((int) eg.idValue()) + "; " + cl.value((int) eg.classValue()) + "; " + eg.weight() + "; " + weights[j] + "\n"); } text.append("There are totally " + numExemplars() + " exemplars"); return text.toString(); }
From source file:milk.experiment.MICSVResultListener.java
License:Open Source License
/** * Just prints out each result as it is received. * * @param rp the MIResultProducer that generated the result * @param key The key for the results./*from w w w . jav a2s . c o m*/ * @param result The actual results. * @exception Exception if the result could not be accepted. */ public void acceptResult(MIResultProducer rp, Object[] key, Object[] result) throws Exception { if (m_RP != rp) { throw new Error("Unrecognized MIResultProducer sending results!!"); } for (int i = 0; i < key.length; i++) { if (i != 0) { m_Out.print(','); } if (key[i] == null) { m_Out.print("?"); } else { m_Out.print(Utils.quote(key[i].toString())); } } for (int i = 0; i < result.length; i++) { m_Out.print(','); if (result[i] == null) { m_Out.print("?"); } else { m_Out.print(Utils.quote(result[i].toString())); } } m_Out.println(""); }
From source file:org.pentaho.di.arff.ArffOutputData.java
License:Open Source License
/**
 * Format a Kettle value for writing as an ARFF data cell.
 *
 * @param index the index of the value to format
 * @param v <code>ValueMetaInterface</code> for the field in question
 * @param value the actual value
 * @param encoding an (optional) character encoding
 * @return the formatted value as an array of bytes
 * @exception KettleValueException if an error occurs
 */
private byte[] formatField(int index, ValueMetaInterface v, Object value, String encoding)
        throws KettleValueException {
    // Check for missing value (null or empty string).
    // This seems to only consider empty string ("") to be a null/missing
    // value if the actual type is String; for other types it returns false
    // if the value is "" (Kettle 3.0).
    if (v.isNull(value)) {
        return m_missing;
    }
    if (m_arffMeta[index].getArffType() == ArffMeta.NOMINAL
            || m_arffMeta[index].getArffType() == ArffMeta.STRING) {
        String svalue = (value instanceof String) ? (String) value : v.getString(value);
        if (m_arffMeta[index].getArffType() == ArffMeta.NOMINAL) {
            // Check to see if we've seen this value before; if not, update the
            // hash tree. Note that we enclose in quotes (if necessary) *after*
            // inserting into the hash table so that the header values are kept
            // in sorted order in the situation when there are a mixture of
            // values that need quoting and those that do not.
            if (!m_nominalVals[index].containsKey(svalue)) {
                m_nominalVals[index].put(svalue, svalue);
            }
        }
        svalue = Utils.quote(svalue);
        return convertStringToBinaryString(encoding, Const.trimToType(svalue, v.getTrimType()));
    } else if (m_arffMeta[index].getArffType() == ArffMeta.DATE) {
        // isNull bug workaround: re-check for an empty string form.
        String temp = v.getString(value);
        if (temp == null || temp.length() == 0) {
            return m_missing;
        }
        // ARFF date values are quoted strings.
        temp = Utils.quote(temp);
        return convertStringToBinaryString(encoding, Const.trimToType(temp, v.getTrimType()));
    } else if (m_arffMeta[index].getKettleType() == ValueMetaInterface.TYPE_BOOLEAN) {
        // isNull bug workaround.
        String temp = v.getString(value);
        if (temp == null || temp.length() == 0) {
            return m_missing;
        }
        // Booleans are emitted as numeric 1/0.
        if (v.getBoolean(value)) {
            temp = "1";
        } else {
            temp = "0";
        }
        return convertStringToBinaryString(encoding, Const.trimToType(temp, v.getTrimType()));
    } else {
        // isNull bug workaround; note the computed string is only used for the
        // emptiness check — the value's own binary form is returned.
        String temp = v.getString(value);
        if (temp == null || temp.length() == 0) {
            return m_missing;
        }
        return v.getBinaryString(value);
    }
}
From source file:org.pentaho.di.arff.ArffOutputData.java
License:Open Source License
/**
 * Writes the ARFF header and appends the temporary data file to it.
 *
 * @param relationName the ARFF relation name
 * @param encoding an (optional) character encoding
 * @exception KettleStepException if an error occurs
 */
public void finishOutput(String relationName, String encoding) throws KettleStepException {
    if (m_headerOut == null) {
        // can't do anything
        return;
    }
    relationName = Utils.quote(relationName);
    relationName = "@relation " + relationName;
    byte[] rn = null;
    byte[] atAtt = null;
    byte[] atData = null;
    // Encode the fixed header tokens up front, using the requested encoding
    // when one is configured and non-empty, otherwise the platform default.
    if (m_hasEncoding && encoding != null) {
        if (Const.isEmpty(encoding)) {
            rn = relationName.getBytes();
            atAtt = "@attribute ".getBytes();
            atData = "@data".getBytes();
        } else {
            try {
                rn = relationName.getBytes(encoding);
                atAtt = "@attribute ".getBytes(encoding);
                atData = "@data".getBytes(encoding);
            } catch (UnsupportedEncodingException e) {
                throw new KettleStepException(
                        "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
            }
        }
    } else {
        rn = relationName.getBytes();
        atAtt = "@attribute ".getBytes();
        atData = "@data".getBytes();
    }
    try {
        // write the header
        m_headerOut.write(rn);
        m_headerOut.write(m_newLine);
        // now write the attributes, one @attribute line per output field,
        // dispatching on the field's ARFF type (date is the fall-through case)
        for (int i = 0; i < m_outputFieldIndexes.length; i++) {
            if (m_outputFieldIndexes[i] >= 0) {
                if (m_arffMeta[i].getArffType() == ArffMeta.NOMINAL) {
                    m_headerOut.write(atAtt);
                    writeBinaryNominalAttString(i, encoding);
                } else if (m_arffMeta[i].getArffType() == ArffMeta.STRING) {
                    m_headerOut.write(atAtt);
                    writeBinaryStringAttString(i, encoding);
                } else if (m_arffMeta[i].getArffType() == ArffMeta.NUMERIC) {
                    m_headerOut.write(atAtt);
                    writeBinaryNumericAttString(i, encoding);
                } else {
                    m_headerOut.write(atAtt);
                    writeBinaryDateAttString(i, encoding);
                }
            }
        }
        m_headerOut.write(atData);
        m_headerOut.write(m_newLine);
        // flush/close the data stream so the temp file is complete before append
        m_dataOut.flush();
        m_dataOut.close();
    } catch (IOException ex) {
        throw new KettleStepException("Problem writing values to " + "file.", ex);
    } finally {
        try {
            closeFiles();
        } catch (IOException ex) {
            throw new KettleStepException("Problem closing files...", ex);
        }
    }
    // now append the temporary file to the header file
    InputStream is = null;
    OutputStream os = null;
    try {
        is = new FileInputStream(m_tempFile);
        // open the header file for appending
        os = new FileOutputStream(m_headerFile, true);
        while (true) {
            // NOTE(review): the synchronized block suggests m_buffer may be
            // shared with other threads — confirm against the step's threading
            // model before changing this copy loop.
            synchronized (m_buffer) {
                int amountRead = is.read(m_buffer);
                if (amountRead == -1) {
                    break;
                }
                os.write(m_buffer, 0, amountRead);
            }
        }
    } catch (IOException ex) {
        throw new KettleStepException("Problem copying temp file", ex);
    } finally {
        try {
            if (is != null) {
                is.close();
                // Try and clean up by deleting the temp file
                m_tempFile.delete();
            }
            if (os != null) {
                os.close();
            }
        } catch (IOException ex) {
            throw new KettleStepException("Problem closing files...", ex);
        }
    }
}
From source file:org.pentaho.di.arff.ArffOutputData.java
License:Open Source License
/**
 * Writes an attribute declaration for a numeric attribute.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryNumericAttString(int index, String encoding) throws IOException, KettleStepException {
    String quotedName = Utils.quote(m_arffMeta[index].getFieldName());
    byte[] attName;
    byte[] attType;
    // A configured, non-empty encoding selects explicit byte conversion;
    // every other case falls back to the platform default.
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            attName = quotedName.getBytes(encoding);
            attType = " numeric".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = quotedName.getBytes();
        attType = " numeric".getBytes();
    }
    m_headerOut.write(attName);
    m_headerOut.write(attType);
    m_headerOut.write(m_newLine);
}
From source file:org.pentaho.di.arff.ArffOutputData.java
License:Open Source License
/**
 * Writes an attribute declaration for a string attribute.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryStringAttString(int index, String encoding) throws IOException, KettleStepException {
    String quotedName = Utils.quote(m_arffMeta[index].getFieldName());
    byte[] attName;
    byte[] attType;
    // A configured, non-empty encoding selects explicit byte conversion;
    // every other case falls back to the platform default.
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            attName = quotedName.getBytes(encoding);
            attType = " string".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = quotedName.getBytes();
        attType = " string".getBytes();
    }
    m_headerOut.write(attName);
    m_headerOut.write(attType);
    m_headerOut.write(m_newLine);
}
From source file:org.pentaho.di.arff.ArffOutputData.java
License:Open Source License
/**
 * Writes an attribute declaration for a date attribute, including the
 * quoted date-format pattern taken from the field's value metadata.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryDateAttString(int index, String encoding) throws IOException, KettleStepException {
    ValueMetaInterface v = m_outputRowMeta.getValueMeta(m_outputFieldIndexes[index]);
    String dateF = Utils.quote(v.getDateFormat().toPattern());
    String quotedName = Utils.quote(m_arffMeta[index].getFieldName());
    byte[] attName;
    byte[] attType;
    byte[] dateFormat;
    // A configured, non-empty encoding selects explicit byte conversion;
    // every other case falls back to the platform default.
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            attName = quotedName.getBytes(encoding);
            attType = " date ".getBytes(encoding);
            dateFormat = dateF.getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = quotedName.getBytes();
        attType = " date ".getBytes();
        dateFormat = dateF.getBytes();
    }
    m_headerOut.write(attName);
    m_headerOut.write(attType);
    m_headerOut.write(dateFormat);
    m_headerOut.write(m_newLine);
}
From source file:org.pentaho.di.arff.ArffOutputData.java
License:Open Source License
/** * Writes an attribute declaration for a nominal attribute * //from w w w . ja va2 s. c o m * @param index the index of the attribute/field * @param encoding an (optional) character encoding * @exception IOException if an error occurs * @exception KettleStepException if an error occurs */ private void writeBinaryNominalAttString(int index, String encoding) throws IOException, KettleStepException { byte[] attName = null; byte[] lcurly = null; byte[] rcurly = null; if (m_hasEncoding && encoding != null) { if (Const.isEmpty(encoding)) { attName = Utils.quote(m_arffMeta[index].getFieldName()).getBytes(); lcurly = m_spaceLeftCurly; rcurly = m_rightCurly; } else { try { attName = Utils.quote(m_arffMeta[index].getFieldName()).getBytes(encoding); lcurly = " {".getBytes(encoding); rcurly = "}".getBytes(encoding); } catch (UnsupportedEncodingException e) { throw new KettleStepException( "Unable to write header with " + "specified string encoding [" + encoding + "]", e); } } } else { attName = Utils.quote(m_arffMeta[index].getFieldName()).getBytes(); lcurly = m_spaceLeftCurly; rcurly = m_rightCurly; } m_headerOut.write(attName); m_headerOut.write(lcurly); // get keys from corresponding hash tree Set<String> keySet = m_nominalVals[index].keySet(); Iterator<String> ksi = keySet.iterator(); byte[] nomVal = null; while (ksi.hasNext()) { String next = ksi.next(); next = Utils.quote(next); if (m_hasEncoding && encoding != null) { if (Const.isEmpty(encoding)) { nomVal = next.getBytes(); } else { nomVal = next.getBytes(encoding); } } else { nomVal = next.getBytes(); } m_headerOut.write(nomVal); if (ksi.hasNext()) { m_headerOut.write(m_separator); } } m_headerOut.write(rcurly); m_headerOut.write(m_newLine); }