Example usage for weka.core Utils quote

List of usage examples for weka.core Utils quote

Introduction

In this page you can find the example usage for weka.core Utils quote.

Prototype

public static String quote(String string)

Source Link

Document

Quotes a string if it contains special characters.

Usage

From source file:csv.CSVParser.java

/**
 * Reads the input CSV line by line, preprocesses the text column and writes
 * "id,quotedText" rows to the output file. Also counts total and unique
 * tokens and prints both totals when done.
 *
 * @param doPosTag whether part-of-speech tagging is applied during
 *                 preprocessing (passed through to the preprocessor)
 */
public void processCSV(boolean doPosTag) {
    try {
        Set<String> uniqueWords = new HashSet<>();
        int wordCount = 0;
        reader = new BufferedReader(new FileReader(file));
        writer = new FileWriter(outFile);
        writer.append(fileHeader);
        while ((line = reader.readLine()) != null) {
            String[] parts = line.split(separator);
            String processed = textPreprocessing.preprocess(parts[1], doPosTag);

            // Count tokens and collect the distinct ones.
            String[] tokenized = openNlp.tokenize(processed, false);
            wordCount += tokenized.length;
            Collections.addAll(uniqueWords, tokenized);

            // First column: strip the surrounding quote characters.
            writer.append(parts[0].substring(1, parts[0].length() - 1));
            writer.append(',');

            // Strip leading/trailing whitespace. trim() also avoids the
            // StringIndexOutOfBoundsException the old charAt() checks threw
            // when the preprocessed text was empty or a single space.
            processed = processed.trim();

            // Quote the text so embedded separators/quotes stay intact.
            String escaped = Utils.quote(processed);
            writer.append(escaped);
            writer.append("\n");
        }

        System.out.println("Words total: " + wordCount);
        System.out.println("Unique words: " + uniqueWords.size());

    } catch (FileNotFoundException ex) {
        ex.printStackTrace();
    } catch (IOException ex) {
        ex.printStackTrace();
    } finally {
        if (reader != null) {
            try {
                reader.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
        if (writer != null) {
            try {
                writer.flush();
                writer.close();
            } catch (IOException ex) {
                ex.printStackTrace();
            }
        }
    }
}

From source file:edu.stanford.rsl.conrad.segmentation.GridFeatureExtractor.java

License:Open Source License

/**
 * Saves the current instances in ARFF format to the configured classifier
 * data location, suffixed with {@code "_" + s}. Does nothing when the
 * CLASSIFIER_DATA_LOCATION registry entry is not set.
 *
 * @param s suffix appended to the configured data location file name
 * @throws IOException if writing the file fails
 */
public void saveInstances(String s) throws IOException {
    String location = Configuration.getGlobalConfiguration().getRegistryEntry(RegKeys.CLASSIFIER_DATA_LOCATION);
    if (location != null) {
        // try-with-resources: the writer is closed even if a write throws
        // (the original leaked it on failure).
        try (BufferedWriter bw = new BufferedWriter(new FileWriter(location + "_" + s))) {
            System.out.println("Saving: " + s);

            Instances inst = getInstances();
            // Local buffer — StringBuilder needs no synchronization.
            StringBuilder text = new StringBuilder();

            // ARFF header: relation name, attributes, then @data marker.
            text.append("@relation").append(" ").append(Utils.quote("testing")).append("\n\n");
            for (int i = 0; i < inst.numAttributes(); i++) {
                text.append(inst.attribute(i)).append("\n");
            }
            text.append("\n").append("@data").append("\n");
            bw.write(text.toString());

            // One instance per line; no trailing newline after the last one.
            for (int i = 0; i < inst.numInstances(); i++) {
                text = new StringBuilder();
                text.append(inst.instance(i));
                if (i < inst.numInstances() - 1) {
                    text.append('\n');
                }
                bw.write(text.toString());
            }
            bw.flush();
            System.out.println("Done.");
        }
    }
}

From source file:milk.core.Exemplars.java

License:Open Source License

/**
 * Returns the exemplars as a string. //from w  w  w  . ja  v a  2  s . c o m
 * It only shows each exemplar's ID value, class value and weight
 * as well as the ARFF header of the dataset 
 *
 * @return the set of exemplars as a string
 */
/**
 * Returns the exemplars as a string.
 * It only shows each exemplar's ID value, class value and weight
 * as well as the ARFF header of the dataset.
 *
 * @return the set of exemplars as a string
 */
public final String toString() {

    // Local buffer — StringBuilder needs no synchronization.
    StringBuilder text = new StringBuilder();
    text.append("@relation ").append(Utils.quote(m_RelationName)).append("\n\n");
    for (int i = 0; i < m_Attributes.length; i++) {
        text.append(m_Attributes[i]);
        // Mark the special ID and class attributes in the header.
        if (i == m_IdIndex) {
            text.append(" (ID Attribute)");
        } else if (i == m_ClassIndex) {
            text.append(" (Class Attribute)");
        }
    }

    Attribute id = idAttribute();
    Attribute cl = classAttribute();
    text.append("\n@Exemplars: \nID(").append(id.name()).append("); Class(").append(cl.name())
            .append("); Weight; sumOfInstances'Weights\n");

    // One line per exemplar: ID; class; weight; summed instance weights.
    double[] weights = sumsOfWeights();
    for (int j = 0; j < m_Exemplars.size(); j++) {
        Exemplar eg = (Exemplar) m_Exemplars.elementAt(j);
        text.append(id.value((int) eg.idValue())).append("; ").append(cl.value((int) eg.classValue()))
                .append("; ").append(eg.weight()).append("; ").append(weights[j]).append("\n");
    }
    text.append("There are totally " + numExemplars() + " exemplars");
    return text.toString();
}

From source file:milk.experiment.MICSVResultListener.java

License:Open Source License

/**
 * Just prints out each result as it is received.
 *
 * @param rp the MIResultProducer that generated the result
 * @param key The key for the results./*from  w w  w .  jav  a2s .  c o m*/
 * @param result The actual results.
 * @exception Exception if the result could not be accepted.
 */
/**
 * Just prints out each result as it is received, as one CSV row:
 * the key columns followed by the result columns.
 *
 * @param rp the MIResultProducer that generated the result
 * @param key The key for the results.
 * @param result The actual results.
 * @exception Exception if the result could not be accepted.
 */
public void acceptResult(MIResultProducer rp, Object[] key, Object[] result) throws Exception {

    if (m_RP != rp) {
        throw new Error("Unrecognized MIResultProducer sending results!!");
    }
    for (int i = 0; i < key.length; i++) {
        if (i != 0) {
            m_Out.print(',');
        }
        printQuotedOrMissing(key[i]);
    }
    for (int i = 0; i < result.length; i++) {
        m_Out.print(',');
        printQuotedOrMissing(result[i]);
    }
    m_Out.println();
}

/**
 * Prints "?" (the ARFF/CSV missing-value marker) for a null value,
 * otherwise the quoted string form of the value.
 *
 * @param value the cell value to print; may be null
 */
private void printQuotedOrMissing(Object value) {
    if (value == null) {
        m_Out.print("?");
    } else {
        m_Out.print(Utils.quote(value.toString()));
    }
}

From source file:org.pentaho.di.arff.ArffOutputData.java

License:Open Source License

/**
 * Format a Kettle value for writing.//from   ww  w .  java2  s  . co m
 * 
 * @param index the index of the value to format
 * @param v <code>ValueMetaInterface</code> for the field in question
 * @param value the actual value
 * @param encoding an (optional) character encoding
 * @return the formatted value as an array of bytes
 * @exception KettleValueException if an error occurs
 */
/**
 * Format a Kettle value for writing.
 *
 * @param index the index of the value to format
 * @param v <code>ValueMetaInterface</code> for the field in question
 * @param value the actual value
 * @param encoding an (optional) character encoding
 * @return the formatted value as an array of bytes
 * @exception KettleValueException if an error occurs
 */
private byte[] formatField(int index, ValueMetaInterface v, Object value, String encoding)
        throws KettleValueException {

    // Check for missing value (null or empty string)
    // This seems to only consider empty string ("")
    // to be a null/missing value if the actual type
    // is String; for other types it returns false if
    // the value is "" (Kettle 3.0).
    if (v.isNull(value)) {
        return m_missing;
    }

    // Hoist the repeated array lookup; every branch below consults it.
    ArffMeta meta = m_arffMeta[index];

    if (meta.getArffType() == ArffMeta.NOMINAL || meta.getArffType() == ArffMeta.STRING) {
        String svalue = (value instanceof String) ? (String) value : v.getString(value);

        if (meta.getArffType() == ArffMeta.NOMINAL) {
            // check to see if we've seen this value before, if not
            // then update the hash tree. Note that we enclose in
            // quotes (if necessary) *after* inserting into the
            // hash table so that the header values are kept in
            // sorted order in the situation when there are
            // a mixture of values that need quoting and those
            // that do not.
            if (!m_nominalVals[index].containsKey(svalue)) {
                m_nominalVals[index].put(svalue, svalue);
            }
        }
        svalue = Utils.quote(svalue);

        return convertStringToBinaryString(encoding, Const.trimToType(svalue, v.getTrimType()));
    } else if (meta.getArffType() == ArffMeta.DATE) {
        // isNull bug workaround: an empty non-String value slips past isNull()
        String temp = v.getString(value);
        if (temp == null || temp.length() == 0) {
            return m_missing;
        }

        temp = Utils.quote(temp);
        return convertStringToBinaryString(encoding, Const.trimToType(temp, v.getTrimType()));
    } else if (meta.getKettleType() == ValueMetaInterface.TYPE_BOOLEAN) {
        // isNull bug workaround
        String temp = v.getString(value);
        if (temp == null || temp.length() == 0) {
            return m_missing;
        }

        // Booleans are written as 1/0.
        temp = v.getBoolean(value) ? "1" : "0";
        return convertStringToBinaryString(encoding, Const.trimToType(temp, v.getTrimType()));
    } else {
        // isNull bug workaround
        String temp = v.getString(value);
        if (temp == null || temp.length() == 0) {
            return m_missing;
        }
        return v.getBinaryString(value);
    }
}

From source file:org.pentaho.di.arff.ArffOutputData.java

License:Open Source License

/**
 * Writes the ARFF header and appends the temporary file
 * /*from   www  .j a  v a2  s  .  c om*/
 * @param relationName the ARFF relation name
 * @param encoding an (optional) character encoding
 * @exception KettleStepException if an error occurs
 */
/**
 * Writes the ARFF header and appends the temporary data file to it.
 *
 * @param relationName the ARFF relation name
 * @param encoding an (optional) character encoding
 * @exception KettleStepException if an error occurs
 */
public void finishOutput(String relationName, String encoding) throws KettleStepException {

    if (m_headerOut == null) {
        // can't do anything
        return;
    }

    relationName = "@relation " + Utils.quote(relationName);
    byte[] rn;
    byte[] atAtt;
    byte[] atData;
    // An explicit, non-empty encoding takes precedence; otherwise the
    // platform default charset is used (same behaviour as before).
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            rn = relationName.getBytes(encoding);
            atAtt = "@attribute ".getBytes(encoding);
            atData = "@data".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        rn = relationName.getBytes();
        atAtt = "@attribute ".getBytes();
        atData = "@data".getBytes();
    }

    try {
        // write the header
        m_headerOut.write(rn);
        m_headerOut.write(m_newLine);

        // now write the attributes; every branch starts with "@attribute "
        for (int i = 0; i < m_outputFieldIndexes.length; i++) {
            if (m_outputFieldIndexes[i] >= 0) {
                m_headerOut.write(atAtt);
                if (m_arffMeta[i].getArffType() == ArffMeta.NOMINAL) {
                    writeBinaryNominalAttString(i, encoding);
                } else if (m_arffMeta[i].getArffType() == ArffMeta.STRING) {
                    writeBinaryStringAttString(i, encoding);
                } else if (m_arffMeta[i].getArffType() == ArffMeta.NUMERIC) {
                    writeBinaryNumericAttString(i, encoding);
                } else {
                    // anything else is written as a date attribute
                    writeBinaryDateAttString(i, encoding);
                }
            }
        }

        m_headerOut.write(atData);
        m_headerOut.write(m_newLine);

        m_dataOut.flush();
        m_dataOut.close();
    } catch (IOException ex) {
        throw new KettleStepException("Problem writing values to " + "file.", ex);
    } finally {
        try {
            closeFiles();
        } catch (IOException ex) {
            throw new KettleStepException("Problem closing files...", ex);
        }
    }

    // now append the temporary file to the header file
    InputStream is = null;
    OutputStream os = null;
    try {
        is = new FileInputStream(m_tempFile);
        // open the header file for appending
        os = new FileOutputStream(m_headerFile, true);

        while (true) {
            // NOTE(review): lock kept from the original — presumably m_buffer
            // is shared with another thread; confirm whether it is needed.
            synchronized (m_buffer) {
                int amountRead = is.read(m_buffer);
                if (amountRead == -1) {
                    break;
                }
                os.write(m_buffer, 0, amountRead);
            }
        }
    } catch (IOException ex) {
        throw new KettleStepException("Problem copying temp file", ex);
    } finally {
        // Close the streams independently so that a failure closing the
        // input cannot leak the output stream (the original leaked os here).
        try {
            if (is != null) {
                is.close();
                // Try and clean up by deleting the temp file
                m_tempFile.delete();
            }
        } catch (IOException ex) {
            throw new KettleStepException("Problem closing files...", ex);
        } finally {
            if (os != null) {
                try {
                    os.close();
                } catch (IOException ex) {
                    throw new KettleStepException("Problem closing files...", ex);
                }
            }
        }
    }
}

From source file:org.pentaho.di.arff.ArffOutputData.java

License:Open Source License

/**
 * Writes an attribute declaration for a numeric attribute
 * //  w ww.  ja  va2  s  .  c  o  m
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
/**
 * Writes an attribute declaration for a numeric attribute.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryNumericAttString(int index, String encoding) throws IOException, KettleStepException {
    // Quote the name so special characters survive in the ARFF header.
    String name = Utils.quote(m_arffMeta[index].getFieldName());
    byte[] attName;
    byte[] attType;

    // An explicit, non-empty encoding takes precedence; otherwise the
    // platform default charset is used (collapses the former three
    // identical branches into one decision).
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            attName = name.getBytes(encoding);
            attType = " numeric".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = name.getBytes();
        attType = " numeric".getBytes();
    }

    m_headerOut.write(attName);
    m_headerOut.write(attType);
    m_headerOut.write(m_newLine);
}

From source file:org.pentaho.di.arff.ArffOutputData.java

License:Open Source License

/**
 * Writes an attribute declaration for a string attribute.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryStringAttString(int index, String encoding) throws IOException, KettleStepException {
    // Quote the name so special characters survive in the ARFF header.
    String name = Utils.quote(m_arffMeta[index].getFieldName());
    byte[] attName;
    byte[] attType;

    // An explicit, non-empty encoding takes precedence; otherwise the
    // platform default charset is used (collapses the former three
    // identical branches into one decision).
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            attName = name.getBytes(encoding);
            attType = " string".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = name.getBytes();
        attType = " string".getBytes();
    }

    m_headerOut.write(attName);
    m_headerOut.write(attType);
    m_headerOut.write(m_newLine);
}

From source file:org.pentaho.di.arff.ArffOutputData.java

License:Open Source License

/**
 * Writes an attribute declaration for a date attribute
 * //  ww  w .  ja  v  a  2 s .c o m
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
/**
 * Writes an attribute declaration for a date attribute.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryDateAttString(int index, String encoding) throws IOException, KettleStepException {
    // The date format pattern is emitted after "date " in the declaration.
    ValueMetaInterface v = m_outputRowMeta.getValueMeta(m_outputFieldIndexes[index]);
    String dateF = Utils.quote(v.getDateFormat().toPattern());
    // Quote the name so special characters survive in the ARFF header.
    String name = Utils.quote(m_arffMeta[index].getFieldName());

    byte[] attName;
    byte[] attType;
    byte[] dateFormat;

    // An explicit, non-empty encoding takes precedence; otherwise the
    // platform default charset is used (collapses the former three
    // identical branches into one decision).
    if (m_hasEncoding && encoding != null && !Const.isEmpty(encoding)) {
        try {
            attName = name.getBytes(encoding);
            attType = " date ".getBytes(encoding);
            dateFormat = dateF.getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = name.getBytes();
        attType = " date ".getBytes();
        dateFormat = dateF.getBytes();
    }

    m_headerOut.write(attName);
    m_headerOut.write(attType);
    m_headerOut.write(dateFormat);
    m_headerOut.write(m_newLine);
}

From source file:org.pentaho.di.arff.ArffOutputData.java

License:Open Source License

/**
 * Writes an attribute declaration for a nominal attribute
 * //from   w w  w  . ja  va2 s. c  o m
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
/**
 * Writes an attribute declaration for a nominal attribute, i.e.
 * the quoted field name followed by the set of observed values
 * in {curly braces}.
 *
 * @param index the index of the attribute/field
 * @param encoding an (optional) character encoding
 * @exception IOException if an error occurs
 * @exception KettleStepException if an error occurs
 */
private void writeBinaryNominalAttString(int index, String encoding) throws IOException, KettleStepException {
    // Compute the encoding decision once; it also drives the per-value
    // conversion in the loop below.
    boolean useEncoding = m_hasEncoding && encoding != null && !Const.isEmpty(encoding);
    // Quote the name so special characters survive in the ARFF header.
    String name = Utils.quote(m_arffMeta[index].getFieldName());

    byte[] attName;
    byte[] lcurly;
    byte[] rcurly;
    if (useEncoding) {
        try {
            attName = name.getBytes(encoding);
            lcurly = " {".getBytes(encoding);
            rcurly = "}".getBytes(encoding);
        } catch (UnsupportedEncodingException e) {
            throw new KettleStepException(
                    "Unable to write header with " + "specified string encoding [" + encoding + "]", e);
        }
    } else {
        attName = name.getBytes();
        lcurly = m_spaceLeftCurly;
        rcurly = m_rightCurly;
    }
    m_headerOut.write(attName);
    m_headerOut.write(lcurly);

    // get keys from corresponding hash tree; values were stored unquoted
    // (to keep the sort order) so each one is quoted here on output
    Set<String> keySet = m_nominalVals[index].keySet();
    Iterator<String> ksi = keySet.iterator();

    while (ksi.hasNext()) {
        String next = Utils.quote(ksi.next());
        // UnsupportedEncodingException cannot occur here: the same encoding
        // was already validated when attName was converted above.
        byte[] nomVal = useEncoding ? next.getBytes(encoding) : next.getBytes();

        m_headerOut.write(nomVal);
        if (ksi.hasNext()) {
            m_headerOut.write(m_separator);
        }
    }

    m_headerOut.write(rcurly);
    m_headerOut.write(m_newLine);
}