Example usage for org.apache.hadoop.io BytesWritable toString

Introduction

On this page you can find example usages of org.apache.hadoop.io.BytesWritable.toString().

Prototype

@Override
public String toString()

Source Link

Document

Generate the stream of bytes as hex pairs separated by ' '.

Usage

From source file:com.cloudera.recordservice.examples.terasort.TeraValidate.java

License:Apache License

/**
 * Renders the valid bytes of a {@code Text} via {@code BytesWritable.toString()}.
 *
 * @param t text whose first {@code getLength()} bytes are copied into a {@code BytesWritable}
 * @return the {@code toString()} form of that {@code BytesWritable}
 */
private static String textifyBytes(Text t) {
    final BytesWritable holder = new BytesWritable();
    final byte[] raw = t.getBytes();
    final int validLength = t.getLength();
    // Only the first validLength bytes are handed over, starting at offset 0.
    holder.set(raw, 0, validLength);
    final String rendered = holder.toString();
    return rendered;
}

From source file:org.opensextant.mapreduce.GeoTaggerMapper.java

License:Apache License

/**
 * /*from w w  w  .j a  va2  s .  c om*/
 */
@Override
public void map(BytesWritable key, Text textRecord, Context context) throws IOException, InterruptedException {
    String text = null;
    HashSet<String> dedup = new HashSet<>();

    try {
        JSONObject obj = JSONObject.fromObject(textRecord.toString());
        if (!obj.containsKey("text")) {
            return;
        }
        String text_id = key.toString();
        text = obj.getString("text");
        TextInput textObj = new TextInput(text_id, text);
        textObj.langid = "en";
        /* LANG ID = 'ENGLISH',
         * If this is not true, then you need to add LangID to your metadata or detect it live
         */

        List<TextMatch> matches = geocoder.extract(textObj);

        if (matches.isEmpty()) {
            return;
        }

        /* NORMALIZE findings.
         * Reduce all matches, minimizing duplicates, removing whitespace, etc.
         *
         */
        int filtered = 0, duplicates = 0;
        for (TextMatch tm : matches) {

            //                if (filterCrap(tm.getText())) {
            //                    filtered += 1;
            //                    continue;
            //                }
            if (dedup.contains(tm.getText())) {
                duplicates += 1;
                continue;
            }
            dedup.add(tm.getText());
            JSONObject o = match2JSON(tm);
            Text matchOutput = new Text(o.toString());
            context.write(NullWritable.get(), matchOutput);
        }
        if (log.isTraceEnabled()) {
            log.trace("For key " + new String(key.getBytes(), StandardCharsets.UTF_8) + " found "
                    + matches.size() + ", filtered: " + filtered + " as junk, " + duplicates + " duplicates.");
        }
    } catch (Exception err) {
        log.error("Error running geotagger", err);
    }
}

From source file:org.opensextant.mapreduce.KeywordTaggerMapper.java

License:Apache License

/**
 * /*from ww  w .j  ava 2 s. co  m*/
 */
@Override
public void map(BytesWritable key, Text textRecord, Context context) throws IOException, InterruptedException {
    ++counter;
    String text = null;
    HashSet<String> dedup = new HashSet<>();

    try {
        JSONObject obj = JSONObject.fromObject(textRecord.toString());
        if (!obj.containsKey("text")) {
            return;
        }
        String text_id = key.toString();
        text = obj.getString("text");
        TextInput textObj = new TextInput(text_id, text);
        textObj.langid = "en";
        /* LANG ID = 'ENGLISH', 
         * If this is not true, then you need to add LangID to your metadata or detect it live 
         */

        /*
         * Testing to see if XTax tagger operates in Hadoop job
         */
        List<TextMatch> matches = xtax.extract(textObj);

        if (matches.isEmpty()) {
            return;
        }

        /* NORMALIZE findings.
         * Reduce all matches, minimizing duplicates, removing whitespace, etc.
         * 
         */
        int filtered = 0, duplicates = 0;
        for (TextMatch tm : matches) {
            if (filterCrap(tm.getText())) {
                filtered += 1;
                continue;
            }
            if (dedup.contains(tm.getText())) {
                duplicates += 1;
                continue;
            }
            dedup.add(tm.getText());
            JSONObject o = match2JSON(tm);
            Text matchOutput = new Text(o.toString());
            context.write(NullWritable.get(), matchOutput);
        }
        if (log.isTraceEnabled()) {
            log.trace("For key " + new String(key.getBytes(), StandardCharsets.UTF_8) + " found "
                    + matches.size() + ", filtered: " + filtered + " as junk, " + duplicates + " duplicates.");
        }
    } catch (Exception err) {
        log.error("Error running xtax", err);
        // System.exit(-1);
    }
}

From source file:protobuf.examples.ProtobufMapper.java

License:Open Source License

/**
 * Logs the incoming value, copies its valid bytes into a right-sized array and
 * emits a fixed key with a count of one.
 *
 * @param key input key (not read by this method)
 * @param value serialized record bytes
 * @param output collector that receives the single output pair
 * @param reporter progress reporter (not read by this method)
 * @throws IOException if the collector fails
 */
public void map(LongWritable key, BytesWritable value, OutputCollector<Text, IntWritable> output,
        Reporter reporter) throws IOException {

    final String rendered = value.toString();
    LOG.info("In Mapper Get Data: " + rendered);

    // Copy only the first getLength() bytes out of the writable's buffer.
    final int validLength = value.getLength();
    final byte[] payload = new byte[validLength];
    System.arraycopy(value.getBytes(), 0, payload, 0, validLength);

    // NOTE(review): the key is the literal text "msg.getEmail()", and payload is never read;
    // this looks like a placeholder for a parsed message field — confirm.
    final Text outKey = new Text("msg.getEmail()");
    final IntWritable one = new IntWritable(1);
    output.collect(outKey, one);
}

From source file:weka.distributed.hadoop.CorrelationMatrixRowHadoopReducer.java

License:Open Source License

/**
 * Deserializes every partial matrix row received for one key, aggregates them and
 * writes the result as a row-number key with space-separated values.
 *
 * <p>Nothing is written when no values arrive for the key.
 *
 * @param key reducer key (not read directly; the row number comes from the deserialized rows)
 * @param values serialized {@code MatrixRowHolder} instances for this key
 * @param context receives the aggregated row as a {@code Text}/{@code Text} pair
 * @throws IOException if deserialization or aggregation fails, if the rows carry differing
 *         row numbers, or if the write is interrupted
 */
@Override
public void reduce(Text key, Iterable<BytesWritable> values, Context context) throws IOException {
    List<MatrixRowHolder> rowsToAgg = new ArrayList<MatrixRowHolder>();

    try {
        for (BytesWritable b : values) {
            // getBytes() returns the backing buffer, which may be longer than the payload;
            // hand only the first getLength() bytes to the deserializer.
            byte[] bytes = new byte[b.getLength()];
            System.arraycopy(b.getBytes(), 0, bytes, 0, bytes.length);

            rowsToAgg.add(deserialize(bytes));
        }
    } catch (ClassNotFoundException ex) {
        throw new IOException(ex);
    }

    if (rowsToAgg.isEmpty()) {
        return;
    }

    int rowNum = rowsToAgg.get(0).getRowNumber();

    List<double[]> rows = new ArrayList<double[]>();
    // Co-occurrence counts are only collected when missing values were not replaced with means.
    List<int[]> coOcc = null;
    if (!m_missingsWereReplacedWithMeans) {
        coOcc = new ArrayList<int[]>();
    }

    for (MatrixRowHolder r : rowsToAgg) {
        if (r.getRowNumber() != rowNum) {
            throw new IOException("Matrix row numbers for this key appear to differ!");
        }
        rows.add(r.getRow());
        if (coOcc != null) {
            coOcc.add(r.getCoOccurrencesCounts());
        }
    }

    try {
        double[] aggregated = m_task.aggregate(rowNum, rows, coOcc,
                m_headerWithSummaryAtts, m_missingsWereReplacedWithMeans, m_covariance, m_deleteClassIfSet);

        // assemble Text key (row num) and Text row (space separated values)
        Text outKey = new Text();
        outKey.set("" + rowNum);

        StringBuilder joined = new StringBuilder();
        for (int i = 0; i < aggregated.length; i++) {
            if (i > 0) {
                joined.append(" ");
            }
            joined.append(aggregated[i]);
        }

        Text outVal = new Text();
        outVal.set(joined.toString());
        context.write(outKey, outVal);
    } catch (DistributedWekaException e) {
        throw new IOException(e);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        throw new IOException(e);
    }
}