Example usage for org.apache.hadoop.io.Text: Text(byte[] utf8)

Introduction

This page collects example usages of the org.apache.hadoop.io.Text constructor that takes a byte array.

Prototype

public Text(byte[] utf8) 

Document

Construct a Text from a byte array; the bytes are interpreted as UTF-8.
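
Most of the listings below build a Text from a String rather than from raw bytes, so as a point of reference here is a minimal, self-contained sketch (not taken from any of the files below) of the byte-array overload itself; the constructor copies the bytes, which must be valid UTF-8:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextFromBytes {
    public static void main(String[] args) {
        // Encode a string to UTF-8 bytes, then wrap them in a Text.
        byte[] utf8 = "oggi \u00e8 gioved\u00ec".getBytes(StandardCharsets.UTF_8);
        Text t = new Text(utf8); // copies the bytes into the Text's own buffer

        System.out.println(t.getLength()); // length in bytes, not characters
        System.out.println(t);             // decodes back to the original string
    }
}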

Usage

From source file:hadoop_serialize.java

License:Apache License

public static void main(String[] args) throws java.io.IOException {
    //System.err.println("Writing byte stream to stdout");
    DataOutputStream os = new DataOutputStream(System.out);

    //System.err.println("Writing a sequence of numbers");

    //System.err.println("WritableUtils.writeVInt: 42, 4242, 424242, 42424242, -42");
    WritableUtils.writeVInt(os, 42);
    WritableUtils.writeVInt(os, 4242);
    WritableUtils.writeVInt(os, 424242);
    WritableUtils.writeVInt(os, 42424242);
    WritableUtils.writeVInt(os, -42);

    //System.err.println("WritableUtils.writeVLong 42, 424242, 4242424242");
    WritableUtils.writeVLong(os, 42L);
    WritableUtils.writeVLong(os, 424242L);
    WritableUtils.writeVLong(os, 4242424242L);
    //System.err.println("WritableUtils.writeString \"hello world\"");
    WritableUtils.writeString(os, "hello world");
    WritableUtils.writeString(os, "oggi \u00e8 gioved\u00ec");

    // The stream so far contains: writeVInt of 42, 4242, 424242, 42424242, -42;
    // writeVLong of 42, 424242, 4242424242; and two writeString calls.

    //System.err.println("Text.write \"I'm a Text object\"");
    Text t = new Text("\u00e0 Text object");
    t.write(os);

    os.close();
}
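
The stream written by this file can be read back with the matching read-side calls; here is a minimal sketch of the reverse direction (not part of the original file), assuming the same imports and an enclosing main that throws java.io.IOException, with the bytes piped back in on stdin:

    DataInputStream is = new DataInputStream(System.in);

    // Read the values back in exactly the order they were written above.
    for (int i = 0; i < 5; i++) {
        System.out.println(WritableUtils.readVInt(is)); // 42, 4242, 424242, 42424242, -42
    }
    for (int i = 0; i < 3; i++) {
        System.out.println(WritableUtils.readVLong(is)); // 42, 424242, 4242424242
    }
    System.out.println(WritableUtils.readString(is)); // "hello world"
    System.out.println(WritableUtils.readString(is)); // "oggi \u00e8 gioved\u00ec"

    Text t = new Text();
    t.readFields(is); // restores the Text written with t.write(os)
    System.out.println(t);
    is.close();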

From source file:$package.TextFileSetSink.java

License:Apache License

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, Text>> emitter) throws Exception {
    StringBuilder joinedFields = new StringBuilder();
    Iterator<Schema.Field> fieldIter = input.getSchema().getFields().iterator();
    if (!fieldIter.hasNext()) {
        // shouldn't happen
        return;
    }

    Object val = input.get(fieldIter.next().getName());
    if (val != null) {
        joinedFields.append(val);
    }
    while (fieldIter.hasNext()) {
        String fieldName = fieldIter.next().getName();
        joinedFields.append(config.fieldSeparator);
        val = input.get(fieldName);
        if (val != null) {
            joinedFields.append(val);
        }
    }
    emitter.emit(new KeyValue<>(NullWritable.get(), new Text(joinedFields.toString())));
}

From source file:accumulo.balancer.GroupBalancer.java

License:Apache License

public GroupBalancer(String tableId) {
    this.tableId = tableId;
    this.textTableId = new Text(tableId);
}

From source file:accumulo.RowsWithoutColumnIterator.java

License:Apache License

@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options,
        IteratorEnvironment env) throws IOException {
    super.init(source, options, env);

    // Columns whose presence in a row means the row should not be returned
    if (options.containsKey(COLUMNS_TO_IGNORE)) {
        String columnsToIgnoreValue = options.get(COLUMNS_TO_IGNORE);

        Iterable<String> splitColumns = Splitter.on(',').split(columnsToIgnoreValue);
        for (String splitColumn : splitColumns) {
            columnsToIgnore.add(new Text(splitColumn));
        }
    }
}

From source file:adept.mapreduce.AdeptMapper.java

License:Apache License

public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    try {
        HltContentContainer hltcontentcontainer = (HltContentContainer) xmlserializer
                .deserializeString(value.toString(), HltContentContainer.class);
        hltcontentcontainer = doProcess(hltcontentcontainer);

        String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
        serializedHltContainer = serializedHltContainer.replaceAll("\\r\\n", " ");
        serializedHltContainer = serializedHltContainer.replaceAll("\\n", " ");
        output.collect(key, new Text(serializedHltContainer));
    } catch (Exception e) {
        //System.out.println(e.getMessage());
        System.out.println("Exception thrown in map function: " + e.getLocalizedMessage());
    }

}

From source file:adept.mapreduce.PreprocessingJob.java

License:Apache License

public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    HltContentContainer hltcontentcontainer = new HltContentContainer();
    Document doc = DocumentMaker.getInstance().createDefaultDocument(key.toString(), null, null, null, null,
            value.toString(), hltcontentcontainer);

    // Sentence segmentation via the OpenNLP sentence segmenter.
    List<Sentence> sentences = new ArrayList<Sentence>();
    sentences.addAll(
            OpenNLPSentenceSegmenter.getInstance().getSentences(doc.getValue(), doc.getDefaultTokenStream()));
    hltcontentcontainer.setSentences(sentences);

    XMLSerializer xmlserializer = new XMLSerializer(SerializationType.XML);
    String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
    serializedHltContainer = serializedHltContainer.replaceAll("\r\n", " ");
    serializedHltContainer = serializedHltContainer.replaceAll("\n", " ");
    output.collect(key, new Text(serializedHltContainer));
}

From source file:AllLab_Skeleton.Lab1.WordCount_Reducer.java

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    context.write(new Text("Count : "), new IntWritable(count));
}

From source file:alluxio.client.hadoop.AccumulatingReducer.java

License:Apache License

/**
 * This method accumulates values based on their type.
 *
 * @param key the type of values
 * @param values the values to accumulate
 * @param output collect the result of accumulating
 * @param reporter to report progress and update status information
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}

From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java

License:Apache License

@SuppressWarnings("deprecation")
private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes
        int nrFiles) throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

    Path controlDir = getControlDir(mConfig);

    if (!fs.exists(controlDir)) {
        // Note: the directory does not exist at this point, so this delete is a no-op.
        fs.delete(controlDir, true);
        for (int i = 0; i < nrFiles; i++) {
            String name = getFileName(i);
            Path controlFile = new Path(controlDir, "in_file_" + name);
            SequenceFile.Writer writer = null;
            try {
                writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class, LongWritable.class,
                        CompressionType.NONE);
                writer.append(new Text(name), new LongWritable(nrBytes));
            } catch (Exception e) {
                throw new IOException(e.getLocalizedMessage());
            } finally {
                if (writer != null) {
                    writer.close();
                }
                writer = null;
            }
        }
    }
    LOG.info("created control files for: " + nrFiles + " files");
}

From source file:alluxio.hadoop.fs.AccumulatingReducer.java

License:Apache License

public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}