Example usage for org.apache.hadoop.io.Text: Text(byte[] utf8)

Introduction

This page collects example usages of the org.apache.hadoop.io.Text constructor that takes a byte array.

Prototype

public Text(byte[] utf8) 

Document

Construct a Text from a byte array; the bytes are interpreted as UTF-8.
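
Most of the listings below build a Text from a String rather than from raw bytes, so as a point of reference here is a minimal, self-contained sketch (not taken from any of the files below) of the byte-array overload itself; the constructor copies the bytes, which must be valid UTF-8:

import java.nio.charset.StandardCharsets;

import org.apache.hadoop.io.Text;

public class TextFromBytes {
    public static void main(String[] args) {
        // Encode a string to UTF-8 bytes, then wrap them in a Text.
        byte[] utf8 = "oggi \u00e8 gioved\u00ec".getBytes(StandardCharsets.UTF_8);
        Text t = new Text(utf8); // copies the bytes into the Text's own buffer

        System.out.println(t.getLength()); // length in bytes, not characters
        System.out.println(t);             // decodes back to the original string
    }
}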

Usage

From source file:hadoop_serialize.java

License:Apache License

public static void main(String[] args) throws java.io.IOException {
    //System.err.println("Writing byte stream to stdout");
    DataOutputStream os = new DataOutputStream(System.out);

    //System.err.println("Writing a sequence of numbers");

    //System.err.println("WritableUtils.writeVInt: 42, 4242, 424242, 42424242, -42");
    WritableUtils.writeVInt(os, 42);
    WritableUtils.writeVInt(os, 4242);
    WritableUtils.writeVInt(os, 424242);
    WritableUtils.writeVInt(os, 42424242);
    WritableUtils.writeVInt(os, -42);

    //System.err.println("WritableUtils.writeVLong 42, 424242, 4242424242");
    WritableUtils.writeVLong(os, 42L);
    WritableUtils.writeVLong(os, 424242L);
    WritableUtils.writeVLong(os, 4242424242L);
    //System.err.println("WritableUtils.writeString \"hello world\"");
    WritableUtils.writeString(os, "hello world");
    WritableUtils.writeString(os, "oggi \u00e8 gioved\u00ec");

    // The stream so far contains: writeVInt of 42, 4242, 424242, 42424242, -42;
    // writeVLong of 42, 424242, 4242424242; and two writeString calls.

    //System.err.println("Text.write \"I'm a Text object\"");
    Text t = new Text("\u00e0 Text object");
    t.write(os);

    os.close();
}
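
The stream written by this file can be read back with the matching read-side calls; here is a minimal sketch of the reverse direction (not part of the original file), assuming the same imports and an enclosing main that throws java.io.IOException, with the bytes piped back in on stdin:

    DataInputStream is = new DataInputStream(System.in);

    // Read the values back in exactly the order they were written above.
    for (int i = 0; i < 5; i++) {
        System.out.println(WritableUtils.readVInt(is)); // 42, 4242, 424242, 42424242, -42
    }
    for (int i = 0; i < 3; i++) {
        System.out.println(WritableUtils.readVLong(is)); // 42, 424242, 4242424242
    }
    System.out.println(WritableUtils.readString(is)); // "hello world"
    System.out.println(WritableUtils.readString(is)); // "oggi \u00e8 gioved\u00ec"

    Text t = new Text();
    t.readFields(is); // restores the Text written with t.write(os)
    System.out.println(t);
    is.close();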

From source file:$package.TextFileSetSink.java

License:Apache License

@Override
public void transform(StructuredRecord input, Emitter<KeyValue<NullWritable, Text>> emitter) throws Exception {
    StringBuilder joinedFields = new StringBuilder();
    Iterator<Schema.Field> fieldIter = input.getSchema().getFields().iterator();
    if (!fieldIter.hasNext()) {
        // shouldn't happen
        return;
    }

    Object val = input.get(fieldIter.next().getName());
    if (val != null) {
        joinedFields.append(val);
    }
    while (fieldIter.hasNext()) {
        String fieldName = fieldIter.next().getName();
        joinedFields.append(config.fieldSeparator);
        val = input.get(fieldName);
        if (val != null) {
            joinedFields.append(val);
        }
    }
    emitter.emit(new KeyValue<>(NullWritable.get(), new Text(joinedFields.toString())));
}

From source file:accumulo.balancer.GroupBalancer.java

License:Apache License

public GroupBalancer(String tableId) {
    this.tableId = tableId;
    this.textTableId = new Text(tableId);
}

From source file:accumulo.RowsWithoutColumnIterator.java

License:Apache License

@Override
public void init(SortedKeyValueIterator<Key, Value> source, Map<String, String> options,
        IteratorEnvironment env) throws IOException {
    super.init(source, options, env);

    // Columns whose presence in a row means the row should not be returned
    if (options.containsKey(COLUMNS_TO_IGNORE)) {
        String columnsToIgnoreValue = options.get(COLUMNS_TO_IGNORE);

        Iterable<String> splitColumns = Splitter.on(',').split(columnsToIgnoreValue);
        for (String splitColumn : splitColumns) {
            columnsToIgnore.add(new Text(splitColumn));
        }
    }
}

From source file:adept.mapreduce.AdeptMapper.java

License:Apache License

public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {

    try {
        HltContentContainer hltcontentcontainer = (HltContentContainer) xmlserializer
                .deserializeString(value.toString(), HltContentContainer.class);
        hltcontentcontainer = doProcess(hltcontentcontainer);

        String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
        serializedHltContainer = serializedHltContainer.replaceAll("\\r\\n", " ");
        serializedHltContainer = serializedHltContainer.replaceAll("\\n", " ");
        output.collect(key, new Text(serializedHltContainer));
    } catch (Exception e) {
        //System.out.println(e.getMessage());
        System.out.println("Exception thrown in map function: " + e.getLocalizedMessage());
    }

}

From source file:adept.mapreduce.PreprocessingJob.java

License:Apache License

public void map(Text key, Text value, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    HltContentContainer hltcontentcontainer = new HltContentContainer();
    Document doc = DocumentMaker.getInstance().createDefaultDocument(key.toString(), null, null, null, null,
            value.toString(), hltcontentcontainer);

    // Sentence segmentation via the OpenNLP sentence segmenter.
    List<Sentence> sentences = new ArrayList<Sentence>();
    sentences.addAll(
            OpenNLPSentenceSegmenter.getInstance().getSentences(doc.getValue(), doc.getDefaultTokenStream()));
    hltcontentcontainer.setSentences(sentences);

    XMLSerializer xmlserializer = new XMLSerializer(SerializationType.XML);
    String serializedHltContainer = xmlserializer.serializeAsString(hltcontentcontainer);
    serializedHltContainer = serializedHltContainer.replaceAll("\r\n", " ");
    serializedHltContainer = serializedHltContainer.replaceAll("\n", " ");
    output.collect(key, new Text(serializedHltContainer));
}

From source file:AllLab_Skeleton.Lab1.WordCount_Reducer.java

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    context.write(new Text("Count : "), new IntWritable(count));
}

From source file:alluxio.client.hadoop.AccumulatingReducer.java

License:Apache License

/**
 * This method accumulates values based on their type.
 *
 * @param key the type of values
 * @param values the values to accumulate
 * @param output collect the result of accumulating
 * @param reporter to report progress and update status information
 */
public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}

From source file:alluxio.client.hadoop.DFSIOIntegrationTest.java

License:Apache License

@SuppressWarnings("deprecation")
private void createControlFile(org.apache.hadoop.fs.FileSystem fs, long nrBytes, // in bytes
        int nrFiles) throws IOException {
    LOG.info("creating control file: " + nrBytes + " bytes, " + nrFiles + " files");

    Path controlDir = getControlDir(mConfig);

    if (!fs.exists(controlDir)) {
        // Note: the directory does not exist at this point, so this delete is a no-op.
        fs.delete(controlDir, true);
        for (int i = 0; i < nrFiles; i++) {
            String name = getFileName(i);
            Path controlFile = new Path(controlDir, "in_file_" + name);
            SequenceFile.Writer writer = null;
            try {
                writer = SequenceFile.createWriter(fs, mConfig, controlFile, Text.class, LongWritable.class,
                        CompressionType.NONE);
                writer.append(new Text(name), new LongWritable(nrBytes));
            } catch (Exception e) {
                throw new IOException(e.getLocalizedMessage());
            } finally {
                if (writer != null) {
                    writer.close();
                }
                writer = null;
            }
        }
    }
    LOG.info("created control files for: " + nrFiles + " files");
}

From source file:alluxio.hadoop.fs.AccumulatingReducer.java

License:Apache License

public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, Text> output, Reporter reporter)
        throws IOException {
    String field = key.toString();

    reporter.setStatus("starting " + field + " ::host = " + mHostname);

    // concatenate strings
    if (field.startsWith(VALUE_TYPE_STRING)) {
        StringBuilder sSum = new StringBuilder();
        while (values.hasNext()) {
            sSum.append(values.next().toString()).append(";");
        }
        output.collect(key, new Text(sSum.toString()));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum float values
    if (field.startsWith(VALUE_TYPE_FLOAT)) {
        float fSum = 0;
        while (values.hasNext()) {
            fSum += Float.parseFloat(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(fSum)));
        reporter.setStatus("finished " + field + " ::host = " + mHostname);
        return;
    }
    // sum long values
    if (field.startsWith(VALUE_TYPE_LONG)) {
        long lSum = 0;
        while (values.hasNext()) {
            lSum += Long.parseLong(values.next().toString());
        }
        output.collect(key, new Text(String.valueOf(lSum)));
    }
    reporter.setStatus("finished " + field + " ::host = " + mHostname);
}