Example usage for org.apache.hadoop.io Text write

Introduction

On this page you can find example usages of org.apache.hadoop.io.Text.write.

Prototype

@Override
public void write(DataOutput out) throws IOException

Source Link

Document

Serializes this object by writing it to out; the length is written using zero-compressed encoding.

Usage

From source file:gobblin.compat.TextSerializerTest.java

License:Apache License

/**
 * Writes every sample string through Hadoop's {@code Text.write} into an in-memory buffer and
 * checks that {@code TextSerializer.readTextAsString} reads the same string back from those bytes.
 */
@Test
public void testDeserialize() throws IOException {
    for (String original : textsToSerialize) {
        // Encode with Hadoop's own serializer.
        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        DataOutputStream sink = new DataOutputStream(buffer);
        Text hadoopText = new Text();
        hadoopText.set(original);
        hadoopText.write(sink);
        sink.close();

        // Decode the captured bytes with the deserializer under test.
        DataInputStream source = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
        String roundTripped = TextSerializer.readTextAsString(source);

        Assert.assertEquals(roundTripped, original);
    }
}

From source file:gobblin.runtime.JobState.java

License:Apache License

/**
 * Writes this job state to {@code out} in the following order: job name, job ID, start time,
 * end time, duration, state name, task count, the task-state section, and finally whatever
 * {@code super.write(out)} emits.
 *
 * @param out        the output to write to
 * @param writeTasks when {@code true}, the combined number of entries in {@code taskStates} and
 *                   {@code skippedTaskStates} is written, followed by each of those task states;
 *                   when {@code false}, only a count of zero is written
 * @throws IOException if writing to {@code out} fails
 */
public void write(DataOutput out, boolean writeTasks) throws IOException {
    // A single Text instance is reused for every string field.
    Text field = new Text();

    field.set(this.jobName);
    field.write(out);
    field.set(this.jobId);
    field.write(out);

    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);

    field.set(this.state.name());
    field.write(out);

    out.writeInt(this.taskCount);

    if (!writeTasks) {
        // No task states follow, so the section consists of a zero count only.
        out.writeInt(0);
    } else {
        int serializedTasks = this.taskStates.size() + this.skippedTaskStates.size();
        out.writeInt(serializedTasks);
        for (TaskState state : this.taskStates.values()) {
            state.write(out);
        }
        for (TaskState skipped : this.skippedTaskStates.values()) {
            skipped.write(out);
        }
    }

    super.write(out);
}

From source file:gobblin.runtime.TaskState.java

License:Apache License

/**
 * Writes this task state to {@code out}: job ID, task ID, start time, end time and duration,
 * followed by whatever {@code super.write(out)} emits.
 *
 * @param out the output to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    // A single Text instance is reused for both identifier strings.
    Text field = new Text();

    field.set(this.jobId);
    field.write(out);

    field.set(this.taskId);
    field.write(out);

    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);

    super.write(out);
}

From source file:info.halo9pan.word2vec.hadoop.mr.SortInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * /*from w w w.j  a  v a 2 s  .c o m*/
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final SortInputFormat inFormat = new SortInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:it.uniroma1.bdc.tesi.piccioli.giraphstandalone.ksimplecycle.TextAndHashes.java

/**
 * Writes this object to {@code out}.
 *
 * <p>Layout: {@code value}; the number of generated hashes followed by each hash; the number of
 * keys in {@code seenHash}; then, for each key, the key itself, the number of hashes stored for
 * it, and each of those hashes.
 *
 * <p>A {@code null} {@code generatedHash} or {@code seenHash} is replaced with an empty
 * collection and written as a zero count. Existing contents are written as they are.
 *
 * @param out the output to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    int size;
    int sizeSet;
    value.write(out);

    // Create the collections only when they are missing. Replacing them unconditionally here
    // would discard the object's state and always serialize empty collections.
    if (this.generatedHash == null) {
        this.generatedHash = new HashSet<Integer>();
    }
    if (this.seenHash == null) {
        this.seenHash = new HashMap<Text, Set<Integer>>();
    }

    size = this.generatedHash.size();
    out.writeInt(size);
    for (Integer item : generatedHash) {
        out.writeInt(item);
    }

    size = this.seenHash.size();
    out.writeInt(size); // number of keys in the map
    for (Text itemKey : seenHash.keySet()) {
        itemKey.write(out); // the key

        sizeSet = this.seenHash.get(itemKey).size(); // number of elements stored for this key
        out.writeInt(sizeSet);

        for (Integer item : seenHash.get(itemKey)) {
            out.writeInt(item); // each element
        }
    }

}

From source file:it.uniroma1.bdc.tesi.piccioli.giraphstandalone.message.CustomMessageWithAggregatedPath.java

/**
 * Writes the number of sets held in {@code visitedVertex}, then, for each set, its size followed
 * by each of its {@code Text} elements.
 *
 * @param out the output to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    final int groupCount = visitedVertex.size();
    out.writeInt(groupCount);

    for (Set<Text> group : visitedVertex) {
        final int memberCount = group.size();
        out.writeInt(memberCount);
        for (Text member : group) {
            member.write(out);
        }
    }
}

From source file:it.uniroma1.bdc.tesi.piccioli.giraphstandalone.message.CustomMessageWithPath.java

/**
 * Writes the number of entries in {@code visitedVertex}, each entry in iteration order, and
 * finally {@code sourceVertex}.
 *
 * @param out the output to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    final int visitedCount = visitedVertex.size();
    out.writeInt(visitedCount);

    for (Text visited : visitedVertex) {
        visited.write(out);
    }

    sourceVertex.write(out);
}

From source file:net.darkseraphim.webanalytics.hadoop.csv.Row.java

License:Apache License

/**
 * Writes the number of elements in this row, followed by each {@code Text} element in
 * iteration order.
 *
 * @param dataoutput the output to write to
 * @throws IOException if writing to {@code dataoutput} fails
 */
public void write(DataOutput dataoutput) throws IOException {
    final int columnCount = this.size();
    dataoutput.writeInt(columnCount);

    for (Text column : this) {
        column.write(dataoutput);
    }
}

From source file:org.acaro.graffiti.processing.ResultSet.java

License:Apache License

/**
 * Writes the number of entries in {@code results}, followed by each entry in iteration order.
 *
 * @param output the output to write to
 * @throws IOException if writing to {@code output} fails
 */
@Override
public void write(DataOutput output) throws IOException {
    final int resultCount = results.size();
    output.writeInt(resultCount);

    for (Text entry : results) {
        entry.write(output);
    }
}

From source file:org.acaro.graffiti.processing.Vertex.java

License:Apache License

/**
 * Writes this vertex to {@code out} in the following order: the vertex ID; the number of entries
 * in {@code labelledOutEdgeMap} and, for each entry, its key, the size of its value set and each
 * element of that set; the number of messages in {@code msgList} followed by each message; and
 * finally the {@code halt} flag.
 *
 * @param out the output to write to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    vertexId.write(out);

    // Out-edge map: one section per key.
    final int labelCount = labelledOutEdgeMap.size();
    out.writeInt(labelCount);
    for (Entry<Text, Set<Text>> edgeGroup : labelledOutEdgeMap.entrySet()) {
        final Text edgeLabel = edgeGroup.getKey();
        final Set<Text> targets = edgeGroup.getValue();

        edgeLabel.write(out);
        out.writeInt(targets.size());
        for (Text target : targets) {
            target.write(out);
        }
    }

    // Messages held in msgList.
    final int messageCount = msgList.size();
    out.writeInt(messageCount);
    for (Message pending : msgList) {
        pending.write(out);
    }

    out.writeBoolean(halt);
}