Example usage for org.apache.hadoop.io Text write

List of usage examples for org.apache.hadoop.io Text write


This page collects example usages of the write method of org.apache.hadoop.io.Text.


public void write(DataOutput out) throws IOException 

Source Link


Serializes this object by writing it to out; the length is written using zero-compressed encoding.


From source file:hadoop_serialize.java

License:Apache License

public static void main(String[] args) throws java.io.IOException {
    //System.err.println("Writing byte stream to stdout");
    DataOutputStream os = new DataOutputStream(System.out);

    //System.err.println("Writing a sequence of numbers");

    //System.err.println("WritableUtils.writeVInt: 42, 4242, 424242, 42424242, -42");
    WritableUtils.writeVInt(os, 42);//w w  w . j a va2  s. co  m
    WritableUtils.writeVInt(os, 4242);
    WritableUtils.writeVInt(os, 424242);
    WritableUtils.writeVInt(os, 42424242);
    WritableUtils.writeVInt(os, -42);

    //System.err.println("WritableUtils.writeVLong 42, 424242, 4242424242");
    WritableUtils.writeVLong(os, 42L);
    WritableUtils.writeVLong(os, 424242L);
    WritableUtils.writeVLong(os, 4242424242L);
    //System.err.println("WritableUtils.writeString \"hello world\"");
    WritableUtils.writeString(os, "hello world");
    WritableUtils.writeString(os, "oggi \u00e8 gioved\u00ec");

    // This file contains: writeVInt of 42, 4242, 424242, 42424242, -42; writeVLong of 42, 424242, 4242424242; 2 writeString calls

    //System.err.println("Text.write \"I'm a Text object\"");
    Text t = new Text("\u00e0 Text object");


From source file:com.chinamobile.bcbsp.bspcontroller.HDFSOperator.java

License:Apache License

 * serialize the WorkerManager Status//from  w w w .  j a va2s  . com
 * @param uri
 *        BSPfile output uri
 * @param wmlist
 *        workerManager list
 * @param staffsLoadFactor
 *        for load balancing
 * @throws IOException
 *        if an exception occurs while handling the BSPfile.
public void serializeWorkerManagerStatus(String uri, Collection<WorkerManagerStatus> wmlist,
        double staffsLoadFactor) throws IOException {
    // synchronized(HDFSOperator.class){
    // this.fs = FileSystem.get(URI.create(uri),conf);
    // this.isFSExist();
    Double loadfactor = staffsLoadFactor;
    // Path path = new Path(uri);
    // out = fs.create(path);
    bspout = new BSPFSDataOutputStreamImpl(uri, 1, conf);
    Text factor = new Text(loadfactor.toString());
    // factor.write(out);
    // for (WorkerManagerStatus wmStatus : wmlist) {
    // wmStatus.write(out);
    // }
    // out.flush();
    // out.close();
    for (WorkerManagerStatus wmStatus : wmlist) {
    // fs.close();
    // }

From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java

License:Apache License

private void createRCFile(final String fileName, final int numRecords, final int maxColumns)
        throws IOException {
    // Write the sequence file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null, metadata, null);
    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }/*from  w  w w.j  a va2  s . c  o m*/

From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java

License:Apache License

 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample//from w ww.  j a v  a  2 s . co m
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                } catch (IOException ie) {
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
    for (int i = 0; i < samples; i++) {
        try {
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
        } catch (InterruptedException e) {
    for (Text split : sampler.createPartitions(partitions)) {
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");

From source file:com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java

License:Apache License

public void write(DataOutput out) throws IOException {
    final Text text = new Text();
    text.set(_object_node.toString());/*from  w ww  . j av a 2  s.  c  om*/


From source file:com.marcolotz.MRComponents.SerializerConverter.java

License:Creative Commons License

 * This is a refactored way to serialize a String. Basically, it transforms
 * it into a Text and then writes it to the DataOutput.
 * /*ww  w.  j a  va  2s .co  m*/
 * @param outputString
 * @param out
 * @throws IOException
public static void writeString(String outputString, DataOutput out) throws IOException {
    Text writtenString;

    /* Prevents a null string exception when writing it to the output */
    if (outputString == null) {
        writtenString = new Text("null");
    } else {
        writtenString = new Text(outputString);

From source file:com.marklogic.contentpump.RDFWritable.java

License:Apache License

public void write(DataOutput out) throws IOException {
    if (graphUri == null) {
        out.writeByte(0);/*from   w  ww . j  a v a2  s .c o  m*/
    } else {
        Text t = new Text(graphUri);
    if (value instanceof Text) {
        ((Text) value).write(out);
    } else if (value instanceof MarkLogicNode) {
        ((MarkLogicNode) value).write(out);
    } else if (value instanceof BytesWritable) {
        ((BytesWritable) value).write(out);
    //serialize permissions
    if (permissions == null) {
    } else {
        for (int i = 0; i < permissions.length; i++) {
            Text role = new Text(permissions[i].getRole());
            Text cap = new Text(permissions[i].getCapability().toString());

From source file:com.marklogic.mapreduce.MarkLogicInputSplit.java

License:Apache License

public void write(DataOutput out) throws IOException {
    out.writeLong(start);// www .  ja  va2s. c o  m
    Text forestIdText = new Text(forestId.toByteArray());
    if (hostName != null && hostName.length > 0) {
        Text.writeString(out, hostName[0]);

From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * //from   www. j  a  v  a  2 s .  c om
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                } catch (IOException ie) {
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
    for (int i = 0; i < samples; i++) {
        try {
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
        } catch (InterruptedException e) {
    for (Text split : sampler.createPartitions(partitions)) {
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");

From source file:com.yolodata.tbana.hadoop.mapred.util.ArrayListTextWritable.java

License:Open Source License

public void write(DataOutput dataoutput) throws IOException {
    for (Text element : this) {
    }/*from   w  w  w. j  a  v  a 2s  .c  om*/