Example usage for org.apache.hadoop.io Text write

List of usage examples for org.apache.hadoop.io Text write

Introduction

On this page you can find example usage for org.apache.hadoop.io Text write.

Prototype

@Override
public void write(DataOutput out) throws IOException 

Document

Serialize: write this object to out. The length is written using zero-compressed encoding.
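
For orientation, here is a minimal, self-contained sketch of the round trip (not taken from any of the source files listed under Usage; the class name TextWriteRoundTrip is illustrative only): write emits the string's UTF-8 byte length as a zero-compressed vint followed by the bytes, and readFields reverses it.

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

import org.apache.hadoop.io.Text;

public class TextWriteRoundTrip {
    public static void main(String[] args) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        DataOutputStream out = new DataOutputStream(bytes);

        // write() emits the UTF-8 byte length as a zero-compressed vint, then the bytes
        new Text("hello world").write(out);
        out.close();

        // readFields() reverses the encoding
        DataInputStream in = new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
        Text copy = new Text();
        copy.readFields(in);
        System.out.println(copy); // prints: hello world
    }
}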

Usage

From source file:hadoop_serialize.java

License:Apache License

public static void main(String[] args) throws java.io.IOException {
    //System.err.println("Writing byte stream to stdout");
    DataOutputStream os = new DataOutputStream(System.out);

    //System.err.println("Writing a sequence of numbers");

    //System.err.println("WritableUtils.writeVInt: 42, 4242, 424242, 42424242, -42");
    WritableUtils.writeVInt(os, 42);
    WritableUtils.writeVInt(os, 4242);
    WritableUtils.writeVInt(os, 424242);
    WritableUtils.writeVInt(os, 42424242);
    WritableUtils.writeVInt(os, -42);

    //System.err.println("WritableUtils.writeVLong 42, 424242, 4242424242");
    WritableUtils.writeVLong(os, 42L);
    WritableUtils.writeVLong(os, 424242L);
    WritableUtils.writeVLong(os, 4242424242L);
    //
    //System.err.println("WritableUtils.writeString \"hello world\"");
    WritableUtils.writeString(os, "hello world");
    WritableUtils.writeString(os, "oggi \u00e8 gioved\u00ec");

    // This file contains: writeVInt of 42, 4242, 424242, 42424242, -42; writeVLong of 42, 424242, 4242424242; 2 writeString calls

    //System.err.println("Text.write \"I'm a Text object\"");
    Text t = new Text("\u00e0 Text object");
    t.write(os);

    os.close();
}
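
The example above only writes. As a hedged companion sketch (not part of hadoop_serialize.java; the class name hadoop_deserialize is made up for illustration), the stream it produces could be read back from stdin like this, mirroring the write order exactly:

import java.io.DataInputStream;
import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;

public class hadoop_deserialize {
    public static void main(String[] args) throws IOException {
        DataInputStream in = new DataInputStream(System.in);

        // five writeVInt calls above: 42, 4242, 424242, 42424242, -42
        for (int i = 0; i < 5; i++) {
            System.err.println("readVInt: " + WritableUtils.readVInt(in));
        }
        // three writeVLong calls above: 42, 424242, 4242424242
        for (int i = 0; i < 3; i++) {
            System.err.println("readVLong: " + WritableUtils.readVLong(in));
        }
        // two writeString calls above
        System.err.println("readString: " + WritableUtils.readString(in));
        System.err.println("readString: " + WritableUtils.readString(in));

        // finally the Text written with Text.write
        Text t = new Text();
        t.readFields(in);
        System.err.println("Text.readFields: " + t);

        in.close();
    }
}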

From source file:com.chinamobile.bcbsp.bspcontroller.HDFSOperator.java

License:Apache License

/**
 * Serialize the WorkerManager status.
 * @param uri
 *        BSPfile output uri
 * @param wmlist
 *        workerManager list
 * @param staffsLoadFactor
 *        for load balancing
 * @throws IOException
 *        exceptions while handling the BSPfile.
 */
public void serializeWorkerManagerStatus(String uri, Collection<WorkerManagerStatus> wmlist,
        double staffsLoadFactor) throws IOException {
    // synchronized(HDFSOperator.class){
    // this.fs = FileSystem.get(URI.create(uri),conf);
    // this.isFSExist();
    Double loadfactor = staffsLoadFactor;
    // Path path = new Path(uri);
    // out = fs.create(path);
    bspout = new BSPFSDataOutputStreamImpl(uri, 1, conf);
    Text factor = new Text(loadfactor.toString());
    // factor.write(out);
    // for (WorkerManagerStatus wmStatus : wmlist) {
    // wmStatus.write(out);
    // }
    // out.flush();
    // out.close();
    factor.write(bspout.getOut());
    for (WorkerManagerStatus wmStatus : wmlist) {
        wmStatus.write(bspout.getOut());
    }
    bspout.flush();
    bspout.close();
    // fs.close();
    // }
}

From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java

License:Apache License

private void createRCFile(final String fileName, final int numRecords, final int maxColumns)
        throws IOException {
    // Write the sequence file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null, metadata, null);
    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}

From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java

License:Apache License

@Override
public void write(DataOutput out) throws IOException {
    final Text text = new Text();
    text.set(_object_node.toString());

    text.write(out);
}
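
The matching readFields is not shown on this page. A hedged sketch of what it could look like, assuming _object_node is a Jackson ObjectNode and that re-parsing the JSON text is acceptable (both assumptions, not taken from the source file):

@Override
public void readFields(DataInput in) throws IOException {
    // Hedged sketch: read the Text written above and re-parse it; assumes
    // _object_node is a com.fasterxml.jackson.databind.node.ObjectNode.
    final Text text = new Text();
    text.readFields(in);
    _object_node = (ObjectNode) new ObjectMapper().readTree(text.toString());
}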

From source file:com.marcolotz.MRComponents.SerializerConverter.java

License:Creative Commons License

/***
 * This is a refactored way to serialize a String: it converts the string into
 * a Text and then writes it to the DataOutput.
 *
 * @param outputString
 * @param out
 * @throws IOException
 */
public static void writeString(String outputString, DataOutput out) throws IOException {
    Text writtenString;

    /* Prevents a null string exception when writing it to the output */
    if (outputString == null) {
        writtenString = new Text("null");
    } else {
        writtenString = new Text(outputString);
    }
    writtenString.write(out);
}
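
The source file is quoted here only for its write side. A hypothetical readString counterpart (the method name and the "null" sentinel convention simply mirror the writer above; it assumes java.io.DataInput and org.apache.hadoop.io.Text are already imported) might look like:

public static String readString(DataInput in) throws IOException {
    // Hypothetical counterpart to writeString above: reads the Text back and
    // maps the literal "null" sentinel to a Java null.
    Text readText = new Text();
    readText.readFields(in);
    String result = readText.toString();
    return "null".equals(result) ? null : result;
}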

From source file:com.marklogic.contentpump.RDFWritable.java

License:Apache License

@Override
public void write(DataOutput out) throws IOException {
    if (graphUri == null) {
        out.writeByte(0);
    } else {
        out.writeByte(1);
        Text t = new Text(graphUri);
        t.write(out);
    }
    out.writeByte(type);
    if (value instanceof Text) {
        ((Text) value).write(out);
    } else if (value instanceof MarkLogicNode) {
        ((MarkLogicNode) value).write(out);
    } else if (value instanceof BytesWritable) {
        ((BytesWritable) value).write(out);
    }
    //serialize permissions
    if (permissions == null) {
        out.writeByte(0);
    } else {
        out.writeByte(permissions.length);
        for (int i = 0; i < permissions.length; i++) {
            Text role = new Text(permissions[i].getRole());
            Text cap = new Text(permissions[i].getCapability().toString());
            role.write(out);
            cap.write(out);
        }
    }
}

From source file:com.marklogic.mapreduce.MarkLogicInputSplit.java

License:Apache License

@Override
public void write(DataOutput out) throws IOException {
    out.writeLong(start);
    out.writeLong(length);
    Text forestIdText = new Text(forestId.toByteArray());
    forestIdText.write(out);
    if (hostName != null && hostName.length > 0) {
        Text.writeString(out, hostName[0]);
    }
    out.writeBoolean(isLastSplit);
}

From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job
 *            the job to sample
 * @param partFile
 *            where to write the output file to
 * @throws Throwable
 *             if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:com.yolodata.tbana.hadoop.mapred.util.ArrayListTextWritable.java

License:Open Source License

@Override
public void write(DataOutput dataoutput) throws IOException {
    dataoutput.writeInt(this.size());
    for (Text element : this) {
        element.write(dataoutput);
    }
}
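
Only the write side is quoted from this source file. Below is a hedged sketch of the symmetric readFields one might pair with it, assuming ArrayListTextWritable extends ArrayList<Text> and implements Writable (which the write method above suggests but does not prove):

@Override
public void readFields(DataInput datainput) throws IOException {
    // Hedged sketch: restore the list by reading the element count written by
    // write(), then one Text per element (assumes this class extends ArrayList<Text>).
    this.clear();
    int count = datainput.readInt();
    for (int i = 0; i < count; i++) {
        Text element = new Text();
        element.readFields(datainput);
        this.add(element);
    }
}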