Example usage for org.apache.hadoop.io Text write

List of usage examples for org.apache.hadoop.io Text write

Introduction

On this page you can find example usages of the write method of org.apache.hadoop.io.Text.

Prototype

@Override
public void write(DataOutput out) throws IOException 

Source Link

Document

Serializes this object by writing it to out. The length is written using zero-compressed encoding.

Usage

From source file:org.apache.gobblin.runtime.JobState.java

License:Apache License

/**
 * Serializes this job state to {@code out}.
 *
 * <p>Fields are emitted in a fixed order: job name, job id, start time, end
 * time, duration, state name, task count, the task-state section, and finally
 * whatever the superclass {@code write} emits.
 *
 * @param out destination of the serialized form
 * @param writeTasks when {@code true}, the combined number of regular and
 *        skipped task states is written followed by each task state; when
 *        {@code false}, a single zero count is written instead
 * @param writePreviousWorkUnitStates passed through unchanged to the
 *        superclass {@code write}
 * @throws IOException if any underlying write fails
 */
public void write(DataOutput out, boolean writeTasks, boolean writePreviousWorkUnitStates) throws IOException {
    // One Text instance is reused as the carrier for every string field.
    final Text scratch = new Text();

    scratch.set(this.jobName);
    scratch.write(out);

    scratch.set(this.jobId);
    scratch.write(out);

    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);

    scratch.set(this.state.name());
    scratch.write(out);

    out.writeInt(this.taskCount);

    if (!writeTasks) {
        // No task states requested: an explicit zero count stands in for the section.
        out.writeInt(0);
    } else {
        final int serializedTaskStates = this.taskStates.size() + this.skippedTaskStates.size();
        out.writeInt(serializedTaskStates);
        // Regular task states first, skipped ones after.
        for (TaskState regular : this.taskStates.values()) {
            regular.write(out);
        }
        for (TaskState skipped : this.skippedTaskStates.values()) {
            skipped.write(out);
        }
    }

    super.write(out, writePreviousWorkUnitStates);
}

From source file:org.apache.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample/*from   www.ja v  a2s. c  om*/
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(),
            TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(
            conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS),
            splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:org.apache.hama.bsp.message.TestMessageIO.java

License:Apache License

/**
 * Writes the same {@code Text} record 100 times into a default-constructed
 * {@code SpillingDataOutputBuffer} and checks that the buffer reports a size
 * of 4000 without having spilled.
 *
 * @throws Exception if writing to or closing the buffer fails
 */
public void testNonSpillBuffer() throws Exception {
    final SpillingDataOutputBuffer buffer = new SpillingDataOutputBuffer();
    final Text record = new Text("Testing the spillage of spilling buffer");

    int written = 0;
    while (written < 100) {
        record.write(buffer);
        written++;
    }

    assertTrue(buffer != null);
    // Presumably 100 records x (39 payload bytes + 1 length byte) = 4000.
    assertTrue(buffer.size() == 4000);
    assertFalse(buffer.hasSpilled());

    buffer.close();
}

From source file:org.apache.hama.bsp.message.TestMessageIO.java

License:Apache License

/**
 * Writes the same {@code Text} record 100 times into a
 * {@code SpillingDataOutputBuffer} backed by a {@code WriteSpilledDataProcessor}
 * and checks that the buffer reports a size of 4000, has spilled, and that the
 * spill file exists and can be deleted.
 *
 * @throws Exception if the processor, buffer, or file operations fail
 */
public void testSpillBuffer() throws Exception {
    final Configuration conf = new HamaConfiguration();

    // The spill file lives in the JVM temp directory under a random name.
    final String spillPath = System.getProperty("java.io.tmpdir") + File.separatorChar
            + new BigInteger(128, new SecureRandom()).toString(32);

    final SpilledDataProcessor spillWriter = new WriteSpilledDataProcessor(spillPath);
    spillWriter.init(conf);

    final SpillingDataOutputBuffer buffer = new SpillingDataOutputBuffer(2, 1024, 1024, true, spillWriter);
    final Text record = new Text("Testing the spillage of spilling buffer");

    int written = 0;
    while (written < 100) {
        record.write(buffer);
        written++;
    }

    assertTrue(buffer != null);
    assertTrue(buffer.size() == 4000);
    assertTrue(buffer.hasSpilled());

    // The spill file must exist on disk and be removable before the buffer is closed.
    final File spillFile = new File(spillPath);
    assertTrue(spillFile.exists());
    assertTrue(spillFile.delete());

    buffer.close();
}

From source file:org.apache.hama.bsp.message.TestMessageIO.java

License:Apache License

/**
 * Round-trips 100 copies of one {@code Text} record through a spilling output
 * buffer and the input buffer obtained from it, checking the spill file's
 * existence and length along the way.
 *
 * @throws Exception if any buffer or file operation fails
 */
public void testSpillInputStream() throws Exception {

    // Kept outside the try block so the finally clause can clean the file up.
    File f = null;
    try {
        String fileName = System.getProperty("java.io.tmpdir") + File.separatorChar
                + "testSpillInputStream.txt";
        Configuration conf = new HamaConfiguration();
        SpilledDataProcessor processor = new WriteSpilledDataProcessor(fileName);
        processor.init(conf);
        SpillingDataOutputBuffer outputBuffer = new SpillingDataOutputBuffer(2, 1024, 1024, true, processor);
        Text text = new Text("Testing the spillage of spilling buffer");
        // Write phase: each record is followed by an explicit record-end mark.
        for (int i = 0; i < 100; ++i) {
            text.write(outputBuffer);
            outputBuffer.markRecordEnd();
        }

        assertTrue(outputBuffer != null);
        assertTrue(outputBuffer.size() == 4000);
        assertTrue(outputBuffer.hasSpilled());
        f = new File(fileName);
        assertTrue(f.exists());
        // The file length is checked only after the buffer has been closed.
        outputBuffer.close();
        assertTrue(f.length() == 4000);// + (4000 / 1024 + 1) * 4));

        SpilledDataInputBuffer inputBuffer = outputBuffer.getInputStreamToRead(fileName);

        // Read phase: every record read back must equal the text that was written.
        for (int i = 0; i < 100; ++i) {
            text.readFields(inputBuffer);
            assertTrue("Testing the spillage of spilling buffer".equals(text.toString()));
            text.clear();
        }

        // A 101st read must hit end-of-file; reaching assertTrue(false) fails the test.
        try {
            text.readFields(inputBuffer);
            assertTrue(false);
        } catch (EOFException eof) {
            assertTrue(true);
        }

        inputBuffer.close();
        // The test expects completeReading(false) to leave the spill file in
        // place and completeReading(true) to remove it.
        inputBuffer.completeReading(false);
        assertTrue(f.exists());
        inputBuffer.completeReading(true);
        assertFalse(f.exists());
    } finally {
        // Best-effort cleanup in case an assertion failed before the file was removed.
        if (f != null) {
            if (f.exists()) {
                f.delete();
            }
        }
    }

}

From source file:org.apache.hama.bsp.TestCheckpoint.java

License:Apache License

/**
 * Checks that a peer created in the {@code RECOVERING} state replays
 * checkpointed messages into the messenger's loopback bundle.
 *
 * <p>The test stores an array of two {@code LongWritable} values through the
 * sync client, writes five {@code Text} messages (each preceded by its class
 * name) to a checkpoint file, constructs the peer, and then asserts that the
 * loopback bundle holds five messages whose first entry reads {@code "data"}.
 * The checkpoint directory is removed even when an assertion fails.
 *
 * @throws Exception if configuration, file-system access, or peer construction fails
 */
public void testPeerRecovery() throws Exception {
    Configuration config = new Configuration();
    config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName());
    config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName());
    config.setBoolean(Constants.CHECKPOINT_ENABLED, true);
    int port = BSPNetUtils.getFreePort(12502);
    LOG.info("Got port = " + port);

    config.set(Constants.PEER_HOST, Constants.DEFAULT_PEER_HOST);
    config.setInt(Constants.PEER_PORT, port);

    config.set("bsp.output.dir", "/tmp/hama-test_out");
    config.set("bsp.local.dir", "/tmp/hama-test");

    FileSystem dfs = FileSystem.get(config);
    BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp");
    TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1);

    TestMessageManager messenger = new TestMessageManager();
    PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config);

    Text txtMessage = new Text("data");
    String writeKey = "job_checkpttest_0001/checkpoint/1/";

    Writable[] writableArr = new Writable[2];
    writableArr[0] = new LongWritable(3L);
    writableArr[1] = new LongWritable(5L);
    ArrayWritable arrWritable = new ArrayWritable(LongWritable.class);
    arrWritable.set(writableArr);
    syncClient.storeInformation(writeKey, arrWritable, true, null);

    String writePath = "checkpoint/job_checkpttest_0001/3/1";
    try {
        FSDataOutputStream out = dfs.create(new Path(writePath));
        try {
            // Each checkpointed message is stored as its class name followed by its serialized form.
            for (int i = 0; i < 5; ++i) {
                out.writeUTF(txtMessage.getClass().getCanonicalName());
                txtMessage.write(out);
            }
        } finally {
            // Close the stream even if a write fails so the file handle is not leaked.
            out.close();
        }

        // Only constructed for its side effect; the assertions below read the
        // messenger's loopback bundle afterwards.
        @SuppressWarnings("unused")
        BSPPeer<?, ?, ?, ?, Text> bspTask = new TestBSPPeer(job, config, taskId, new Counters(), 3L,
                (BSPPeerSyncClient) syncClient, messenger, TaskStatus.State.RECOVERING);

        BSPMessageBundle<Text> bundleRead = messenger.getLoopbackBundle();
        assertEquals(5, bundleRead.size());

        String recoveredMsg = bundleRead.iterator().next().toString();
        // Expected value first, so a failure message reports the values the right way round.
        assertEquals("data", recoveredMsg);
    } finally {
        // Remove the checkpoint directory regardless of the test outcome.
        dfs.delete(new Path("checkpoint"), true);
    }
}

From source file:org.apache.mahout.common.StringTuple.java

License:Apache License

/**
 * Serializes this tuple to {@code out}: the number of entries as an int,
 * followed by each entry written through a {@code Text}.
 *
 * @param out destination of the serialized form
 * @throws IOException if any underlying write fails
 */
@Override
public void write(DataOutput out) throws IOException {
    // A single Text is reused as the carrier for every entry.
    final Text carrier = new Text();

    // The entry count goes first, then the entries in iteration order.
    out.writeInt(tuple.size());
    for (String element : tuple) {
        carrier.set(element);
        carrier.write(out);
    }
}

From source file:org.goldenorb.io.checkpoint.CheckPointDataTest.java

License:Apache License

/**
 * Exercises {@code CheckPointDataOutput} by writing an int, a long, a text and
 * a float Writable to a checkpoint for super step 0, partition 0, then closing
 * the output.
 *
 * @throws Exception if configuring, writing to, or closing the checkpoint fails
 */
@Test
public void testCheckpointOutput() throws Exception {
    final int superStep = 0;
    final int partition = 0;

    // Point the configuration at the test cluster's name node.
    final OrbConfiguration conf = new OrbConfiguration();
    conf.set("fs.default.name", "hdfs://localhost:" + cluster.getNameNodePort());
    conf.setJobNumber("0");
    conf.setFileOutputPath("test");

    final CheckPointDataOutput checkpointOutput = new CheckPointDataOutput(conf, superStep, partition);

    // One value of each Writable type, written in a fixed order.
    new IntWritable(4).write(checkpointOutput);
    new LongWritable(Long.MAX_VALUE).write(checkpointOutput);
    new Text("test").write(checkpointOutput);
    new FloatWritable(3.14159F).write(checkpointOutput);

    checkpointOutput.close();

    assertThat(checkpointOutput, notNullValue());
}

From source file:org.smartfrog.services.hadoop.junitmr.JUnitMRUtils.java

License:Open Source License

/**
 * Writes {@code s} to {@code out} by wrapping it in a {@code Text} and
 * delegating to that object's {@code write}.
 *
 * @param out destination of the serialized string
 * @param s the string to serialize
 * @throws IOException if the underlying write fails
 */
static void writeText(DataOutput out, String s) throws IOException {
    new Text(s).write(out);
}

From source file:org.terrier.structures.serialization.TestFixedSizeTextFactory.java

License:Mozilla Public License

/**
 * Returns the bytes produced by serializing the {@code Text} that
 * {@code getText(factory, s)} yields.
 *
 * @param factory factory handed to {@code getText}
 * @param s string handed to {@code getText}
 * @return the bytes written by the {@code Text}'s {@code write}
 * @throws Exception if obtaining or serializing the text fails
 */
static byte[] getBytes(FixedSizeWriteableFactory<Text> factory, String s) throws Exception {
    // In-memory sink that collects everything the Text writes.
    final ByteArrayOutputStream sink = new ByteArrayOutputStream();
    getText(factory, s).write(new DataOutputStream(sink));
    return sink.toByteArray();
}