List of usage examples for the org.apache.hadoop.io.Text.write method
@Override public void write(DataOutput out) throws IOException
From source file:org.apache.gobblin.runtime.JobState.java
License:Apache License
/**
 * Serializes this job state to the given output.
 *
 * <p>Fields are emitted in a fixed order: job name, job ID, start time, end time, duration,
 * state name, task count, the number of task states that follow, the task states themselves
 * (regular ones first, then skipped ones), and finally whatever the superclass writes.
 *
 * @param out destination the fields are written to
 * @param writeTasks when {@code false}, a task-state count of zero is written and no task
 *     states are emitted
 * @param writePreviousWorkUnitStates passed through unchanged to the superclass {@code write}
 * @throws IOException if writing to {@code out} fails
 */
public void write(DataOutput out, boolean writeTasks, boolean writePreviousWorkUnitStates)
    throws IOException {
  // A single Text instance is reused for every string field.
  Text stringField = new Text();

  stringField.set(this.jobName);
  stringField.write(out);

  stringField.set(this.jobId);
  stringField.write(out);

  out.writeLong(this.startTime);
  out.writeLong(this.endTime);
  out.writeLong(this.duration);

  stringField.set(this.state.name());
  stringField.write(out);

  out.writeInt(this.taskCount);

  if (!writeTasks) {
    // No task states follow, so the announced count is zero.
    out.writeInt(0);
  } else {
    int serializedTaskStates = this.taskStates.size() + this.skippedTaskStates.size();
    out.writeInt(serializedTaskStates);
    for (TaskState regular : this.taskStates.values()) {
      regular.write(out);
    }
    for (TaskState skipped : this.skippedTaskStates.values()) {
      skipped.write(out);
    }
  }

  super.write(out, writePreviousWorkUnitStates);
}
From source file:org.apache.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * @param job the job to sample/*from www.ja v a2s. c om*/ * @param partFile where to write the output file to * @throws Throwable if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); final TeraInputFormat inFormat = new TeraInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(), TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min( conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for (int i = 0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new 
Text(reader.getCurrentKey())); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie) { System.err.println( "Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if (threadGroup.getThrowable() != null) { throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for (Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }
From source file:org.apache.hama.bsp.message.TestMessageIO.java
License:Apache License
public void testNonSpillBuffer() throws Exception { SpillingDataOutputBuffer outputBuffer = new SpillingDataOutputBuffer(); Text text = new Text("Testing the spillage of spilling buffer"); for (int i = 0; i < 100; ++i) { text.write(outputBuffer); }//from w w w . ja v a 2 s . c om assertTrue(outputBuffer != null); assertTrue(outputBuffer.size() == 4000); assertFalse(outputBuffer.hasSpilled()); outputBuffer.close(); }
From source file:org.apache.hama.bsp.message.TestMessageIO.java
License:Apache License
public void testSpillBuffer() throws Exception { Configuration conf = new HamaConfiguration(); String fileName = System.getProperty("java.io.tmpdir") + File.separatorChar + new BigInteger(128, new SecureRandom()).toString(32); SpilledDataProcessor processor = new WriteSpilledDataProcessor(fileName); processor.init(conf);//from ww w . ja v a2 s .c o m SpillingDataOutputBuffer outputBuffer = new SpillingDataOutputBuffer(2, 1024, 1024, true, processor); Text text = new Text("Testing the spillage of spilling buffer"); for (int i = 0; i < 100; ++i) { text.write(outputBuffer); } assertTrue(outputBuffer != null); assertTrue(outputBuffer.size() == 4000); assertTrue(outputBuffer.hasSpilled()); File f = new File(fileName); assertTrue(f.exists()); assertTrue(f.delete()); outputBuffer.close(); }
From source file:org.apache.hama.bsp.message.TestMessageIO.java
License:Apache License
public void testSpillInputStream() throws Exception { File f = null;//from w w w . j av a2s . c om try { String fileName = System.getProperty("java.io.tmpdir") + File.separatorChar + "testSpillInputStream.txt"; Configuration conf = new HamaConfiguration(); SpilledDataProcessor processor = new WriteSpilledDataProcessor(fileName); processor.init(conf); SpillingDataOutputBuffer outputBuffer = new SpillingDataOutputBuffer(2, 1024, 1024, true, processor); Text text = new Text("Testing the spillage of spilling buffer"); for (int i = 0; i < 100; ++i) { text.write(outputBuffer); outputBuffer.markRecordEnd(); } assertTrue(outputBuffer != null); assertTrue(outputBuffer.size() == 4000); assertTrue(outputBuffer.hasSpilled()); f = new File(fileName); assertTrue(f.exists()); outputBuffer.close(); assertTrue(f.length() == 4000);// + (4000 / 1024 + 1) * 4)); SpilledDataInputBuffer inputBuffer = outputBuffer.getInputStreamToRead(fileName); for (int i = 0; i < 100; ++i) { text.readFields(inputBuffer); assertTrue("Testing the spillage of spilling buffer".equals(text.toString())); text.clear(); } try { text.readFields(inputBuffer); assertTrue(false); } catch (EOFException eof) { assertTrue(true); } inputBuffer.close(); inputBuffer.completeReading(false); assertTrue(f.exists()); inputBuffer.completeReading(true); assertFalse(f.exists()); } finally { if (f != null) { if (f.exists()) { f.delete(); } } } }
From source file:org.apache.hama.bsp.TestCheckpoint.java
License:Apache License
public void testPeerRecovery() throws Exception { Configuration config = new Configuration(); config.set(SyncServiceFactory.SYNC_CLIENT_CLASS, TempSyncClient.class.getName()); config.set(Constants.FAULT_TOLERANCE_CLASS, AsyncRcvdMsgCheckpointImpl.class.getName()); config.setBoolean(Constants.CHECKPOINT_ENABLED, true); int port = BSPNetUtils.getFreePort(12502); LOG.info("Got port = " + port); config.set(Constants.PEER_HOST, Constants.DEFAULT_PEER_HOST); config.setInt(Constants.PEER_PORT, port); config.set("bsp.output.dir", "/tmp/hama-test_out"); config.set("bsp.local.dir", "/tmp/hama-test"); FileSystem dfs = FileSystem.get(config); BSPJob job = new BSPJob(new BSPJobID("checkpttest", 1), "/tmp"); TaskAttemptID taskId = new TaskAttemptID(new TaskID(job.getJobID(), 1), 1); TestMessageManager messenger = new TestMessageManager(); PeerSyncClient syncClient = SyncServiceFactory.getPeerSyncClient(config); Text txtMessage = new Text("data"); String writeKey = "job_checkpttest_0001/checkpoint/1/"; Writable[] writableArr = new Writable[2]; writableArr[0] = new LongWritable(3L); writableArr[1] = new LongWritable(5L); ArrayWritable arrWritable = new ArrayWritable(LongWritable.class); arrWritable.set(writableArr);// w w w. j a v a2 s. com syncClient.storeInformation(writeKey, arrWritable, true, null); String writePath = "checkpoint/job_checkpttest_0001/3/1"; FSDataOutputStream out = dfs.create(new Path(writePath)); for (int i = 0; i < 5; ++i) { out.writeUTF(txtMessage.getClass().getCanonicalName()); txtMessage.write(out); } out.close(); @SuppressWarnings("unused") BSPPeer<?, ?, ?, ?, Text> bspTask = new TestBSPPeer(job, config, taskId, new Counters(), 3L, (BSPPeerSyncClient) syncClient, messenger, TaskStatus.State.RECOVERING); BSPMessageBundle<Text> bundleRead = messenger.getLoopbackBundle(); assertEquals(5, bundleRead.size()); String recoveredMsg = bundleRead.iterator().next().toString(); assertEquals(recoveredMsg, "data"); dfs.delete(new Path("checkpoint"), true); }
From source file:org.apache.mahout.common.StringTuple.java
License:Apache License
/**
 * Serializes this tuple: the number of entries as an int, followed by each entry
 * written as a {@code Text}.
 *
 * @param out destination the tuple is written to
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
  final int entryCount = tuple.size();
  out.writeInt(entryCount);

  // One Text instance is recycled for every entry.
  final Text scratch = new Text();
  for (String element : tuple) {
    scratch.set(element);
    scratch.write(out);
  }
}
From source file:org.goldenorb.io.checkpoint.CheckPointDataTest.java
License:Apache License
/**
 * Tests the CheckPointDataOutput class by writing several different types of Writables
 * (int, long, text and float) to the checkpoint, then closing it.
 *
 * @throws Exception
 */
@Test
public void testCheckpointOutput() throws Exception {
  final int superStep = 0;
  final int partition = 0;

  OrbConfiguration orbConf = new OrbConfiguration();
  orbConf.set("fs.default.name", "hdfs://localhost:" + cluster.getNameNodePort());
  orbConf.setJobNumber("0");
  orbConf.setFileOutputPath("test");

  CheckPointDataOutput checkpointOutput =
      new CheckPointDataOutput(orbConf, superStep, partition);

  // Write one value of each Writable type, in a fixed order.
  new IntWritable(4).write(checkpointOutput);
  new LongWritable(Long.MAX_VALUE).write(checkpointOutput);
  new Text("test").write(checkpointOutput);
  new FloatWritable(3.14159F).write(checkpointOutput);

  checkpointOutput.close();

  assertThat(checkpointOutput, notNullValue());
}
From source file:org.smartfrog.services.hadoop.junitmr.JUnitMRUtils.java
License:Open Source License
/**
 * Wraps the string in a {@code Text} and writes it to the given output.
 *
 * @param out destination to write to
 * @param s string to serialize
 * @throws IOException if writing to {@code out} fails
 */
static void writeText(DataOutput out, String s) throws IOException {
  new Text(s).write(out);
}
From source file:org.terrier.structures.serialization.TestFixedSizeTextFactory.java
License:Mozilla Public License
/**
 * Returns the bytes produced by writing the {@code Text} that {@code getText(factory, s)}
 * yields into an in-memory stream.
 *
 * @param factory factory handed to {@code getText}
 * @param s string handed to {@code getText}
 * @return the bytes written by {@code Text.write}
 * @throws Exception if building or writing the text fails
 */
static byte[] getBytes(FixedSizeWriteableFactory<Text> factory, String s) throws Exception {
  ByteArrayOutputStream sink = new ByteArrayOutputStream();
  DataOutputStream dataSink = new DataOutputStream(sink);
  getText(factory, s).write(dataSink);
  return sink.toByteArray();
}