List of usage examples for org.apache.hadoop.io.Text#write(DataOutput)
@Override public void write(DataOutput out) throws IOException
From source file:gobblin.compat.TextSerializerTest.java
License:Apache License
@Test public void testDeserialize() throws IOException { // Use Hadoop's serializer, verify our deserializer can read the string back for (String textToSerialize : textsToSerialize) { ByteArrayOutputStream bOs = new ByteArrayOutputStream(); DataOutputStream dataOutputStream = new DataOutputStream(bOs); Text hadoopText = new Text(); hadoopText.set(textToSerialize); hadoopText.write(dataOutputStream); dataOutputStream.close();/*from w w w . ja v a2 s. com*/ ByteArrayInputStream bIn = new ByteArrayInputStream(bOs.toByteArray()); DataInputStream dataInputStream = new DataInputStream(bIn); String deserializedString = TextSerializer.readTextAsString(dataInputStream); Assert.assertEquals(deserializedString, textToSerialize); } }
From source file:gobblin.runtime.JobState.java
License:Apache License
public void write(DataOutput out, boolean writeTasks) throws IOException { Text text = new Text(); text.set(this.jobName); text.write(out); text.set(this.jobId); text.write(out);//from ww w. j a va2 s .c o m out.writeLong(this.startTime); out.writeLong(this.endTime); out.writeLong(this.duration); text.set(this.state.name()); text.write(out); out.writeInt(this.taskCount); if (writeTasks) { out.writeInt(this.taskStates.size() + this.skippedTaskStates.size()); for (TaskState taskState : this.taskStates.values()) { taskState.write(out); } for (TaskState taskState : this.skippedTaskStates.values()) { taskState.write(out); } } else { out.writeInt(0); } super.write(out); }
From source file:gobblin.runtime.TaskState.java
License:Apache License
/**
 * Serializes this task state to {@code out}: job ID, task ID, start time, end time and
 * duration, followed by whatever the superclass writes.
 *
 * @param out destination of the serialized form
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    // Identifiers go first, each encoded as a Hadoop Text.
    new Text(this.jobId).write(out);
    new Text(this.taskId).write(out);

    // Timing information.
    out.writeLong(this.startTime);
    out.writeLong(this.endTime);
    out.writeLong(this.duration);

    super.write(out);
}
From source file:info.halo9pan.word2vec.hadoop.mr.SortInputFormat.java
License:Apache License
/** * Use the input splits to take samples of the input and generate sample * keys. By default reads 100,000 keys from 10 locations in the input, sorts * them and picks N-1 keys to generate N equally sized partitions. * /*from w w w.j a v a 2 s .c o m*/ * @param job * the job to sample * @param partFile * where to write the output file to * @throws Throwable * if something goes wrong */ public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable { long t1 = System.currentTimeMillis(); Configuration conf = job.getConfiguration(); final SortInputFormat inFormat = new SortInputFormat(); final TextSampler sampler = new TextSampler(); int partitions = job.getNumReduceTasks(); long sampleSize = conf.getLong(SAMPLE_SIZE, 100000); final List<InputSplit> splits = inFormat.getSplits(job); long t2 = System.currentTimeMillis(); System.out.println("Computing input splits took " + (t2 - t1) + "ms"); int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size()); System.out.println("Sampling " + samples + " splits of " + splits.size()); final long recordsPerSample = sampleSize / samples; final int sampleStep = splits.size() / samples; Thread[] samplerReader = new Thread[samples]; SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group"); // take N samples from different parts of the input for (int i = 0; i < samples; ++i) { final int idx = i; samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) { { setDaemon(true); } public void run() { long records = 0; try { TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID()); RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx), context); reader.initialize(splits.get(sampleStep * idx), context); while (reader.nextKeyValue()) { sampler.addKey(new Text(reader.getCurrentKey())); records += 1; if (recordsPerSample <= records) { break; } } } catch (IOException ie) { 
System.err.println( "Got an exception while reading splits " + StringUtils.stringifyException(ie)); throw new RuntimeException(ie); } catch (InterruptedException e) { } } }; samplerReader[i].start(); } FileSystem outFs = partFile.getFileSystem(conf); DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10, outFs.getDefaultBlockSize(partFile)); for (int i = 0; i < samples; i++) { try { samplerReader[i].join(); if (threadGroup.getThrowable() != null) { throw threadGroup.getThrowable(); } } catch (InterruptedException e) { } } for (Text split : sampler.createPartitions(partitions)) { split.write(writer); } writer.close(); long t3 = System.currentTimeMillis(); System.out.println("Computing parititions took " + (t3 - t2) + "ms"); }
From source file:it.uniroma1.bdc.tesi.piccioli.giraphstandalone.ksimplecycle.TextAndHashes.java
@Override public void write(DataOutput out) throws IOException { int size;/* w w w . j a v a 2 s .c om*/ int sizeSet; value.write(out); this.generatedHash = new HashSet<Integer>(); this.seenHash = new HashMap<Text, Set<Integer>>(); size = this.generatedHash.size(); out.writeInt(size); for (Integer item : generatedHash) { out.writeInt(item); } size = this.seenHash.size(); out.writeInt(size);//scrivo numero di chiavi nella MAP for (Text itemKey : seenHash.keySet()) { itemKey.write(out);//Scrivo chiave sizeSet = this.seenHash.get(itemKey).size();//Numero elementi per la chiave out.writeInt(sizeSet);//Scrivo numero elementi for (Integer item : seenHash.get(itemKey)) { out.writeInt(item);//Scrivo elementi } } }
From source file:it.uniroma1.bdc.tesi.piccioli.giraphstandalone.message.CustomMessageWithAggregatedPath.java
@Override public void write(DataOutput out) throws IOException { out.writeInt(visitedVertex.size());//from w w w. ja v a 2 s .co m for (Set<Text> item : visitedVertex) { out.writeInt(item.size()); for (Text itemText : item) { itemText.write(out); } } }
From source file:it.uniroma1.bdc.tesi.piccioli.giraphstandalone.message.CustomMessageWithPath.java
/**
 * Serializes this message to {@code out}: the number of visited vertices, each visited
 * vertex, and finally the source vertex.
 *
 * @param out destination of the serialized form
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    int visitedCount = visitedVertex.size();
    out.writeInt(visitedCount);
    for (Text visited : visitedVertex) {
        visited.write(out);
    }

    // The source vertex always comes last.
    sourceVertex.write(out);
}
From source file:net.darkseraphim.webanalytics.hadoop.csv.Row.java
License:Apache License
/**
 * Serializes this row to {@code dataoutput}: the number of elements followed by each
 * element in iteration order.
 *
 * @param dataoutput destination of the serialized form
 * @throws IOException if writing to {@code dataoutput} fails
 */
public void write(DataOutput dataoutput) throws IOException {
    int columnCount = this.size();
    dataoutput.writeInt(columnCount);

    for (Text column : this) {
        column.write(dataoutput);
    }
}
From source file:org.acaro.graffiti.processing.ResultSet.java
License:Apache License
/**
 * Serializes this result set to {@code output}: the number of results followed by each
 * result.
 *
 * @param output destination of the serialized form
 * @throws IOException if writing to {@code output} fails
 */
@Override
public void write(DataOutput output) throws IOException {
    int resultCount = results.size();
    output.writeInt(resultCount);

    for (Text entry : results) {
        entry.write(output);
    }
}
From source file:org.acaro.graffiti.processing.Vertex.java
License:Apache License
/**
 * Serializes this vertex to {@code out}.
 *
 * <p>Layout: the vertex ID; the number of edge labels, then for each label the label, the
 * number of destinations and the destinations themselves; the number of messages followed
 * by each message; and finally the halt flag.
 *
 * @param out destination of the serialized form
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    vertexId.write(out);

    // Outgoing edges, grouped by label.
    out.writeInt(labelledOutEdgeMap.size());
    for (Entry<Text, Set<Text>> edgesForLabel : labelledOutEdgeMap.entrySet()) {
        Set<Text> destinations = edgesForLabel.getValue();
        edgesForLabel.getKey().write(out);
        out.writeInt(destinations.size());
        for (Text destination : destinations) {
            destination.write(out);
        }
    }

    // Messages held by this vertex.
    out.writeInt(msgList.size());
    for (Message message : msgList) {
        message.write(out);
    }

    out.writeBoolean(halt);
}