List of usage examples for org.apache.hadoop.io Text write
@Override public void write(DataOutput out) throws IOException
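The signature above comes from the Writable contract: write(DataOutput) serializes the instance and readFields(DataInput) restores it. Before the examples, here is a minimal self-contained sketch (not taken from any of the files below; the class name RoundTrip is made up) that round-trips a Text through a byte array:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import org.apache.hadoop.io.Text;

public class RoundTrip {
    public static void main(String[] args) throws IOException {
        // Serialize: Text.write emits a vint-encoded length followed by UTF-8 bytes.
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        Text original = new Text("hello world");
        original.write(new DataOutputStream(bytes));

        // Deserialize with the symmetric readFields call.
        Text copy = new Text();
        copy.readFields(new DataInputStream(new ByteArrayInputStream(bytes.toByteArray())));
        System.out.println(copy); // prints "hello world"
    }
}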
From source file:hadoop_serialize.java
License:Apache License
public static void main(String[] args) throws java.io.IOException {
    //System.err.println("Writing byte stream to stdout");
    DataOutputStream os = new DataOutputStream(System.out);

    //System.err.println("Writing a sequence of numbers");
    //System.err.println("WritableUtils.writeVInt: 42, 4242, 424242, 42424242, -42");
    WritableUtils.writeVInt(os, 42);
    WritableUtils.writeVInt(os, 4242);
    WritableUtils.writeVInt(os, 424242);
    WritableUtils.writeVInt(os, 42424242);
    WritableUtils.writeVInt(os, -42);

    //System.err.println("WritableUtils.writeVLong 42, 424242, 4242424242");
    WritableUtils.writeVLong(os, 42L);
    WritableUtils.writeVLong(os, 424242L);
    WritableUtils.writeVLong(os, 4242424242L);

    //System.err.println("WritableUtils.writeString \"hello world\"");
    WritableUtils.writeString(os, "hello world");
    WritableUtils.writeString(os, "oggi \u00e8 gioved\u00ec");

    // This file contains: writeVInt of 42, 4242, 424242, 42424242, -42;
    // writeVLong of 42, 424242, 4242424242; 2 writeString calls
    //System.err.println("Text.write \"I'm a Text object\"");
    Text t = new Text("\u00e0 Text object");
    t.write(os);

    os.close();
}
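The stream written above can be decoded with the matching read calls. This companion sketch is my addition, not part of the original file; it assumes the byte stream arrives on stdin:

import java.io.DataInputStream;
import java.io.IOException;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableUtils;

public class ReadBack {
    public static void main(String[] args) throws IOException {
        DataInputStream in = new DataInputStream(System.in);
        // Read back in exactly the order the writer emitted.
        for (int i = 0; i < 5; i++)
            System.out.println(WritableUtils.readVInt(in));  // 42, 4242, 424242, 42424242, -42
        for (int i = 0; i < 3; i++)
            System.out.println(WritableUtils.readVLong(in)); // 42, 424242, 4242424242
        System.out.println(WritableUtils.readString(in));    // "hello world"
        System.out.println(WritableUtils.readString(in));    // "oggi è giovedì"
        Text t = new Text();
        t.readFields(in);                                    // "à Text object"
        System.out.println(t);
    }
}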
From source file:com.chinamobile.bcbsp.bspcontroller.HDFSOperator.java
License:Apache License
/**
 * Serialize the WorkerManager status.
 *
 * @param uri
 *          BSPfile output uri
 * @param wmlist
 *          workerManager list
 * @param staffsLoadFactor
 *          for load balancing
 * @throws IOException
 *           if the BSPfile cannot be written
 */
public void serializeWorkerManagerStatus(String uri, Collection<WorkerManagerStatus> wmlist,
        double staffsLoadFactor) throws IOException {
    Double loadfactor = staffsLoadFactor;
    bspout = new BSPFSDataOutputStreamImpl(uri, 1, conf);
    // The load factor is written first, as a Text record.
    Text factor = new Text(loadfactor.toString());
    factor.write(bspout.getOut());
    for (WorkerManagerStatus wmStatus : wmlist) {
        wmStatus.write(bspout.getOut());
    }
    bspout.flush();
    bspout.close();
}
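The method writes one Text (the load factor) followed by a variable number of WorkerManagerStatus records with no leading count, so a reader has to consume statuses until the stream ends. A hedged read-side sketch, my addition rather than BC-BSP code, assuming WorkerManagerStatus implements Writable with a no-arg constructor:

// Read-side sketch (assumption; not part of the BC-BSP source shown here).
public List<WorkerManagerStatus> deserializeWorkerManagerStatus(DataInputStream in)
        throws IOException {
    Text factor = new Text();
    factor.readFields(in);   // the load factor is the first record
    double staffsLoadFactor = Double.parseDouble(factor.toString());
    List<WorkerManagerStatus> statuses = new ArrayList<WorkerManagerStatus>();
    // No record count was written, so read until the stream is exhausted;
    // available() is a weak end-of-stream test but matches the file layout above.
    while (in.available() > 0) {
        WorkerManagerStatus wmStatus = new WorkerManagerStatus();
        wmStatus.readFields(in);
        statuses.add(wmStatus);
    }
    return statuses;
}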
From source file:com.cloudera.cdk.morphline.hadoop.rcfile.ReadRCFileTest.java
License:Apache License
private void createRCFile(final String fileName, final int numRecords, final int maxColumns)
        throws IOException {
    // Write the sequence file
    SequenceFile.Metadata metadata = getMetadataForRCFile();
    Configuration conf = new Configuration();
    conf.set(RCFile.COLUMN_NUMBER_CONF_STR, String.valueOf(maxColumns));
    Path inputFile = dfs.makeQualified(new Path(testDirectory, fileName));
    RCFile.Writer rcFileWriter = new RCFile.Writer(dfs, conf, inputFile, null, metadata, null);
    for (int row = 0; row < numRecords; row++) {
        BytesRefArrayWritable dataWrite = new BytesRefArrayWritable(maxColumns);
        dataWrite.resetValid(maxColumns);
        for (int column = 0; column < maxColumns; column++) {
            Text sampleText = new Text("ROW-NUM:" + row + ", COLUMN-NUM:" + column);
            ByteArrayDataOutput dataOutput = ByteStreams.newDataOutput();
            sampleText.write(dataOutput);
            dataWrite.set(column, new BytesRefWritable(dataOutput.toByteArray()));
        }
        rcFileWriter.append(dataWrite);
    }
    rcFileWriter.close();
}
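Each cell above holds the full Writable encoding of a Text (vint length plus UTF-8 bytes), not the raw string bytes. A small round-trip sketch, added here for illustration, showing how such a cell could be decoded again with Guava:

import java.io.IOException;
import com.google.common.io.ByteArrayDataOutput;
import com.google.common.io.ByteStreams;
import org.apache.hadoop.io.Text;

public class CellRoundTrip {
    public static void main(String[] args) throws IOException {
        // Encode a cell the same way the test above does.
        ByteArrayDataOutput out = ByteStreams.newDataOutput();
        new Text("ROW-NUM:0, COLUMN-NUM:0").write(out);
        byte[] cell = out.toByteArray();

        // Decode: readFields consumes the vint length, then the UTF-8 payload.
        Text decoded = new Text();
        decoded.readFields(ByteStreams.newDataInput(cell));
        System.out.println(decoded); // ROW-NUM:0, COLUMN-NUM:0
    }
}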
From source file:com.cloudera.recordservice.examples.terasort.TeraInputFormat.java
License:Apache License
/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 *
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(SAMPLE_SIZE, 100000);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(conf.getInt(NUM_PARTITIONS, 10), splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            @Override
            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println("Got an exception while reading splits "
                            + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {
                    // ignored: sampling simply stops early
                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
            // ignored: move on to the next sampler thread
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing partitions took " + (t3 - t2) + "ms");
}
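The loop at the end serializes partitions - 1 boundary keys back-to-back with Text.write. The reader side restores them with the symmetric readFields calls; a sketch of that decoding, added here under the assumption that the file holds exactly those keys and nothing else:

// Sketch of the matching reader (assumption: partitions - 1 Text keys,
// written back-to-back as above).
static Text[] readPartitionKeys(DataInput in, int partitions) throws IOException {
    Text[] cutPoints = new Text[partitions - 1];
    for (int i = 0; i < cutPoints.length; i++) {
        cutPoints[i] = new Text();
        cutPoints[i].readFields(in); // each key is a vint length plus UTF-8 bytes
    }
    return cutPoints;
}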
From source file:com.ikanow.aleph2.analytics.hadoop.assets.ObjectNodeWritableComparable.java
License:Apache License
@Override
public void write(DataOutput out) throws IOException {
    final Text text = new Text();
    text.set(_object_node.toString());
    text.write(out);
}
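Here the JSON tree is serialized as the UTF-8 form of its string rendering. A matching readFields sketch, my assumption rather than the class's actual read path, would parse the Text back into an ObjectNode with Jackson:

// Hypothetical counterpart, assuming the write() above.
@Override
public void readFields(DataInput in) throws IOException {
    final Text text = new Text();
    text.readFields(in);
    // readTree returns a JsonNode; the cast assumes the payload was an
    // object node, which is what write() serialized.
    _object_node = (ObjectNode) new ObjectMapper().readTree(text.toString());
}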
From source file:com.marcolotz.MRComponents.SerializerConverter.java
License:Creative Commons License
/***
 * A refactored way to serialize a String: wrap it in a Text and write
 * that to the DataOutput.
 *
 * @param outputString the string to serialize (may be null)
 * @param out the output to write to
 * @throws IOException
 */
public static void writeString(String outputString, DataOutput out) throws IOException {
    Text writtenString;
    /* Prevents a NullPointerException when writing a null string to the output */
    if (outputString == null) {
        writtenString = new Text("null");
    } else {
        writtenString = new Text(outputString);
    }
    writtenString.write(out);
}
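A symmetric reader would map the sentinel back to a null reference. Note the trade-off in this design: a genuine input string "null" becomes indistinguishable from null. A hedged sketch of the counterpart (the original class's read method is not shown in this listing):

// Hypothetical readString counterpart, assuming the "null" sentinel above.
public static String readString(DataInput in) throws IOException {
    Text readText = new Text();
    readText.readFields(in);
    String value = readText.toString();
    // Caveat: a legitimate "null" string also decodes to null here.
    return "null".equals(value) ? null : value;
}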
From source file:com.marklogic.contentpump.RDFWritable.java
License:Apache License
@Override
public void write(DataOutput out) throws IOException {
    if (graphUri == null) {
        out.writeByte(0);
    } else {
        out.writeByte(1);
        Text t = new Text(graphUri);
        t.write(out);
    }
    out.writeByte(type);
    if (value instanceof Text) {
        ((Text) value).write(out);
    } else if (value instanceof MarkLogicNode) {
        ((MarkLogicNode) value).write(out);
    } else if (value instanceof BytesWritable) {
        ((BytesWritable) value).write(out);
    }
    // serialize permissions
    if (permissions == null) {
        out.writeByte(0);
    } else {
        out.writeByte(permissions.length);
        for (int i = 0; i < permissions.length; i++) {
            Text role = new Text(permissions[i].getRole());
            Text cap = new Text(permissions[i].getCapability().toString());
            role.write(out);
            cap.write(out);
        }
    }
}
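The writer uses a presence byte for the optional graph URI and a single count byte for permissions, which caps them at 127 entries since writeByte keeps only 8 bits. A partial read-side sketch of the leading fields, added here for illustration only; a full decoder would also have to consume the value bytes, whose concrete type cannot be re-derived from this excerpt alone:

// Partial read-side sketch (assumption: stream written by write() above).
String graphUri = null;
if (in.readByte() != 0) {     // presence byte for the optional graph URI
    Text t = new Text();
    t.readFields(in);
    graphUri = t.toString();
}
byte type = in.readByte();    // the value bytes follow and depend on this type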
From source file:com.marklogic.mapreduce.MarkLogicInputSplit.java
License:Apache License
@Override
public void write(DataOutput out) throws IOException {
    out.writeLong(start);
    out.writeLong(length);
    Text forestIdText = new Text(forestId.toByteArray());
    forestIdText.write(out);
    if (hostName != null && hostName.length > 0) {
        Text.writeString(out, hostName[0]);
    }
    out.writeBoolean(isLastSplit);
}
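The host name is written conditionally with the static Text.writeString helper, so a reader has no marker telling it whether the string is present. The sketch below is an assumption, not the library's actual readFields, and works only when a host name was written:

// Hypothetical counterpart, valid only when hostName was non-empty on write.
@Override
public void readFields(DataInput in) throws IOException {
    start = in.readLong();
    length = in.readLong();
    Text forestIdText = new Text();
    forestIdText.readFields(in);
    // Copy only the valid prefix: Text.getBytes() may be longer than getLength().
    byte[] raw = Arrays.copyOf(forestIdText.getBytes(), forestIdText.getLength());
    forestId = new BigInteger(raw); // assumes forestId is a BigInteger, per toByteArray() above
    hostName = new String[] { Text.readString(in) };
    isLastSplit = in.readBoolean();
}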
From source file:com.phantom.hadoop.examples.terasort.TeraInputFormat.java
License:Apache License
This writePartitionFile method is a verbatim duplicate of the com.cloudera.recordservice.examples.terasort.TeraInputFormat example shown above; see that entry for the formatted code. The only differences are cosmetic: javadoc whitespace and a missing @Override on the sampler thread's run().
From source file:com.yolodata.tbana.hadoop.mapred.util.ArrayListTextWritable.java
License:Open Source License
@Override
public void write(DataOutput dataoutput) throws IOException {
    dataoutput.writeInt(this.size());
    for (Text element : this) {
        element.write(dataoutput);
    }
}
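Because the element count is written first, the collection can be restored with a straightforward counterpart; a sketch, assuming ArrayListTextWritable extends ArrayList<Text>:

// Hypothetical counterpart, assuming the class extends ArrayList<Text>.
@Override
public void readFields(DataInput datainput) throws IOException {
    this.clear();
    int size = datainput.readInt();   // count written first by write()
    for (int i = 0; i < size; i++) {
        Text element = new Text();
        element.readFields(datainput);
        this.add(element);
    }
}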