List of usage examples for org.apache.hadoop.io.LongWritable.set
public void set(long value)
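A minimal, self-contained sketch (the class name and values are illustrative) showing that set overwrites the wrapped value in place, which is why a single LongWritable is typically reused rather than reallocated per record:

import org.apache.hadoop.io.LongWritable;

public class LongWritableSetDemo {
    public static void main(String[] args) {
        LongWritable w = new LongWritable();
        for (long v = 0; v < 3; v++) {
            w.set(v);                    // overwrite the wrapped value in place
            System.out.println(w.get()); // prints 0, 1, 2
        }
    }
}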
From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java
License:Apache License
public LongWritable readLong(LongWritable lw) throws IOException {
    if (lw == null) {
        lw = new LongWritable(); // lazily allocate a writable the caller can reuse
    }
    lw.set(in.readLong());
    return lw;
}
From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java
License:Apache License
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    // Reuse a single writable for every emitted value.
    LongWritable outputValue = new LongWritable();
    for (Entry<Text, Long> kv : resultMap.entrySet()) {
        outputValue.set(kv.getValue());
        context.write(kv.getKey(), outputValue);
    }
}
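A note on this idiom: Hadoop serializes the key and value when context.write is called, so a single writable can safely be overwritten on the next loop iteration; allocating a fresh LongWritable per entry would only add garbage-collection pressure.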
From source file:com.linkedin.cubert.plan.physical.PerfProfiler.java
License:Open Source License
private void resetOperatorTime() {
    LongWritable[] curCumulativeOperatorTime = cumulativeOperatorTime[currentPassIndex];
    for (LongWritable singleOperatorTime : curCumulativeOperatorTime) {
        if (singleOperatorTime != null)
            singleOperatorTime.set(0);
    }
}
From source file:com.microsoft.canberra.tf.util.SparseVectorRecordReader.java
License:Open Source License
@Override
public boolean next(final LongWritable recordId, final Document data) throws IOException {
    if (!this.textRecordReader.next(this.offset, this.text)) {
        return false;
    }
    // LOG.log(Level.FINEST, "RecordReader: {0} :: {1}", new Object[] { this.offset, this.text });
    final String[] fields = this.text.toString().split("\\s+");
    if (fields.length <= 1) {
        return false;
    }
    // First element contains the unique document ID:
    recordId.set(Long.parseLong(fields[0]));
    data.clearTokens(recordId.get(), fields.length - 2);
    // Second element contains the number of unique items in the document:
    // assert(Integer.parseInt(fields[1]) == fields.length - 2);
    for (int i = 2; i < fields.length; ++i) {
        // Each element is a colon-separated pair of integers, item_hash:item_frequency
        final String[] pair = fields[i].split(":", 2);
        data.add(Integer.valueOf(pair[0]), Double.valueOf(pair[1]));
    }
    // LOG.log(Level.FINEST, "RecordReader: {0} :: {1}", new Object[] { recordId, data });
    return true;
}
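For illustration, a hypothetical input line in the format this reader expects, with made-up values:

// "42 2 1001:3.0 2002:1.5" -> document 42 with 2 unique item_hash:item_frequency pairs
String[] fields = "42 2 1001:3.0 2002:1.5".split("\\s+");
long docId = Long.parseLong(fields[0]);        // 42, assigned via recordId.set(...)
int uniqueItems = Integer.parseInt(fields[1]); // 2 == fields.length - 2
String[] pair = fields[2].split(":", 2);       // ["1001", "3.0"]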
From source file:com.pinterest.secor.uploader.UploaderTest.java
License:Apache License
public void testTrimFiles() throws Exception {
    Mockito.when(mZookeeperConnector.getCommittedOffsetCount(mTopicPartition)).thenReturn(21L);
    Mockito.when(mOffsetTracker.setCommittedOffsetCount(mTopicPartition, 21L)).thenReturn(20L);
    Mockito.when(mOffsetTracker.getLastSeenOffset(mTopicPartition)).thenReturn(21L);

    HashSet<LogFilePath> logFilePaths = new HashSet<LogFilePath>();
    logFilePaths.add(mLogFilePath);
    Mockito.when(mFileRegistry.getPaths(mTopicPartition)).thenReturn(logFilePaths);

    PowerMockito.mockStatic(FileSystem.class);

    SequenceFile.Reader reader = mUploader.getReader();
    Mockito.doReturn(LongWritable.class).when(reader).getKeyClass();
    Mockito.doReturn(BytesWritable.class).when(reader).getValueClass();
    Mockito.when(reader.next(Mockito.any(Writable.class), Mockito.any(Writable.class)))
            .thenAnswer(new Answer<Boolean>() {
                private int mCallCount = 0;

                @Override
                public Boolean answer(InvocationOnMock invocation) throws Throwable {
                    if (mCallCount == 2) {
                        return false;
                    }
                    LongWritable key = (LongWritable) invocation.getArguments()[0];
                    key.set(20 + mCallCount++);
                    return true;
                }
            });

    PowerMockito.mockStatic(IdUtil.class);
    Mockito.when(IdUtil.getLocalMessageDir()).thenReturn("some_message_dir");

    SequenceFile.Writer writer = Mockito.mock(SequenceFile.Writer.class);
    LogFilePath dstLogFilePath = new LogFilePath("/some_parent_dir/some_message_dir",
            "/some_parent_dir/some_message_dir/some_topic/some_partition/"
                    + "some_other_partition/10_0_00000000000000000021");
    Mockito.when(mFileRegistry.getOrCreateWriter(dstLogFilePath, null)).thenReturn(writer);

    mUploader.applyPolicy();

    Mockito.verify(writer).append(Mockito.any(LongWritable.class), Mockito.any(BytesWritable.class));
    Mockito.verify(mFileRegistry).deletePath(mLogFilePath);
}
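The stub works because SequenceFile.Reader.next(key, value) fills the caller-supplied writables in place and returns false at end of file; the Answer therefore mutates the key argument via set rather than returning a record, emitting offsets 20 and 21 before signaling EOF.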
From source file:com.ricemap.spateDB.operations.Sampler.java
License:Apache License
/**
 * Samples records until the total size of the text serialization of the
 * sampled records exceeds the given limit.
 * @param fs
 * @param files
 * @param total_size
 * @param output
 * @param inObj
 * @return
 * @throws IOException
 */
public static <T extends TextSerializable, O extends TextSerializable> int sampleLocalWithSize(
        FileSystem fs, Path[] files, long total_size, long seed,
        final ResultCollector<O> output, final T inObj, final O outObj) throws IOException {
    int average_record_size = 1024; // A wild guess for record size
    final LongWritable current_sample_size = new LongWritable();
    int sample_count = 0;
    final ResultCollector<T> converter = createConverter(output, inObj, outObj);
    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            current_sample_size.set(current_sample_size.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };
    while (current_sample_size.get() < total_size) {
        int count = (int) ((total_size - current_sample_size.get()) / average_record_size);
        if (count < 10)
            count = 10;
        sample_count += sampleLocalByCount(fs, files, count, seed, counter, new Text2(), new Text2());
        // Change the seed to get a different sample next time. Still, we need to
        // ensure that repeating the program will generate the same value.
        seed += sample_count;
        // Update average_record_size
        average_record_size = (int) (current_sample_size.get() / sample_count);
    }
    return sample_count;
}
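A worked pass through the estimation loop, with illustrative numbers only:

// Suppose total_size = 1,000,000 bytes and nothing has been sampled yet.
int average_record_size = 1024;  // initial wild guess
long current_sample_size = 0;
int count = (int) ((1_000_000 - current_sample_size) / average_record_size); // ~976 records
// After that batch, average_record_size is recomputed from the bytes actually
// collected, so each pass refines the estimate until the size target is met.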
From source file:com.ricemap.spateDB.operations.Tail.java
License:Apache License
/**
 * Reads a maximum of n lines from the stream starting from its current
 * position and going backward.
 *
 * @param in - An input stream. It'll be scanned from its current position
 *   backward till position 0
 * @param n - Maximum number of lines to return
 * @param stockObject - An object used to deserialize lines read. It can
 *   be set to <code>null</code> if output is also <code>null</code>. In this
 *   case, nothing is reported to the output.
 * @param output - An output collector used to report lines read.
 * @return - The position of the beginning of the earliest line read from
 *   buffer.
 * @throws IOException
 */
public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject,
        ResultCollector<T> output) throws IOException {
    int lines_read = 0;
    long end = in.getPos();
    long offset_of_last_eol = end;
    long last_read_byte = end;

    LongWritable line_offset = new LongWritable();
    Text read_line = new Text();
    Text remainder_from_last_buffer = new Text();
    byte[] buffer = new byte[4096];

    while (last_read_byte > 0 && lines_read < n) {
        // Read the next chunk from the back
        long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length;
        in.seek(first_byte_to_read);
        int bytes_to_read = (int) (last_read_byte - first_byte_to_read);
        in.read(buffer, 0, bytes_to_read);
        last_read_byte = first_byte_to_read;

        // Iterate over the bytes in this buffer
        int i_last_byte_consumed_in_buffer = bytes_to_read;
        int i_last_byte_examined_in_buffer = bytes_to_read;
        while (i_last_byte_examined_in_buffer > 0 && lines_read < n) {
            byte byte_examined = buffer[--i_last_byte_examined_in_buffer];
            if (byte_examined == '\n' || byte_examined == '\r') {
                // Found an end-of-line character.
                // Report this line to output unless it's empty.
                long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer;
                if (offset_of_last_eol - offset_of_this_eol > 1) {
                    if (output != null) {
                        read_line.clear();
                        // +1 is to skip the EOL at the beginning
                        read_line.append(buffer, i_last_byte_examined_in_buffer + 1,
                                i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1));
                        // Also append bytes remaining from the last buffer
                        if (remainder_from_last_buffer.getLength() > 0) {
                            read_line.append(remainder_from_last_buffer.getBytes(), 0,
                                    remainder_from_last_buffer.getLength());
                        }
                        line_offset.set(offset_of_this_eol + 1);
                        stockObject.fromText(read_line);
                        output.collect(stockObject);
                    }
                    lines_read++;
                    remainder_from_last_buffer.clear();
                }
                i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer;
                offset_of_last_eol = offset_of_this_eol;
            }
        }
        if (i_last_byte_consumed_in_buffer > 0) {
            // There are still some bytes not consumed in the buffer
            if (remainder_from_last_buffer.getLength() == 0) {
                // Store whatever is remaining in remainder_from_last_buffer
                remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer);
            } else {
                // Prepend the remaining bytes to the Text
                Text t = new Text();
                t.append(buffer, 0, i_last_byte_consumed_in_buffer);
                t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength());
                remainder_from_last_buffer = t;
            }
        }
    }

    if (lines_read < n && remainder_from_last_buffer.getLength() > 0) {
        // There is still one last line that needs to be reported
        lines_read++;
        if (output != null) {
            read_line = remainder_from_last_buffer;
            line_offset.set(0);
            stockObject.fromText(read_line);
            output.collect(stockObject);
        }
        offset_of_last_eol = -1;
    }
    return offset_of_last_eol + 1;
}
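The chunk arithmetic is the subtle part; a worked example with illustrative values:

// Suppose the stream position starts at 10000 with a 4096-byte buffer.
long last_read_byte = 10000;
long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % 4096; // 8192
int bytes_to_read = (int) (last_read_byte - first_byte_to_read);              // 1808
// The first (partial) chunk covers bytes 8192-9999; every later chunk is
// aligned and exactly 4096 bytes: 4096-8191, then 0-4095.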
From source file:com.scaleoutsoftware.soss.hserver.Test_NamedMapInputFormat_Writables.java
License:Apache License
public static void main(String argv[]) throws Exception {
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addJar("C:\\development\\hserver\\bundle.jar").load();

    // Create the input and output maps
    NamedMap<LongWritable, Text> inputMap = NamedMapFactory.getMap("inputMap",
            new WritableSerializer<LongWritable>(LongWritable.class),
            new WritableSerializer<Text>(Text.class));
    NamedMap<Text, LongWritable> outputMap = NamedMapFactory.getMap("outputMap",
            new WritableSerializer<Text>(Text.class),
            new WritableSerializer<LongWritable>(LongWritable.class));
    inputMap.clear();
    outputMap.clear();
    Thread.sleep(10000);

    // Put the strings into the input map (30 million by default), reusing one key writable
    int numstrings = argv.length == 1 ? Integer.parseInt(argv[0]) : 30000000;
    BulkLoader<LongWritable, Text> put = inputMap.getBulkLoader();
    Text a = new Text("The quick brown fox jumps over the lazy dog");
    Text b = new Text("Few black taxis drive up major roads on quiet hazy nights");
    LongWritable num = new LongWritable();
    for (int i = 0; i < numstrings; i++) {
        num.set(i);
        put.put(num, i % 2 == 0 ? a : b);
    }
    put.close();
    Thread.sleep(10000);

    // Set up the Hadoop job, which uses NamedMapInputFormat and GridOutputFormat
    Configuration conf = new Configuration();
    HServerJob job = new HServerJob(conf, "Sample job");
    job.setGrid(grid);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Set the input named map
    NamedMapInputFormat.setNamedMap(job, inputMap);
    // Set the output named map
    GridOutputFormat.setNamedMap(job, outputMap);

    // Run the word count job and time it
    long time = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job done in " + (System.currentTimeMillis() - time));

    // Validation
    for (Text key : outputMap.keySet()) {
        System.out.println(key + " was used " + outputMap.get(key) + " times");
    }
    grid.unload();
}
From source file:com.tgam.hadoop.mapred.EscapedLineRecordReader.java
License:Apache License
/** Read a line. */
public synchronized boolean next(LongWritable key, Text value) throws IOException {
    while (pos < end) {
        key.set(pos);
        int newSize = in.readLine(value, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newSize == 0) {
            return false;
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            return true;
        }
        // Line too long. Try again.
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    return false;
}
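As in Hadoop's stock LineRecordReader, the LongWritable key here is set to the byte offset at which the line starts, not a line number; downstream mappers that treat the key as a line counter will see surprising values.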
From source file:com.thinkbiganalytics.inputformat.hadoop.mapred.OmnitureDataFileRecordReader.java
License:Open Source License
public boolean next(LongWritable key, Text value) throws IOException {
    while (pos <= end) {
        key.set(pos);
        int newSize = lineReader.readLine(value, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        pos += newSize;
        if (newSize == 0) {
            return false;
        }
        // Unescape the raw hit data: drop escaped backslashes and replace
        // escaped tabs and newlines with spaces
        String line = value.toString().replaceAll("\\\\\\\\", "").replaceAll("\\\\\t", " ")
                .replaceAll("\\\\(\n|\r|\r\n)", " ");
        value.set(line);
        if (newSize < maxLineLength) {
            return true;
        }
        // Line too long. Try again.
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }
    return false;
}