Example usage for org.apache.hadoop.io LongWritable set

Introduction

On this page you can find example usages for org.apache.hadoop.io.LongWritable.set.

Prototype

public void set(long value) 

Document

Set the value of this LongWritable.
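
Below is a minimal illustrative sketch of the call itself, not taken from any of the sources listed under Usage; the class and variable names are placeholders for this example. Because LongWritable is mutable, a single instance is typically reused and updated with set(long) rather than allocating a new object per record, which is the pattern most of the examples below follow.

import org.apache.hadoop.io.LongWritable;

public class LongWritableSetDemo {
    public static void main(String[] args) {
        LongWritable value = new LongWritable();  // default-constructed, wraps 0
        for (long i = 0; i < 3; i++) {
            value.set(i);                         // overwrite the wrapped long in place
            System.out.println(value.get());      // prints 0, 1, 2
        }
    }
}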

Usage

From source file:com.jfolson.hive.serde.RTypedBytesWritableInput.java

License:Apache License

public LongWritable readLong(LongWritable lw) throws IOException {
    if (lw == null) {
        lw = new LongWritable();
    }
    lw.set(in.readLong());
    return lw;
}

From source file:com.kylinolap.job.hadoop.cube.RowKeyDistributionCheckerMapper.java

License:Apache License

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    LongWritable outputValue = new LongWritable();
    for (Entry<Text, Long> kv : resultMap.entrySet()) {
        outputValue.set(kv.getValue());
        context.write(kv.getKey(), outputValue);
    }
}

From source file:com.linkedin.cubert.plan.physical.PerfProfiler.java

License:Open Source License

private void resetOperatorTime() {
    LongWritable[] curCumulativeOperatorTime = cumulativeOperatorTime[currentPassIndex];

    for (LongWritable singleOperatorTime : curCumulativeOperatorTime) {
        if (singleOperatorTime != null)
            singleOperatorTime.set(0);
    }
}

From source file:com.microsoft.canberra.tf.util.SparseVectorRecordReader.java

License:Open Source License

@Override
public boolean next(final LongWritable recordId, final Document data) throws IOException {

    if (!this.textRecordReader.next(this.offset, this.text)) {
        return false;
    }

    // LOG.log(Level.FINEST, "RecordReader: {0} :: {1}", new Object[] { this.offset, this.text });

    final String[] fields = this.text.toString().split("\\s+");

    if (fields.length <= 1) {
        return false;
    }

    // First element contains the unique document ID:
    recordId.set(Long.parseLong(fields[0]));

    data.clearTokens(recordId.get(), fields.length - 2);

    // Second element contains the number of unique items in the document:
    // assert(Integer.parseInt(fields[1]) == fields.length - 2);

    for (int i = 2; i < fields.length; ++i) {
        // Each remaining element is a colon-separated item_hash:item_frequency pair
        final String[] pair = fields[i].split(":", 2);
        data.add(Integer.valueOf(pair[0]), Double.valueOf(pair[1]));
    }

    // LOG.log(Level.FINEST, "RecordReader: {0} :: {1}", new Object[] { recordId, data });

    return true;
}

From source file:com.pinterest.secor.uploader.UploaderTest.java

License:Apache License

public void testTrimFiles() throws Exception {
    Mockito.when(mZookeeperConnector.getCommittedOffsetCount(mTopicPartition)).thenReturn(21L);
    Mockito.when(mOffsetTracker.setCommittedOffsetCount(mTopicPartition, 21L)).thenReturn(20L);
    Mockito.when(mOffsetTracker.getLastSeenOffset(mTopicPartition)).thenReturn(21L);

    HashSet<LogFilePath> logFilePaths = new HashSet<LogFilePath>();
    logFilePaths.add(mLogFilePath);
    Mockito.when(mFileRegistry.getPaths(mTopicPartition)).thenReturn(logFilePaths);

    PowerMockito.mockStatic(FileSystem.class);

    SequenceFile.Reader reader = mUploader.getReader();
    Mockito.doReturn(LongWritable.class).when(reader).getKeyClass();
    Mockito.doReturn(BytesWritable.class).when(reader).getValueClass();

    Mockito.when(reader.next(Mockito.any(Writable.class), Mockito.any(Writable.class)))
            .thenAnswer(new Answer<Boolean>() {
                private int mCallCount = 0;

                @Override
                public Boolean answer(InvocationOnMock invocation) throws Throwable {
                    if (mCallCount == 2) {
                        return false;
                    }
                    LongWritable key = (LongWritable) invocation.getArguments()[0];
                    key.set(20 + mCallCount++);
                    return true;
                }
            });

    PowerMockito.mockStatic(IdUtil.class);
    Mockito.when(IdUtil.getLocalMessageDir()).thenReturn("some_message_dir");

    SequenceFile.Writer writer = Mockito.mock(SequenceFile.Writer.class);
    LogFilePath dstLogFilePath = new LogFilePath("/some_parent_dir/some_message_dir",
            "/some_parent_dir/some_message_dir/some_topic/some_partition/"
                    + "some_other_partition/10_0_00000000000000000021");
    Mockito.when(mFileRegistry.getOrCreateWriter(dstLogFilePath, null)).thenReturn(writer);

    mUploader.applyPolicy();

    Mockito.verify(writer).append(Mockito.any(LongWritable.class), Mockito.any(BytesWritable.class));
    Mockito.verify(mFileRegistry).deletePath(mLogFilePath);
}

From source file:com.ricemap.spateDB.operations.Sampler.java

License:Apache License

/**
 * Samples records until the total size of the text serialization of the
 * sampled records exceeds the given limit.
 * @param fs
 * @param files
 * @param total_size
 * @param output
 * @param inObj
 * @return the number of records sampled
 * @throws IOException
 */
public static <T extends TextSerializable, O extends TextSerializable> int sampleLocalWithSize(FileSystem fs,
        Path[] files, long total_size, long seed, final ResultCollector<O> output, final T inObj,
        final O outObj) throws IOException {
    int average_record_size = 1024; // A wild guess for record size
    final LongWritable current_sample_size = new LongWritable();
    int sample_count = 0;

    final ResultCollector<T> converter = createConverter(output, inObj, outObj);

    final ResultCollector<Text2> counter = new ResultCollector<Text2>() {
        @Override
        public void collect(Text2 r) {
            current_sample_size.set(current_sample_size.get() + r.getLength());
            inObj.fromText(r);
            converter.collect(inObj);
        }
    };

    while (current_sample_size.get() < total_size) {
        int count = (int) ((total_size - current_sample_size.get()) / average_record_size);
        if (count < 10)
            count = 10;

        sample_count += sampleLocalByCount(fs, files, count, seed, counter, new Text2(), new Text2());
        // Change the seed to get different sample next time.
        // Still we need to ensure that repeating the program will generate
        // the same value
        seed += sample_count;
        // Update average_record_size
        average_record_size = (int) (current_sample_size.get() / sample_count);
    }
    return sample_count;
}

From source file:com.ricemap.spateDB.operations.Tail.java

License:Apache License

/**
 * Reads a maximum of n lines from the stream starting from its current
 * position and going backward.
 * 
 * @param in - An input stream. It'll be scanned from its current position
 *   backward till position 0
 * @param n - Maximum number of lines to return
 * @param stockObject - An object used to deserialize lines read. It can
 *   be set to <code>null</code> if output is also <code>null</code>. In this
 *   case, nothing is reported to the output.
 * @param output - An output collector used to report lines read.
 * @return - The position of the beginning of the earliest line read from
 *   buffer.
 * @throws IOException
 */
public static <T extends TextSerializable> long tail(FSDataInputStream in, int n, T stockObject,
        ResultCollector<T> output) throws IOException {
    int lines_read = 0;
    long end = in.getPos();
    long offset_of_last_eol = end;
    long last_read_byte = end;

    LongWritable line_offset = new LongWritable();
    Text read_line = new Text();
    Text remainder_from_last_buffer = new Text();
    byte[] buffer = new byte[4096];

    while (last_read_byte > 0 && lines_read < n) {
        // Read next chunk from the back
        long first_byte_to_read = (last_read_byte - 1) - (last_read_byte - 1) % buffer.length;
        in.seek(first_byte_to_read);
        int bytes_to_read = (int) (last_read_byte - first_byte_to_read);
        in.read(buffer, 0, bytes_to_read);
        last_read_byte = first_byte_to_read;

        // Iterate over bytes in this buffer
        int i_last_byte_consumed_in_buffer = bytes_to_read;
        int i_last_byte_examined_in_buffer = bytes_to_read;
        while (i_last_byte_examined_in_buffer > 0 && lines_read < n) {
            byte byte_examined = buffer[--i_last_byte_examined_in_buffer];
            if (byte_examined == '\n' || byte_examined == '\r') {
                // Found an end of line character
                // Report this to output unless it's empty
                long offset_of_this_eol = first_byte_to_read + i_last_byte_examined_in_buffer;
                if (offset_of_last_eol - offset_of_this_eol > 1) {
                    if (output != null) {
                        read_line.clear();
                        // +1 is to skip the EOL at the beginning
                        read_line.append(buffer, i_last_byte_examined_in_buffer + 1,
                                i_last_byte_consumed_in_buffer - (i_last_byte_examined_in_buffer + 1));
                        // Also append bytes remaining from last buffer
                        if (remainder_from_last_buffer.getLength() > 0) {
                            read_line.append(remainder_from_last_buffer.getBytes(), 0,
                                    remainder_from_last_buffer.getLength());
                        }
                        line_offset.set(offset_of_this_eol + 1);
                        stockObject.fromText(read_line);
                        output.collect(stockObject);
                    }
                    lines_read++;
                    remainder_from_last_buffer.clear();
                }
                i_last_byte_consumed_in_buffer = i_last_byte_examined_in_buffer;
                offset_of_last_eol = offset_of_this_eol;
            }
        }
        if (i_last_byte_consumed_in_buffer > 0) {
            // There are still some bytes not consumed in buffer
            if (remainder_from_last_buffer.getLength() == 0) {
                // Store whatever is remaining in remainder_from_last_buffer
                remainder_from_last_buffer.append(buffer, 0, i_last_byte_consumed_in_buffer);
            } else {
                // Prepend remaining bytes to Text
                Text t = new Text();
                t.append(buffer, 0, i_last_byte_consumed_in_buffer);
                t.append(remainder_from_last_buffer.getBytes(), 0, remainder_from_last_buffer.getLength());
                remainder_from_last_buffer = t;
            }
        }
    }

    if (lines_read < n && remainder_from_last_buffer.getLength() > 0) {
        // There is still one last line that needs to be reported
        lines_read++;
        if (output != null) {
            read_line = remainder_from_last_buffer;
            line_offset.set(0);
            stockObject.fromText(read_line);
            output.collect(stockObject);
        }
        offset_of_last_eol = -1;
    }

    return offset_of_last_eol + 1;
}

From source file:com.scaleoutsoftware.soss.hserver.Test_NamedMapInputFormat_Writables.java

License:Apache License

public static void main(String argv[]) throws Exception {
    InvocationGrid grid = HServerJob.getInvocationGridBuilder("MyGrid" + System.currentTimeMillis())
            .addJar("C:\\development\\hserver\\bundle.jar").load();

    // Create input and output map
    NamedMap<LongWritable, Text> inputMap = NamedMapFactory.getMap("inputMap",
            new WritableSerializer<LongWritable>(LongWritable.class), new WritableSerializer<Text>(Text.class));
    NamedMap<Text, LongWritable> outputMap = NamedMapFactory.getMap("outputMap",
            new WritableSerializer<Text>(Text.class), new WritableSerializer<LongWritable>(LongWritable.class));
    inputMap.clear();
    outputMap.clear();

    Thread.sleep(10000);

    // Put strings into the input map (count from argv[0] if given, otherwise 30,000,000)
    int numstrings = argv.length == 1 ? Integer.parseInt(argv[0]) : 30000000;
    BulkLoader<LongWritable, Text> put = inputMap.getBulkLoader();
    Text a = new Text("The quick brown fox jumps over the lazy dog");
    Text b = new Text("Few black taxis drive up major roads on quiet hazy nights");
    LongWritable num = new LongWritable();
    for (int i = 0; i < numstrings; i++) {
        num.set(i);
        put.put(num, i % 2 == 0 ? a : b);
    }
    put.close();
    Thread.sleep(10000);

    // Sets up the Hadoop job, which uses GridInputFormat and GridOutputFormat
    Configuration conf = new Configuration();
    HServerJob job = new HServerJob(conf, "Sample job");
    job.setGrid(grid);
    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(WordCountReducer.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    job.setInputFormatClass(NamedMapInputFormat.class);
    job.setOutputFormatClass(GridOutputFormat.class);

    // Setting the input format properties: input object class and input cache
    NamedMapInputFormat.setNamedMap(job, inputMap);

    // Setting the output cache
    GridOutputFormat.setNamedMap(job, outputMap);

    // Run the Hadoop word-count job and write the results to the output map
    long time = System.currentTimeMillis();
    job.waitForCompletion(true);
    System.out.println("Job done in " + (System.currentTimeMillis() - time));

    // Validation
    for (Text key : outputMap.keySet()) {
        System.out.println(key + " was used " + outputMap.get(key) + "times");
    }
    grid.unload();
}

From source file:com.tgam.hadoop.mapred.EscapedLineRecordReader.java

License:Apache License

/** Read a line. */
public synchronized boolean next(LongWritable key, Text value) throws IOException {

    while (pos < end) {
        key.set(pos);

        int newSize = in.readLine(value, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        if (newSize == 0) {
            return false;
        }
        pos += newSize;
        if (newSize < maxLineLength) {
            return true;
        }

        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }

    return false;
}

From source file:com.thinkbiganalytics.inputformat.hadoop.mapred.OmnitureDataFileRecordReader.java

License:Open Source License

public boolean next(LongWritable key, Text value) throws IOException {
    while (pos <= end) {
        key.set(pos);

        int newSize = lineReader.readLine(value, maxLineLength,
                Math.max((int) Math.min(Integer.MAX_VALUE, end - pos), maxLineLength));
        pos += newSize;

        if (newSize == 0) {
            return false;
        }

        String line = value.toString().replaceAll("\\\\\\\\", "").replaceAll("\\\\\t", " ")
                .replaceAll("\\\\(\n|\r|\r\n)", " ");
        value.set(line);

        if (newSize < maxLineLength) {
            return true;
        }

        // line too long. try again
        LOG.info("Skipped line of size " + newSize + " at pos " + (pos - newSize));
    }

    return false;
}