Example usage for org.apache.hadoop.io.LongWritable.get()

List of usage examples for org.apache.hadoop.io.LongWritable.get()

Introduction

On this page you can find example usages of org.apache.hadoop.io.LongWritable.get().

Prototype

public long get() 

Document

Return the value of this LongWritable.
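
Before the real-world examples below, here is a minimal, self-contained sketch of the usual set()/get() round trip; the class name LongWritableGetExample is made up for this page:

import org.apache.hadoop.io.LongWritable;

public class LongWritableGetExample {
    public static void main(String[] args) {
        LongWritable writable = new LongWritable();
        writable.set(42L);               // wrap a primitive long
        long unwrapped = writable.get(); // get() returns the wrapped value
        System.out.println(unwrapped);   // prints 42
    }
}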

Usage

From source file: org.apache.tez.mapreduce.processor.MapUtils.java

License: Apache License

private static InputSplit createInputSplit(FileSystem fs, Path workDir, JobConf job, Path file)
        throws IOException {
    FileInputFormat.setInputPaths(job, workDir);

    LOG.info("Generating data at path: " + file);
    // create a SequenceFile with a few <LongWritable, Text> entries
    @SuppressWarnings("deprecation")
    SequenceFile.Writer writer = SequenceFile.createWriter(fs, job, file, LongWritable.class, Text.class);
    try {
        Random r = new Random(System.currentTimeMillis());
        LongWritable key = new LongWritable();
        Text value = new Text();
        for (int i = 10; i > 0; i--) {
            key.set(r.nextInt(1000));
            value.set(Integer.toString(i));
            writer.append(key, value);
            LOG.info("<k, v> : <" + key.get() + ", " + value + ">");
        }
    } finally {
        writer.close();
    }

    SequenceFileInputFormat<LongWritable, Text> format = new SequenceFileInputFormat<LongWritable, Text>();
    InputSplit[] splits = format.getSplits(job, 1);
    System.err.println("#split = " + splits.length + " ; " + "#locs = " + splits[0].getLocations().length + "; "
            + "loc = " + splits[0].getLocations()[0] + "; " + "off = " + splits[0].getLength() + "; "
            + "file = " + ((FileSplit) splits[0]).getPath());
    return splits[0];
}

From source file: org.apache.tez.mapreduce.processor.reduce.TestReduceProcessor.java

License: Apache License

@Test(timeout = 5000)
public void testReduceProcessor() throws Exception {
    final String dagName = "mrdag0";
    String mapVertexName = MultiStageMRConfigUtil.getInitialMapVertexName();
    String reduceVertexName = MultiStageMRConfigUtil.getFinalReduceVertexName();
    JobConf jobConf = new JobConf(defaultConf);
    setUpJobConf(jobConf);

    MRHelpers.translateMRConfToTez(jobConf);
    jobConf.setInt(MRJobConfig.APPLICATION_ATTEMPT_ID, 0);

    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            new Path(workDir, "localized-resources").toUri().toString());
    jobConf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);

    Path mapInput = new Path(workDir, "map0");
    MapUtils.generateInputSplit(localFs, workDir, jobConf, mapInput);

    InputSpec mapInputSpec = new InputSpec("NullSrcVertex",
            InputDescriptor.create(MRInputLegacy.class.getName())
                    .setUserPayload(UserPayload.create(ByteBuffer.wrap(MRRuntimeProtos.MRInputUserPayloadProto
                            .newBuilder().setConfigurationBytes(TezUtils.createByteStringFromConf(jobConf))
                            .build().toByteArray()))),
            1);
    OutputSpec mapOutputSpec = new OutputSpec("NullDestVertex",
            OutputDescriptor.create(OrderedPartitionedKVOutput.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);
    // Run a map

    TestUmbilical testUmbilical = new TestUmbilical();

    LogicalIOProcessorRuntimeTask mapTask = MapUtils.createLogicalTask(localFs, workDir, jobConf, 0, mapInput,
            testUmbilical, dagName, mapVertexName, Collections.singletonList(mapInputSpec),
            Collections.singletonList(mapOutputSpec));

    mapTask.initialize();
    mapTask.run();
    mapTask.close();

    // One VME, One DME
    Assert.assertEquals(2, testUmbilical.getEvents().size());
    Assert.assertEquals(EventType.VERTEX_MANAGER_EVENT, testUmbilical.getEvents().get(0).getEventType());
    Assert.assertEquals(EventType.COMPOSITE_DATA_MOVEMENT_EVENT,
            testUmbilical.getEvents().get(1).getEventType());

    CompositeDataMovementEvent cdmEvent = (CompositeDataMovementEvent) testUmbilical.getEvents().get(1)
            .getEvent();
    Assert.assertEquals(1, cdmEvent.getCount());
    DataMovementEvent dme = cdmEvent.getEvents().iterator().next();
    dme.setTargetIndex(0);

    LOG.info("Starting reduce...");

    JobTokenIdentifier identifier = new JobTokenIdentifier(new Text(dagName));
    JobTokenSecretManager jobTokenSecretManager = new JobTokenSecretManager();
    Token<JobTokenIdentifier> shuffleToken = new Token<JobTokenIdentifier>(identifier, jobTokenSecretManager);
    shuffleToken.setService(identifier.getJobId());

    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    jobConf.set(MRFrameworkConfigs.TASK_LOCAL_RESOURCE_DIR,
            new Path(workDir, "localized-resources").toUri().toString());
    jobConf.setBoolean(TezRuntimeConfiguration.TEZ_RUNTIME_OPTIMIZE_LOCAL_FETCH, true);
    FileOutputFormat.setOutputPath(jobConf, new Path(workDir, "output"));
    ProcessorDescriptor reduceProcessorDesc = ProcessorDescriptor.create(ReduceProcessor.class.getName())
            .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf));

    InputSpec reduceInputSpec = new InputSpec(mapVertexName,
            InputDescriptor.create(OrderedGroupedInputLegacy.class.getName())
                    .setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);
    OutputSpec reduceOutputSpec = new OutputSpec("NullDestinationVertex", OutputDescriptor
            .create(MROutputLegacy.class.getName()).setUserPayload(TezUtils.createUserPayloadFromConf(jobConf)),
            1);

    // Now run a reduce
    TaskSpec taskSpec = new TaskSpec(TezTestUtils.getMockTaskAttemptId(0, 1, 0, 0), dagName, reduceVertexName,
            -1, reduceProcessorDesc, Collections.singletonList(reduceInputSpec),
            Collections.singletonList(reduceOutputSpec), null);

    Map<String, ByteBuffer> serviceConsumerMetadata = new HashMap<String, ByteBuffer>();
    serviceConsumerMetadata.put(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID,
            ShuffleUtils.convertJobTokenToBytes(shuffleToken));
    Map<String, String> serviceProviderEnvMap = new HashMap<String, String>();
    ByteBuffer shufflePortBb = ByteBuffer.allocate(4).putInt(0, 8000);
    AuxiliaryServiceHelper.setServiceDataIntoEnv(ShuffleUtils.SHUFFLE_HANDLER_SERVICE_ID, shufflePortBb,
            serviceProviderEnvMap);

    LogicalIOProcessorRuntimeTask task = new LogicalIOProcessorRuntimeTask(taskSpec, 0, jobConf,
            new String[] { workDir.toString() }, new TestUmbilical(), serviceConsumerMetadata,
            serviceProviderEnvMap, HashMultimap.<String, String>create(), null, "",
            new ExecutionContextImpl("localhost"), Runtime.getRuntime().maxMemory());

    List<Event> destEvents = new LinkedList<Event>();
    destEvents.add(dme);
    task.initialize();
    OrderedGroupedInputLegacy sortedOut = (OrderedGroupedInputLegacy) task.getInputs().values().iterator()
            .next();
    sortedOut.handleEvents(destEvents);
    task.run();
    task.close();

    // MRTask mrTask = (MRTask)t.getProcessor();
    // TODO NEWTEZ Verify the partitioner has not been created
    // Likely not applicable anymore.
    // Assert.assertNull(mrTask.getPartitioner());

    // Only a task commit happens, hence the data is still in the temporary directory.
    Path reduceOutputDir = new Path(new Path(workDir, "output"),
            "_temporary/0/" + IDConverter.toMRTaskIdForOutput(TezTestUtils.getMockTaskId(0, 1, 0)));

    Path reduceOutputFile = new Path(reduceOutputDir, "part-v001-o000-00000");

    SequenceFile.Reader reader = new SequenceFile.Reader(localFs, reduceOutputFile, jobConf);

    LongWritable key = new LongWritable();
    Text value = new Text();
    long prev = Long.MIN_VALUE;
    while (reader.next(key, value)) {
        // Keys should come back from the sorted output in strictly increasing order.
        if (prev != Long.MIN_VALUE) {
            Assert.assertTrue(prev < key.get());
        }
        prev = key.get();
    }

    reader.close();
}

From source file: org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java

License: Apache License

private void merge(List<Path> pathList, int mergeFactor, RawComparator rc) throws Exception {
    //Merge datasets
    TezMerger merger = new TezMerger();
    TezRawKeyValueIterator records = merger.merge(defaultConf, localFs, IntWritable.class, LongWritable.class,
            null, false, 0, 1024, pathList.toArray(new Path[pathList.size()]), true, mergeFactor,
            new Path(workDir, "tmp_" + System.nanoTime()), ((rc == null) ? comparator : rc), new Reporter(),
            null, null, null, new Progress());

    //Verify the merged data is correct
    Map<Integer, Integer> dataMap = Maps.newHashMap();
    int pk = -1;
    while (records.next()) {
        DataInputBuffer key = records.getKey();
        DataInputBuffer value = records.getValue();

        IntWritable k = new IntWritable();
        k.readFields(key);
        LongWritable v = new LongWritable();
        v.readFields(value);

        if (records.isSameKey()) {
            LOG.info("\tSame Key : key=" + k.get() + ", val=" + v.get());
            //More than one key should be present in the source data
            assertTrue(verificationDataSet.get(k.get()).size() > 1);
            //Ensure this is same as the previous key we saw
            assertTrue("previousKey=" + pk + ", current=" + k.get(), pk == k.get());
        } else {
            LOG.info("key=" + k.get() + ", val=" + v.get());
        }
        pk = k.get();

        int keyCount = (dataMap.containsKey(k.get())) ? (dataMap.get(k.get()) + 1) : 1;
        dataMap.put(k.get(), keyCount);
    }

    //Verify if the number of distinct entries is the same in source and the test
    assertTrue(
            "dataMap=" + dataMap.keySet().size() + ", verificationSet=" + verificationDataSet.keySet().size(),
            dataMap.keySet().size() == verificationDataSet.keySet().size());

    //Verify with source data
    for (Integer key : verificationDataSet.keySet()) {
        assertTrue(
                "Data size for " + key + " not matching with source; dataSize:" + dataMap.get(key).intValue()
                        + ", source:" + verificationDataSet.get(key).size(),
                dataMap.get(key).intValue() == verificationDataSet.get(key).size());
    }

    //Verify if every key has the same number of repeated items in the source dataset as well
    for (Map.Entry<Integer, Integer> entry : dataMap.entrySet()) {
        assertTrue(entry.getKey() + "", verificationDataSet.get(entry.getKey()).size() == entry.getValue());
    }

    LOG.info("******************");
    verificationDataSet.clear();
}

From source file: org.apache.tez.runtime.library.common.sort.impl.TestTezMerger.java

License: Apache License

/**
 * Generate data set for ifile.  Create repeated keys if needed.
 *
 * @param keyCount    approximate number of keys to be created
 * @param repeatCount number of times a key should be repeated
 * @return the generated data set as a TreeMultimap of keys to values
 */
static TreeMultimap<Integer, Long> createDataForIFile(int keyCount, int repeatCount) {
    TreeMultimap<Integer, Long> dataSet = TreeMultimap.create();
    Random rnd = new Random();
    for (int i = 0; i < keyCount; i++) {
        if (repeatCount > 0 && (rnd.nextInt(keyCount) % 2 == 0)) {
            //repeat this key
            for (int j = 0; j < repeatCount; j++) {
                IntWritable key = new IntWritable(rnd.nextInt(keyCount));
                LongWritable value = new LongWritable(System.nanoTime());
                dataSet.put(key.get(), value.get());
            }
            i += repeatCount;
            LOG.info("Repeated key count=" + (repeatCount));
        } else {
            IntWritable key = new IntWritable(rnd.nextInt(keyCount));
            LongWritable value = new LongWritable(System.nanoTime());
            dataSet.put(key.get(), value.get());
        }
    }
    for (Integer key : dataSet.keySet()) {
        for (Long value : dataSet.get(key)) {
            LOG.info("Key=" + key + ", val=" + value);
        }
    }
    LOG.info("=============");
    return dataSet;
}

From source file: org.apache.tez.runtime.library.common.writers.TestUnorderedPartitionedKVWriter.java

License: Apache License

private void baseTest(int numRecords, int numPartitions, Set<Integer> skippedPartitions, boolean shouldCompress)
        throws IOException, InterruptedException {
    PartitionerForTest partitioner = new PartitionerForTest();
    ApplicationId appId = ApplicationId.newInstance(10000, 1);
    TezCounters counters = new TezCounters();
    String uniqueId = UUID.randomUUID().toString();
    OutputContext outputContext = createMockOutputContext(counters, appId, uniqueId);

    Configuration conf = createConfiguration(outputContext, IntWritable.class, LongWritable.class,
            shouldCompress, -1);
    CompressionCodec codec = null;
    if (shouldCompress) {
        codec = new DefaultCodec();
        ((Configurable) codec).setConf(conf);
    }

    int numOutputs = numPartitions;
    long availableMemory = 2048;
    int numRecordsWritten = 0;

    Map<Integer, Multimap<Integer, Long>> expectedValues = new HashMap<Integer, Multimap<Integer, Long>>();
    for (int i = 0; i < numOutputs; i++) {
        expectedValues.put(i, LinkedListMultimap.<Integer, Long>create());
    }

    UnorderedPartitionedKVWriter kvWriter = new UnorderedPartitionedKVWriterForTest(outputContext, conf,
            numOutputs, availableMemory);

    int sizePerBuffer = kvWriter.sizePerBuffer;
    int sizePerRecord = 4 + 8; // IntW + LongW
    int sizePerRecordWithOverhead = sizePerRecord + 12; // Record + META_OVERHEAD

    IntWritable intWritable = new IntWritable();
    LongWritable longWritable = new LongWritable();
    for (int i = 0; i < numRecords; i++) {
        intWritable.set(i);
        longWritable.set(i);
        int partition = partitioner.getPartition(intWritable, longWritable, numOutputs);
        if (skippedPartitions != null && skippedPartitions.contains(partition)) {
            continue;
        }
        expectedValues.get(partition).put(intWritable.get(), longWritable.get());
        kvWriter.write(intWritable, longWritable);
        numRecordsWritten++;
    }
    List<Event> events = kvWriter.close();

    int recordsPerBuffer = sizePerBuffer / sizePerRecordWithOverhead;
    int numExpectedSpills = numRecordsWritten / recordsPerBuffer;

    verify(outputContext, never()).fatalError(any(Throwable.class), any(String.class));

    // Verify the status of the buffers
    if (numExpectedSpills == 0) {
        assertEquals(1, kvWriter.numInitializedBuffers);
    } else {
        assertTrue(kvWriter.numInitializedBuffers > 1);
    }
    assertNull(kvWriter.currentBuffer);
    assertEquals(0, kvWriter.availableBuffers.size());

    // Verify the counters
    TezCounter outputRecordBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES);
    TezCounter outputRecordsCounter = counters.findCounter(TaskCounter.OUTPUT_RECORDS);
    TezCounter outputBytesWithOverheadCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_WITH_OVERHEAD);
    TezCounter fileOutputBytesCounter = counters.findCounter(TaskCounter.OUTPUT_BYTES_PHYSICAL);
    TezCounter spilledRecordsCounter = counters.findCounter(TaskCounter.SPILLED_RECORDS);
    TezCounter additionalSpillBytesWrittenCounter = counters
            .findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_WRITTEN);
    TezCounter additionalSpillBytesReadCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILLS_BYTES_READ);
    TezCounter numAdditionalSpillsCounter = counters.findCounter(TaskCounter.ADDITIONAL_SPILL_COUNT);
    assertEquals(numRecordsWritten * sizePerRecord, outputRecordBytesCounter.getValue());
    assertEquals(numRecordsWritten, outputRecordsCounter.getValue());
    assertEquals(numRecordsWritten * sizePerRecordWithOverhead, outputBytesWithOverheadCounter.getValue());
    long fileOutputBytes = fileOutputBytesCounter.getValue();
    if (numRecordsWritten > 0) {
        assertTrue(fileOutputBytes > 0);
        if (!shouldCompress) {
            assertTrue(fileOutputBytes > outputRecordBytesCounter.getValue());
        }
    } else {
        assertEquals(0, fileOutputBytes);
    }
    assertEquals(recordsPerBuffer * numExpectedSpills, spilledRecordsCounter.getValue());
    long additionalSpillBytesWritten = additionalSpillBytesWrittenCounter.getValue();
    long additionalSpillBytesRead = additionalSpillBytesReadCounter.getValue();
    if (numExpectedSpills == 0) {
        assertEquals(0, additionalSpillBytesWritten);
        assertEquals(0, additionalSpillBytesRead);
    } else {
        assertTrue(additionalSpillBytesWritten > 0);
        assertTrue(additionalSpillBytesRead > 0);
        if (!shouldCompress) {
            assertTrue(additionalSpillBytesWritten > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
            assertTrue(additionalSpillBytesRead > (recordsPerBuffer * numExpectedSpills * sizePerRecord));
        }
    }
    assertTrue(additionalSpillBytesWritten == additionalSpillBytesRead);
    assertEquals(numExpectedSpills, numAdditionalSpillsCounter.getValue());

    BitSet emptyPartitionBits = null;
    // Verify the event returned
    assertEquals(1, events.size());
    assertTrue(events.get(0) instanceof CompositeDataMovementEvent);
    CompositeDataMovementEvent cdme = (CompositeDataMovementEvent) events.get(0);
    assertEquals(0, cdme.getSourceIndexStart());
    assertEquals(numOutputs, cdme.getCount());
    DataMovementEventPayloadProto eventProto = DataMovementEventPayloadProto
            .parseFrom(ByteString.copyFrom(cdme.getUserPayload()));
    assertFalse(eventProto.hasData());
    if (skippedPartitions == null && numRecordsWritten > 0) {
        assertFalse(eventProto.hasEmptyPartitions());
        emptyPartitionBits = new BitSet(numPartitions);
    } else {
        assertTrue(eventProto.hasEmptyPartitions());
        byte[] emptyPartitions = TezCommonUtils
                .decompressByteStringToByteArray(eventProto.getEmptyPartitions());
        emptyPartitionBits = TezUtilsInternal.fromByteArray(emptyPartitions);
        if (numRecordsWritten == 0) {
            assertEquals(numPartitions, emptyPartitionBits.cardinality());
        } else {
            for (Integer e : skippedPartitions) {
                assertTrue(emptyPartitionBits.get(e));
            }
            assertEquals(skippedPartitions.size(), emptyPartitionBits.cardinality());
        }
    }
    if (emptyPartitionBits.cardinality() != numPartitions) {
        assertEquals(HOST_STRING, eventProto.getHost());
        assertEquals(SHUFFLE_PORT, eventProto.getPort());
        assertEquals(uniqueId, eventProto.getPathComponent());
    } else {
        assertFalse(eventProto.hasHost());
        assertFalse(eventProto.hasPort());
        assertFalse(eventProto.hasPathComponent());
    }

    // Verify the actual data
    TezTaskOutput taskOutput = new TezTaskOutputFiles(conf, uniqueId);
    Path outputFilePath = kvWriter.finalOutPath;
    Path spillFilePath = kvWriter.finalIndexPath;

    if (numRecordsWritten > 0) {
        assertTrue(localFs.exists(outputFilePath));
        assertTrue(localFs.exists(spillFilePath));
    } else {
        return;
    }

    // Special case for 0 records.
    TezSpillRecord spillRecord = new TezSpillRecord(spillFilePath, conf);
    DataInputBuffer keyBuffer = new DataInputBuffer();
    DataInputBuffer valBuffer = new DataInputBuffer();
    IntWritable keyDeser = new IntWritable();
    LongWritable valDeser = new LongWritable();
    for (int i = 0; i < numOutputs; i++) {
        if (skippedPartitions != null && skippedPartitions.contains(i)) {
            continue;
        }
        TezIndexRecord indexRecord = spillRecord.getIndex(i);
        FSDataInputStream inStream = FileSystem.getLocal(conf).open(outputFilePath);
        inStream.seek(indexRecord.getStartOffset());
        IFile.Reader reader = new IFile.Reader(inStream, indexRecord.getPartLength(), codec, null, null, false,
                0, -1);
        while (reader.nextRawKey(keyBuffer)) {
            reader.nextRawValue(valBuffer);
            keyDeser.readFields(keyBuffer);
            valDeser.readFields(valBuffer);
            int partition = partitioner.getPartition(keyDeser, valDeser, numOutputs);
            assertTrue(expectedValues.get(partition).remove(keyDeser.get(), valDeser.get()));
        }
        inStream.close();
    }
    for (int i = 0; i < numOutputs; i++) {
        assertEquals(0, expectedValues.get(i).size());
        expectedValues.remove(i);
    }
    assertEquals(0, expectedValues.size());
}

From source file: org.bgi.flexlab.gaea.tools.mapreduce.vcf.sort.VCFSort.java

License: Open Source License

@Override
protected void reduce(LongWritable key, Iterable<VariantContextWritable> records,
        Reducer<LongWritable, VariantContextWritable, NullWritable, VariantContextWritable>.Context ctx)
        throws IOException, InterruptedException {
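    // The high-order bits of the composite key (everything above bit 40) identify which
    // named multi-output the records belong to, so shift them down to recover that id.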
    int id = (int) (key.get() >> 40);
    for (VariantContextWritable rec : records)
        // Each record is written with a NullWritable key to the multi-output chosen above.
        mos.write(multiOutputs.get(id), NullWritable.get(), rec);
}

From source file: org.bgi.flexlab.gaea.tools.mapreduce.vcfqualitycontrol.variantrecalibratioin.VariantRecalibrationMapper.java

License: Open Source License

@Override
public void map(LongWritable key, VariantContextWritable value, Context context)
        throws IOException, InterruptedException {
    VariantContext vc = value.get();
    if (!validContext(vc))
        return;

    VariantDatumMessenger datum = new VariantDatumMessenger.Builder(manager, vc, options).decodeAnnotations()
            .setLoc(genomeLocParser).setOriginalQual().setFlagV().setPrior().build();
    if (datum != null) {
        context.write(new IntWritable((int) key.get()), new Text(datum.toString()));
    }
}

From source file: org.commoncrawl.util.JoinValue.java

License: Open Source License

public JoinValue(TextBytes tag, LongWritable value) {
    _tag = tag;
    _type = LONG_TYPE_JOIN_VALUE;
    _longValue = value.get();
}

From source file: org.datavec.hadoop.records.reader.mapfile.index.LongIndexToKey.java

License: Apache License

@Override
public List<Pair<Long, Long>> initialize(MapFile.Reader[] readers, Class<? extends Writable> valueClass)
        throws IOException {

    List<Pair<Long, Long>> l = new ArrayList<>(readers.length);
    for (MapFile.Reader r : readers) {
        //Get the first and last keys:
        long first = -1;
        long last = -1;

        //First key: no method for this for some inexplicable reason :/
        LongWritable k = new LongWritable();
        Writable v = ReflectionUtils.newInstance(valueClass, null);
        boolean hasNext = r.next(k, v);
        if (!hasNext) {
            //This map file is empty - no data
            l.add(new Pair<>(-1L, -1L));
            continue;
        }
        first = k.get();

        //Last key: easy
        r.reset();
        r.finalKey(k);
        last = k.get();

        l.add(new Pair<>(first, last));
    }

    //Check that things are actually contiguous:
    List<Pair<Long, Long>> sorted = new ArrayList<>(l.size());
    for (Pair<Long, Long> p : l) {
        if (p.getLeft() >= 0) {
            sorted.add(p);
        }
    }
    Collections.sort(sorted, new Comparator<Pair<Long, Long>>() {
        @Override
        public int compare(Pair<Long, Long> o1, Pair<Long, Long> o2) {
            return Long.compare(o1.getFirst(), o2.getFirst());
        }
    });

    if (sorted.size() == 0) {
        throw new IllegalStateException("Map file is empty - no data available");
    }
    if (sorted.get(0).getFirst() != 0L) {
        throw new UnsupportedOperationException("Minimum key value is not 0: got " + sorted.get(0).getFirst());
    }

    for (int i = 0; i < sorted.size() - 1; i++) {
        long currLast = sorted.get(i).getSecond();
        long nextFirst = sorted.get(i + 1).getFirst();

        if (nextFirst == -1) {
            //Skip empty map file
            continue;
        }

        if (currLast + 1 != nextFirst) {
            throw new IllegalStateException(
                    "Keys are not contiguous between readers: first/last indices (inclusive) " + "are " + sorted
                            + ".\n LongIndexKey assumes unique and contiguous LongWritable keys");
        }
    }

    readerIndices = l;
    return readerIndices;
}

From source file: org.eobjects.hadoopdatacleaner.mapreduce.flatfile.FlatFileMapper.java

License: Open Source License

@Override
public void map(LongWritable key, Text csvLine, final Context context)
        throws IOException, InterruptedException {
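    // With a line-oriented input format the LongWritable key is the byte offset of the
    // line, so offset 0 marks the CSV header row.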
    if (key.get() == 0L) {
        context.getConfiguration().set("csv.header.line", csvLine.toString());
        csvParser.parseHeaderRow(csvLine);
    } else {
        while (context.getConfiguration().get("csv.header.line") == null) {
            // Wait for the header to be read.
        }
        InputRow inputRow = csvParser.prepareRow(csvLine);

        Callback mapperEmitterCallback = new MapperEmitter.Callback() {

            public void write(Text key, SortedMapWritable row) throws IOException, InterruptedException {
                context.write(key, row);

            }
        };

        mapperDelegate.run(inputRow, mapperEmitterCallback);

    }

}