List of usage examples for org.apache.hadoop.mapreduce.RecordReader#close()
public abstract void close() throws IOException;
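Before the per-project examples below, here is a minimal sketch of the usual calling pattern: drain the reader, then call close() in a finally block so resources are released even if iteration throws. The helper name, key/value types, and surrounding job objects are illustrative assumptions, not part of any of the example projects.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;

// Hypothetical helper: read every record from one split, making sure close() runs
// even when nextKeyValue() or per-record work throws.
static long countRecords(InputFormat<LongWritable, Text> inputFormat, InputSplit split,
        TaskAttemptContext context) throws IOException, InterruptedException {
    RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
    reader.initialize(split, context);
    long count = 0;
    try {
        while (reader.nextKeyValue()) {
            count++;
        }
    } finally {
        reader.close(); // release file handles and buffers held by the reader
    }
    return count;
}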
From source file:org.apache.mnemonic.mapreduce.MneMapreduceChunkDataTest.java
License:Apache License
@Test(enabled = true, dependsOnMethods = { "testWriteChunkData" }) public void testReadChunkData() throws Exception { List<String> partfns = new ArrayList<String>(); long reccnt = 0L; long tsize = 0L; Checksum cs = new CRC32(); cs.reset();/*ww w. ja v a2 s . c o m*/ File folder = new File(m_workdir.toString()); File[] listfiles = folder.listFiles(); for (int idx = 0; idx < listfiles.length; ++idx) { if (listfiles[idx].isFile() && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null)) && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) { partfns.add(listfiles[idx].getName()); } } Collections.sort(partfns); // keep the order for checksum for (int idx = 0; idx < partfns.size(); ++idx) { System.out.println(String.format("Verifying : %s", partfns.get(idx))); FileSplit split = new FileSplit(new Path(m_workdir, partfns.get(idx)), 0, 0L, new String[0]); InputFormat<NullWritable, MneDurableInputValue<DurableChunk<?>>> inputFormat = new MneInputFormat<MneDurableInputValue<DurableChunk<?>>, DurableChunk<?>>(); RecordReader<NullWritable, MneDurableInputValue<DurableChunk<?>>> reader = inputFormat .createRecordReader(split, m_tacontext); MneDurableInputValue<DurableChunk<?>> dchkval = null; while (reader.nextKeyValue()) { dchkval = reader.getCurrentValue(); byte b; for (int j = 0; j < dchkval.getValue().getSize(); ++j) { b = unsafe.getByte(dchkval.getValue().get() + j); cs.update(b); } tsize += dchkval.getValue().getSize(); ++reccnt; } reader.close(); } AssertJUnit.assertEquals(m_reccnt, reccnt); AssertJUnit.assertEquals(m_totalsize, tsize); AssertJUnit.assertEquals(m_checksum, cs.getValue()); System.out.println(String.format("The checksum of chunk is %d", m_checksum)); }
From source file:org.apache.mnemonic.mapreduce.MneMapreduceLongDataTest.java
License:Apache License
@Test(enabled = true, dependsOnMethods = { "testWriteLongData" }) public void testReadLongData() throws Exception { long sum = 0L; long reccnt = 0L; File folder = new File(m_workdir.toString()); File[] listfiles = folder.listFiles(); for (int idx = 0; idx < listfiles.length; ++idx) { if (listfiles[idx].isFile() && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null)) && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) { System.out.println(String.format("Verifying : %s", listfiles[idx].getName())); FileSplit split = new FileSplit(new Path(m_workdir, listfiles[idx].getName()), 0, 0L, new String[0]); InputFormat<NullWritable, MneDurableInputValue<Long>> inputFormat = new MneInputFormat<MneDurableInputValue<Long>, Long>(); RecordReader<NullWritable, MneDurableInputValue<Long>> reader = inputFormat .createRecordReader(split, m_tacontext); MneDurableInputValue<Long> mdval = null; while (reader.nextKeyValue()) { mdval = reader.getCurrentValue(); sum += mdval.getValue(); ++reccnt;//w w w. j a v a2 s . com } reader.close(); } } AssertJUnit.assertEquals(m_sum, sum); AssertJUnit.assertEquals(m_reccnt, reccnt); System.out.println(String.format("The checksum of long data is %d", sum)); }
From source file:org.apache.mnemonic.mapreduce.MneMapreducePersonDataTest.java
License:Apache License
@Test(enabled = true, dependsOnMethods = { "testWritePersonData" }) public void testReadPersonData() throws Exception { long sumage = 0L; long reccnt = 0L; File folder = new File(m_workdir.toString()); File[] listfiles = folder.listFiles(); for (int idx = 0; idx < listfiles.length; ++idx) { if (listfiles[idx].isFile() && listfiles[idx].getName().startsWith(MneConfigHelper.getBaseOutputName(m_conf, null)) && listfiles[idx].getName().endsWith(MneConfigHelper.DEFAULT_FILE_EXTENSION)) { System.out.println(String.format("Verifying : %s", listfiles[idx].getName())); FileSplit split = new FileSplit(new Path(m_workdir, listfiles[idx].getName()), 0, 0L, new String[0]); InputFormat<NullWritable, MneDurableInputValue<Person<Long>>> inputFormat = new MneInputFormat<MneDurableInputValue<Person<Long>>, Person<Long>>(); RecordReader<NullWritable, MneDurableInputValue<Person<Long>>> reader = inputFormat .createRecordReader(split, m_tacontext); MneDurableInputValue<Person<Long>> personval = null; while (reader.nextKeyValue()) { personval = reader.getCurrentValue(); AssertJUnit.assertTrue(personval.getValue().getAge() < 51); sumage += personval.getValue().getAge(); ++reccnt;/* w w w . j a v a 2s.c o m*/ } reader.close(); } } AssertJUnit.assertEquals(m_reccnt, reccnt); AssertJUnit.assertEquals(m_sumage, sumage); System.out.println(String.format("The checksum of ages is %d", sumage)); }
From source file:org.apache.tinkerpop.gremlin.hadoop.structure.io.HadoopElementIterator.java
License:Apache License
@Override
public void close() {
    try {
        for (final RecordReader reader : this.readers) {
            reader.close();
        }
    } catch (final IOException e) {
        throw new IllegalStateException(e.getMessage(), e);
    }
}
From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java
License:Apache License
static void validateSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, BytesWritable> reader) throws IOException, InterruptedException {
    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);
    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;
    // iterate and validate stuff ...
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        BytesWritable value = reader.getCurrentValue();
        TestRecord testRecord = records.get(itemIndex++);
        // get test key bytes as utf-8 bytes ...
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        // compare against raw key bytes to validate the key is the same (Text's UTF-8 mapping code replaces
        // invalid characters with '?', which would cause our test case, which uses invalid characters to form
        // the key, to break).
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // returned bytes represent the header (encoded in UTF-8), terminated by \r\n\r\n. The content follows
        // this terminator; we search for this specific byte pattern to locate the start of content, then
        // compare it against the source ...
        int indexofHeaderTerminator = ByteArrayUtils.indexOf(value.getBytes(), 0, value.getLength(),
                "\r\n\r\n".getBytes());
        indexofHeaderTerminator += 4;
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getBytes(), indexofHeaderTerminator, testRecord.data.length) == 0);
    }
    reader.close();
    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);
    splits.remove(splitDataIndex);
}
From source file:org.commoncrawl.hadoop.io.mapreduce.ArcFileInputFormatTests.java
License:Apache License
static void validateArcFileItemSplit(FileSystem fs, InputSplit split, List<Pair<Path, List<TestRecord>>> splits,
        RecordReader<Text, ArcFileItem> reader) throws IOException, InterruptedException {
    int splitDataIndex = getIndexOfSplit(splits, split);
    Assert.assertTrue(splitDataIndex != -1);
    List<TestRecord> records = splits.get(splitDataIndex).e1;
    int itemIndex = 0;
    // iterate and validate stuff ...
    while (reader.nextKeyValue()) {
        Text key = reader.getCurrentKey();
        ArcFileItem value = reader.getCurrentValue();
        TestRecord testRecord = records.get(itemIndex++);
        // get test key bytes as utf-8 bytes ...
        byte[] testKeyBytes = testRecord.url.getBytes(Charset.forName("UTF-8"));
        // compare against raw key bytes to validate the key is the same (Text's UTF-8 mapping code replaces
        // invalid characters with '?', which would cause our test case, which uses invalid characters to form
        // the key, to break).
        Assert.assertTrue(ArcFileReaderTests.compareTo(testKeyBytes, 0, testKeyBytes.length,
                key.getBytes(), 0, key.getLength()) == 0);
        // the item's content buffer is compared directly against the source record data ...
        Assert.assertTrue(ArcFileReaderTests.compareTo(testRecord.data, 0, testRecord.data.length,
                value.getContent().getReadOnlyBytes(), value.getContent().getOffset(),
                value.getContent().getCount()) == 0);
        NIOHttpHeaders headers = ArcFileItemUtils.buildHeaderFromArcFileItemHeaders(value.getHeaderItems());
        // validate metadata
        Assert.assertEquals("text/html", headers.findValue(Constants.ARCFileHeader_ARC_MimeType));
        Assert.assertEquals(value.getArcFilePos(), testRecord.streamPos);
        Assert.assertEquals(value.getArcFileSize(), testRecord.rawSize);
        Assert.assertEquals("test-value", headers.findValue("test"));
        Assert.assertEquals(value.getArcFileName(), ((FileSplit) split).getPath().getName());
    }
    reader.close();
    Assert.assertEquals(itemIndex, ArcFileReaderTests.BASIC_TEST_RECORD_COUNT);
    splits.remove(splitDataIndex);
}
From source file:org.msgpack.hadoop.mapreduce.input.TestMessagePackInputFormat.java
License:Apache License
void checkFormat(Job job) throws Exception {
    TaskAttemptContext attemptContext = new TaskAttemptContext(job.getConfiguration(),
            new TaskAttemptID("123", 0, false, 1, 2));
    MessagePackInputFormat format = new MessagePackInputFormat();
    FileInputFormat.setInputPaths(job, workDir);
    List<InputSplit> splits = format.getSplits(job);
    assertEquals(1, splits.size());
    for (int j = 0; j < splits.size(); j++) {
        RecordReader<LongWritable, MessagePackWritable> reader =
                format.createRecordReader(splits.get(j), attemptContext);
        reader.initialize(splits.get(j), attemptContext);
        int count = 0;
        try {
            while (reader.nextKeyValue()) {
                LongWritable key = reader.getCurrentKey();
                Value val = reader.getCurrentValue().get();
                assertEquals(count, val.asIntegerValue().getLong());
                count++;
            }
        } finally {
            reader.close();
        }
    }
}
From source file:org.tensorflow.hadoop.io.TFRecordFileTest.java
License:Open Source License
@Test
public void testInputOutputFormat() throws Exception {
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf);
    Path outdir = new Path(System.getProperty("test.build.data", "/tmp"), "tfr-test");
    TFRecordFileOutputFormat.setOutputPath(job, outdir);
    TaskAttemptContext context = MapReduceTestUtil.createDummyMapTaskAttemptContext(job.getConfiguration());
    OutputFormat<BytesWritable, NullWritable> outputFormat = new TFRecordFileOutputFormat();
    OutputCommitter committer = outputFormat.getOutputCommitter(context);
    committer.setupJob(job);
    RecordWriter<BytesWritable, NullWritable> writer = outputFormat.getRecordWriter(context);

    // Write Example with random numbers
    Random rand = new Random();
    Map<Long, Long> records = new TreeMap<Long, Long>();
    try {
        for (int i = 0; i < RECORDS; ++i) {
            long randValue = rand.nextLong();
            records.put((long) i, randValue);
            Int64List data = Int64List.newBuilder().addValue(i).addValue(randValue).build();
            Feature feature = Feature.newBuilder().setInt64List(data).build();
            Features features = Features.newBuilder().putFeature("data", feature).build();
            Example example = Example.newBuilder().setFeatures(features).build();
            BytesWritable key = new BytesWritable(example.toByteArray());
            writer.write(key, NullWritable.get());
        }
    } finally {
        writer.close(context);
    }
    committer.commitTask(context);
    committer.commitJob(job);

    // Read and compare
    TFRecordFileInputFormat.setInputPaths(job, outdir);
    InputFormat<BytesWritable, NullWritable> inputFormat = new TFRecordFileInputFormat();
    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<BytesWritable, NullWritable> reader = inputFormat.createRecordReader(split, context);
        MapContext<BytesWritable, NullWritable, BytesWritable, NullWritable> mcontext =
                new MapContextImpl<BytesWritable, NullWritable, BytesWritable, NullWritable>(
                        job.getConfiguration(), context.getTaskAttemptID(), reader, null, null,
                        MapReduceTestUtil.createDummyReporter(), split);
        reader.initialize(split, mcontext);
        try {
            while (reader.nextKeyValue()) {
                BytesWritable bytes = reader.getCurrentKey();
                Example example = Example.parseFrom(bytes.getBytes());
                Int64List data = example.getFeatures().getFeatureMap().get("data").getInt64List();
                Long key = data.getValue(0);
                Long value = data.getValue(1);
                assertEquals(records.get(key), value);
                records.remove(key);
            }
        } finally {
            reader.close();
        }
    }
    assertEquals(0, records.size());
}
From source file:org.warcbase.mapreduce.lib.Chain.java
License:Apache License
@SuppressWarnings("unchecked") void runMapper(TaskInputOutputContext context, int index) throws IOException, InterruptedException { Mapper mapper = mappers.get(index);//from w w w . j a va2s . c o m RecordReader rr = new ChainRecordReader(context); RecordWriter rw = new ChainRecordWriter(context); Mapper.Context mapperContext = createMapContext(rr, rw, context, getConf(index)); mapper.run(mapperContext); rr.close(); rw.close(context); }