Example usage for org.apache.hadoop.mapreduce RecordReader close

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.RecordReader.close().

Prototype

public abstract void close() throws IOException;

Document

Close the record reader.
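
Before the usage examples, here is a minimal sketch of where close() fits in the usual RecordReader lifecycle: obtain the reader from an InputFormat, call initialize(), iterate with nextKeyValue(), and close the reader in a finally block so its underlying resources are released even when reading fails. TextInputFormat and the helper name readAllRecords are illustrative choices, not taken from the examples below; imports are omitted, as in those examples.

private static void readAllRecords(Configuration conf, Path input) throws IOException, InterruptedException {
    Job job = Job.getInstance(conf);
    FileInputFormat.setInputPaths(job, input);

    TextInputFormat inputFormat = new TextInputFormat();
    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

    for (InputSplit split : inputFormat.getSplits(job)) {
        RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
        try {
            reader.initialize(split, context);
            while (reader.nextKeyValue()) {
                // consume reader.getCurrentKey() / reader.getCurrentValue() here
            }
        } finally {
            // always close the reader, even if reading failed
            reader.close();
        }
    }
}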

Usage

From source file:co.cask.cdap.template.etl.common.ETLDBInputFormat.java

License:Apache License

@Override
protected RecordReader createDBRecordReader(DBInputSplit split, Configuration conf) throws IOException {
    final RecordReader dbRecordReader = super.createDBRecordReader(split, conf);
    return new RecordReader() {
        @Override
        public void initialize(InputSplit split, TaskAttemptContext context)
                throws IOException, InterruptedException {
            dbRecordReader.initialize(split, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return dbRecordReader.nextKeyValue();
        }

        @Override
        public Object getCurrentKey() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentKey();
        }

        @Override
        public Object getCurrentValue() throws IOException, InterruptedException {
            return dbRecordReader.getCurrentValue();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return dbRecordReader.getProgress();
        }

        @Override
        public void close() throws IOException {
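            // Close the wrapped reader first, then deregister the JDBC driver shim (SQLException is wrapped as IOException)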
            dbRecordReader.close();
            try {
                DriverManager.deregisterDriver(driverShim);
            } catch (SQLException e) {
                throw new IOException(e);
            }
        }
    };
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.input.HCatTupleInputFormat.java

License:Apache License

@Override
public RecordReader<ITuple, NullWritable> createRecordReader(InputSplit split, TaskAttemptContext taskContext)
        throws IOException, InterruptedException {

    HCatInputFormat iF = new HCatInputFormat();

    @SuppressWarnings("rawtypes")
    final RecordReader<WritableComparable, HCatRecord> hCatRecordReader = iF.createRecordReader(split,
            taskContext);

    return new RecordReader<ITuple, NullWritable>() {

        ITuple tuple = new Tuple(pangoolSchema);

        @Override
        public void close() throws IOException {
            hCatRecordReader.close();
        }

        @Override
        public ITuple getCurrentKey() throws IOException, InterruptedException {
            HCatRecord record = hCatRecordReader.getCurrentValue();
            // Perform conversion between HCatRecord and Tuple
            for (int pos = 0; pos < schema.size(); pos++) {
                tuple.set(pos, record.get(pos));
            }
            return tuple;
        }

        @Override
        public NullWritable getCurrentValue() throws IOException, InterruptedException {
            return NullWritable.get();
        }

        @Override
        public float getProgress() throws IOException, InterruptedException {
            return hCatRecordReader.getProgress();
        }

        @Override
        public void initialize(InputSplit iS, TaskAttemptContext context)
                throws IOException, InterruptedException {
            hCatRecordReader.initialize(iS, context);
        }

        @Override
        public boolean nextKeyValue() throws IOException, InterruptedException {
            return hCatRecordReader.nextKeyValue();
        }
    };
}

From source file:com.datasalt.pangool.tuplemr.mapred.lib.output.TestTupleInputOutputFormat.java

License:Apache License

public void testSplits(long maxSplitSize, int generatedRows) throws IOException, InterruptedException,
        IllegalArgumentException, SecurityException, ClassNotFoundException, InstantiationException,
        IllegalAccessException, InvocationTargetException, NoSuchMethodException {
    logger.info("Testing maxSplitSize: " + maxSplitSize + " and generatedRows:" + generatedRows);
    FileSystem fS = FileSystem.get(getConf());
    Random r = new Random(1);
    Schema schema = new Schema("schema", Fields.parse("i:int,s:string"));
    ITuple tuple = new Tuple(schema);

    Path outPath = new Path(OUT);
    TupleFile.Writer writer = new TupleFile.Writer(FileSystem.get(getConf()), getConf(), outPath, schema);
    for (int i = 0; i < generatedRows; i++) {
        tuple.set("i", r.nextInt());
        tuple.set("s", r.nextLong() + "");
        writer.append(tuple);
    }
    writer.close();

    TupleInputFormat format = ReflectionUtils.newInstance(TupleInputFormat.class, getConf());
    Job job = new Job(getConf());
    FileInputFormat.setInputPaths(job, outPath);
    logger.info("Using max input split size: " + maxSplitSize);
    FileInputFormat.setMaxInputSplitSize(job, maxSplitSize);
    job.setInputFormatClass(FileInputFormat.class);

    // Read all the splits and count. The number of rows read must
    // be the same as the number written.
    int count = 0;
    for (InputSplit split : format.getSplits(job)) {
        TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
        TaskAttemptContext attemptContext = TaskAttemptContextFactory.get(getConf(), attemptId);
        logger.info("Sampling split: " + split);
        RecordReader<ITuple, NullWritable> reader = format.createRecordReader(split, attemptContext);
        reader.initialize(split, attemptContext);
        while (reader.nextKeyValue()) {
            tuple = reader.getCurrentKey();
            count++;
        }
        reader.close();
    }

    assertEquals(generatedRows, count);

    HadoopUtils.deleteIfExists(fS, outPath);
}

From source file:com.hadoop.mapreduce.TestLzoTextInputFormat.java

License:Open Source License

/**
 * Generate random data, compress it, index and md5 hash the data.
 * Then read it all back and md5 that too, to verify that it all went ok.
 *
 * @param testWithIndex Should we index or not?
 * @param charsToOutput How many characters of random data should we output.
 * @throws IOException
 * @throws NoSuchAlgorithmException
 * @throws InterruptedException
 */
private void runTest(boolean testWithIndex, int charsToOutput)
        throws IOException, NoSuchAlgorithmException, InterruptedException {

    if (!GPLNativeCodeLoader.isNativeCodeLoaded()) {
        LOG.warn("Cannot run this test without the native lzo libraries");
        return;
    }

    Configuration conf = new Configuration();
    conf.setLong("fs.local.block.size", charsToOutput / 2);
    // reducing block size to force a split of the tiny file
    conf.set("io.compression.codecs", LzopCodec.class.getName());

    FileSystem localFs = FileSystem.getLocal(conf);
    localFs.delete(outputDir, true);
    localFs.mkdirs(outputDir);

    Job job = new Job(conf);
    TextOutputFormat.setCompressOutput(job, true);
    TextOutputFormat.setOutputCompressorClass(job, LzopCodec.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    TaskAttemptContext attemptContext = new TaskAttemptContextImpl(job.getConfiguration(),
            new TaskAttemptID("123", 0, TaskType.REDUCE, 1, 2));

    // create some input data
    byte[] expectedMd5 = createTestInput(outputDir, localFs, attemptContext, charsToOutput);

    if (testWithIndex) {
        Path lzoFile = new Path(outputDir, lzoFileName);
        LzoTextInputFormat.createIndex(localFs, lzoFile);
    }

    LzoTextInputFormat inputFormat = new LzoTextInputFormat();
    TextInputFormat.setInputPaths(job, outputDir);

    List<InputSplit> is = inputFormat.getSplits(job);
    //verify we have the right number of lzo chunks
    if (testWithIndex && OUTPUT_BIG == charsToOutput) {
        assertEquals(3, is.size());
    } else {
        assertEquals(1, is.size());
    }

    // let's read it all and calculate the md5 hash
    for (InputSplit inputSplit : is) {
        RecordReader<LongWritable, Text> rr = inputFormat.createRecordReader(inputSplit, attemptContext);
        rr.initialize(inputSplit, attemptContext);

        while (rr.nextKeyValue()) {
            Text value = rr.getCurrentValue();

            md5.update(value.getBytes(), 0, value.getLength());
        }

        rr.close();
    }

    localFs.close();
    assertTrue(Arrays.equals(expectedMd5, md5.digest()));
}

From source file:com.inmobi.messaging.consumer.databus.mapreduce.TestDatabusInputFormatMapReduce.java

License:Apache License

/**
 * Reads the given split.
 * @return List of read messages
 */
private List<Message> readSplit(DatabusInputFormat format, org.apache.hadoop.mapreduce.InputSplit split,
        JobConf job) throws IOException, InterruptedException {
    List<Message> result = new ArrayList<Message>();
    RecordReader<LongWritable, Message> reader = format
            .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) split, context);
    ((DatabusRecordReader) reader).initialize(split, context);
    while (reader.nextKeyValue()) {
        result.add(reader.getCurrentValue());
    }
    reader.close();
    return result;
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFile() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.metamx.milano.hadoop.MilanoProtoFileInputFormatTests.java

License:Apache License

@Test
public void testReadFileNoMetadata() throws Exception {
    MilanoProtoFileInputFormat inputFormat = new MilanoProtoFileInputFormat();
    inputFormat.setBuilder(Testing.TestItem.newBuilder());

    FileSplit split = new FileSplit(readFile, 0, protoTestObjects.getFs().getFileStatus(readFile).getLen(),
            null);
    org.apache.hadoop.mapreduce.RecordReader<String, Message> recordReader = inputFormat
            .createRecordReader(split, protoTestObjects.getContext());
    recordReader.initialize(split, protoTestObjects.getContext());

    for (int i = 0; i < protoTestObjects.getTestItems().size(); i++) {
        Assert.assertTrue("Fewer objects than expected.", recordReader.nextKeyValue());
        Message message = recordReader.getCurrentValue();

        protoTestObjects.compareMessages(protoTestObjects.getTestItem(i), message);
    }

    recordReader.close();
}

From source file:com.scaleoutsoftware.soss.hserver.hadoop.MapperWrapperMapreduce.java

License:Apache License

/**
 * Runs the mapper for a single split.
 *
 * @param mapOutputAccumulator mapOutputAccumulator to use
 * @param split                split to run on
 */

@Override
@SuppressWarnings("unchecked")
public void runSplit(MapOutputAccumulator<OUTKEY, OUTVALUE> mapOutputAccumulator, Object split, int splitIndex)
        throws IOException, ClassNotFoundException, InterruptedException {

    TaskAttemptID taskAttemptId = hadoopVersionSpecificCode.createTaskAttemptId(jobId, true, splitIndex);
    //Setup task ID info
    TaskAttemptContext taskContext = hadoopVersionSpecificCode.createTaskAttemptContext(configuration,
            taskAttemptId);

    InputFormat inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), configuration);

    //Create RecordReader
    org.apache.hadoop.mapreduce.RecordReader<INKEY, INVALUE> input = inputFormat
            .createRecordReader((InputSplit) split, taskContext);

    //Make a mapper
    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE> mapper;
    try {
        mapper = (org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>) mapperConstructor
                .newInstance();
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    org.apache.hadoop.mapreduce.RecordWriter output;
    OutputCommitter committer = null;
    if (mapOnlyJob) {
        OutputFormat outputFormat = ReflectionUtils.newInstance(jobContext.getOutputFormatClass(),
                configuration);
        output = (org.apache.hadoop.mapreduce.RecordWriter<OUTKEY, OUTVALUE>) outputFormat
                .getRecordWriter(taskContext);
        committer = outputFormat.getOutputCommitter(taskContext);
        committer.setupTask(taskContext);
    } else {
        output = new MapOutputCollector<OUTKEY, OUTVALUE>(mapOutputAccumulator);
    }

    input.initialize((InputSplit) split, taskContext);

    org.apache.hadoop.mapreduce.Mapper<INKEY, INVALUE, OUTKEY, OUTVALUE>.Context mapperContext = hadoopVersionSpecificCode
            .getMapperContext(configuration, taskAttemptId, input, output);
    mapper.run(mapperContext);

    //Close the record reader for this split
    input.close();

    output.close(mapperContext);

    if (mapOnlyJob && committer != null) {
        committer.commitTask(taskContext);
    }
}

From source file:com.splout.db.hadoop.SchemaSampler.java

License:Apache License

public static Schema sample(Configuration conf, Path input, InputFormat<ITuple, NullWritable> inputFormat)
        throws IOException, InterruptedException {
    Schema schema = null;

    // sample schema from input path given the provided InputFormat
    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    FileInputFormat.setInputPaths(job, input);
    // get first inputSplit
    List<InputSplit> inputSplits = inputFormat.getSplits(job);
    if (inputSplits == null || inputSplits.size() == 0) {
        throw new IOException(
                "Given input format doesn't produce any input split. Can't sample first record. PATH: "
                        + input);
    }
    InputSplit inputSplit = inputSplits.get(0);
    TaskAttemptID attemptId = new TaskAttemptID(new TaskID(), 1);
    TaskAttemptContext attemptContext;
    try {
        attemptContext = TaskAttemptContextFactory.get(conf, attemptId);
    } catch (Exception e) {
        throw new IOException(e);
    }

    RecordReader<ITuple, NullWritable> rReader = inputFormat.createRecordReader(inputSplit, attemptContext);
    rReader.initialize(inputSplit, attemptContext);

    if (!rReader.nextKeyValue()) {
        throw new IOException(
                "Can't read first record of first input split of the given path [" + input + "].");
    }

    // finally get the sample schema
    schema = rReader.getCurrentKey().getSchema();
    log.info("Sampled schema from [" + input + "] : " + schema);
    rReader.close();

    return schema;
}

From source file:edu.uci.ics.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final HadoopHelper helper = new HadoopHelper(config);
    final Configuration conf = helper.getConfiguration();
    final Mapper<K1, V1, K2, V2> mapper = helper.getMapper();
    final InputFormat<K1, V1> inputFormat = helper.getInputFormat();
    final IInputSplitProvider isp = factory.createInputSplitProvider(partition);
    final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0);
    final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId);

    final int framesLimit = helper.getSortFrameLimit(ctx);
    final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories();

    class SortingRecordWriter extends RecordWriter<K2, V2> {
        private final ArrayTupleBuilder tb;
        private final ByteBuffer frame;
        private final FrameTupleAppender fta;
        private ExternalSortRunGenerator runGen;
        private int blockId;

        public SortingRecordWriter() throws HyracksDataException {
            tb = new ArrayTupleBuilder(2);
            frame = ctx.allocateFrame();
            fta = new FrameTupleAppender(ctx.getFrameSize());
            fta.reset(frame, true);
        }

        public void initBlock(int blockId) throws HyracksDataException {
            runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT, framesLimit);
            this.blockId = blockId;
        }

        @Override
        public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
        }

        @Override
        public void write(K2 key, V2 value) throws IOException, InterruptedException {
            DataOutput dos = tb.getDataOutput();
            tb.reset();
            key.write(dos);
            tb.addFieldEndOffset();
            value.write(dos);
            tb.addFieldEndOffset();
            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                runGen.nextFrame(frame);
                fta.reset(frame, true);
                if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                    throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size ("
                            + frame.capacity() + ")");
                }
            }
        }

        public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException {
            if (fta.getTupleCount() > 0) {
                runGen.nextFrame(frame);
                fta.reset(frame, true);
            }
            runGen.close();
            IFrameWriter delegatingWriter = new IFrameWriter() {
                private final FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
                private final ByteBuffer outFrame = ctx.allocateFrame();
                private final FrameTupleAccessor fta = new FrameTupleAccessor(ctx.getFrameSize(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields());
                private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3);

                @Override
                public void open() throws HyracksDataException {
                    appender.reset(outFrame, true);
                }

                @Override
                public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                    fta.reset(buffer);
                    int n = fta.getTupleCount();
                    for (int i = 0; i < n; ++i) {
                        tb.reset();
                        tb.addField(fta, i, 0);
                        tb.addField(fta, i, 1);
                        try {
                            tb.getDataOutput().writeInt(blockId);
                        } catch (IOException e) {
                            throw new HyracksDataException(e);
                        }
                        tb.addFieldEndOffset();
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(outFrame, writer);
                            appender.reset(outFrame, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }
                }

                @Override
                public void close() throws HyracksDataException {
                    if (appender.getTupleCount() > 0) {
                        FrameUtils.flushFrame(outFrame, writer);
                    }
                }

                @Override
                public void fail() throws HyracksDataException {
                    // TODO Auto-generated method stub

                }
            };
            if (helper.hasCombiner()) {
                Reducer<K2, V2, K2, V2> combiner = helper.getCombiner();
                TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0);
                TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(taId);
                final IFrameWriter outputWriter = delegatingWriter;
                RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() {
                    private final FrameTupleAppender fta = new FrameTupleAppender(ctx.getFrameSize());
                    private final ByteBuffer buffer = ctx.allocateFrame();
                    private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2);

                    {
                        fta.reset(buffer, true);
                        outputWriter.open();
                    }

                    @Override
                    public void write(K2 key, V2 value) throws IOException, InterruptedException {
                        DataOutput dos = tb.getDataOutput();
                        tb.reset();
                        key.write(dos);
                        tb.addFieldEndOffset();
                        value.write(dos);
                        tb.addFieldEndOffset();
                        if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(buffer, outputWriter);
                            fta.reset(buffer, true);
                            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }

                    @Override
                    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                        if (fta.getTupleCount() > 0) {
                            FrameUtils.flushFrame(buffer, outputWriter);
                            outputWriter.close();
                        }
                    }
                };
                delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper,
                        new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter, ctaId,
                        ctaskAttemptContext);
            }
            IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
            for (int i = 0; i < comparatorFactories.length; ++i) {
                comparators[i] = comparatorFactories[i].createBinaryComparator();
            }
            ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getFrameSorter(),
                    runGen.getRuns(), new int[] { 0 }, comparators, null,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), framesLimit, delegatingWriter);
            merger.process();
        }
    }

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            writer.open();
            try {
                SortingRecordWriter recordWriter = new SortingRecordWriter();
                InputSplit split = null;
                int blockId = 0;
                while ((split = isp.next()) != null) {
                    try {
                        RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split,
                                taskAttemptContext);
                        ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
                        try {
                            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
                            recordReader.initialize(split, taskAttemptContext);
                        } finally {
                            Thread.currentThread().setContextClassLoader(ctxCL);
                        }
                        recordWriter.initBlock(blockId);
                        Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf, taId,
                                recordReader, recordWriter, null, null, split);
                        mapper.run(mCtx);
                        recordReader.close();
                        recordWriter.sortAndFlushBlock(writer);
                        ++blockId;
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    } catch (InterruptedException e) {
                        throw new HyracksDataException(e);
                    }
                }
            } finally {
                writer.close();
            }
        }
    };
}