Example usage for org.apache.hadoop.mapreduce RecordReader initialize

Introduction

This page collects usage examples for the org.apache.hadoop.mapreduce RecordReader initialize method, drawn from real projects.

Prototype

public abstract void initialize(InputSplit split, TaskAttemptContext context)
        throws IOException, InterruptedException;

Document

Called once at initialization.
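
A common thread in the usages listed below is that initialize() is invoked explicitly, immediately after InputFormat#createRecordReader(), whenever the reader is driven outside a running map task. The following is a minimal standalone sketch of that pattern; it is not taken from any of the projects below and assumes a TextInputFormat over a hypothetical input path /tmp/input.txt.

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.task.TaskAttemptContextImpl;

public class RecordReaderInitializeSketch {

    public static void main(String[] args) throws IOException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        // Hypothetical input path; point this at a real text file.
        FileInputFormat.addInputPath(job, new Path("/tmp/input.txt"));

        TextInputFormat inputFormat = new TextInputFormat();
        TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(), new TaskAttemptID());

        for (InputSplit split : inputFormat.getSplits(job)) {
            RecordReader<LongWritable, Text> reader = inputFormat.createRecordReader(split, context);
            // Called once at initialization: createRecordReader() only constructs the
            // reader, so a standalone caller must invoke initialize() itself before
            // the first nextKeyValue().
            reader.initialize(split, context);
            try {
                while (reader.nextKeyValue()) {
                    System.out.println(reader.getCurrentKey() + "\t" + reader.getCurrentValue());
                }
            } finally {
                reader.close();
            }
        }
    }
}

Inside a real map task none of this is necessary, because the framework creates the reader and calls initialize() before Mapper#run(); that is why explicit initialize() calls appear mainly in drivers, tests, and embedded runtimes like the examples that follow.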

Usage

From source file:org.apache.hadoop.examples.terasort.TeraInputFormat.java

License:Apache License

/**
 * Use the input splits to take samples of the input and generate sample
 * keys. By default reads 100,000 keys from 10 locations in the input, sorts
 * them and picks N-1 keys to generate N equally sized partitions.
 * @param job the job to sample
 * @param partFile where to write the output file to
 * @throws Throwable if something goes wrong
 */
public static void writePartitionFile(final JobContext job, Path partFile) throws Throwable {
    long t1 = System.currentTimeMillis();
    Configuration conf = job.getConfiguration();
    final TeraInputFormat inFormat = new TeraInputFormat();
    final TextSampler sampler = new TextSampler();
    int partitions = job.getNumReduceTasks();
    long sampleSize = conf.getLong(TeraSortConfigKeys.SAMPLE_SIZE.key(),
            TeraSortConfigKeys.DEFAULT_SAMPLE_SIZE);
    final List<InputSplit> splits = inFormat.getSplits(job);
    long t2 = System.currentTimeMillis();
    System.out.println("Computing input splits took " + (t2 - t1) + "ms");
    int samples = Math.min(
            conf.getInt(TeraSortConfigKeys.NUM_PARTITIONS.key(), TeraSortConfigKeys.DEFAULT_NUM_PARTITIONS),
            splits.size());
    System.out.println("Sampling " + samples + " splits of " + splits.size());
    final long recordsPerSample = sampleSize / samples;
    final int sampleStep = splits.size() / samples;
    Thread[] samplerReader = new Thread[samples];
    SamplerThreadGroup threadGroup = new SamplerThreadGroup("Sampler Reader Thread Group");
    // take N samples from different parts of the input
    for (int i = 0; i < samples; ++i) {
        final int idx = i;
        samplerReader[i] = new Thread(threadGroup, "Sampler Reader " + idx) {
            {
                setDaemon(true);
            }

            public void run() {
                long records = 0;
                try {
                    TaskAttemptContext context = new TaskAttemptContextImpl(job.getConfiguration(),
                            new TaskAttemptID());
                    RecordReader<Text, Text> reader = inFormat.createRecordReader(splits.get(sampleStep * idx),
                            context);
                    reader.initialize(splits.get(sampleStep * idx), context);
                    while (reader.nextKeyValue()) {
                        sampler.addKey(new Text(reader.getCurrentKey()));
                        records += 1;
                        if (recordsPerSample <= records) {
                            break;
                        }
                    }
                } catch (IOException ie) {
                    System.err.println(
                            "Got an exception while reading splits " + StringUtils.stringifyException(ie));
                    throw new RuntimeException(ie);
                } catch (InterruptedException e) {

                }
            }
        };
        samplerReader[i].start();
    }
    FileSystem outFs = partFile.getFileSystem(conf);
    DataOutputStream writer = outFs.create(partFile, true, 64 * 1024, (short) 10,
            outFs.getDefaultBlockSize(partFile));
    for (int i = 0; i < samples; i++) {
        try {
            samplerReader[i].join();
            if (threadGroup.getThrowable() != null) {
                throw threadGroup.getThrowable();
            }
        } catch (InterruptedException e) {
        }
    }
    for (Text split : sampler.createPartitions(partitions)) {
        split.write(writer);
    }
    writer.close();
    long t3 = System.currentTimeMillis();
    System.out.println("Computing parititions took " + (t3 - t2) + "ms");
}

From source file:org.apache.hcatalog.data.transfer.impl.HCatInputFormatReader.java

License:Apache License

@Override
public Iterator<HCatRecord> read() throws HCatException {

    HCatInputFormat inpFmt = new HCatInputFormat();
    RecordReader<WritableComparable, HCatRecord> rr;
    try {
        TaskAttemptContext cntxt = HCatHadoopShims.Instance.get().createTaskAttemptContext(conf,
                new TaskAttemptID());
        rr = inpFmt.createRecordReader(split, cntxt);
        rr.initialize(split, cntxt);
    } catch (IOException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    } catch (InterruptedException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    }
    return new HCatRecordItr(rr);
}

From source file:org.apache.hcatalog.pig.TestE2EScenarios.java

License:Apache License

private void copyTable(String in, String out) throws IOException, InterruptedException {
    Job ijob = new Job();
    Job ojob = new Job();
    HCatInputFormat inpy = new HCatInputFormat();
    inpy.setInput(ijob, null, in);
    HCatOutputFormat oupy = new HCatOutputFormat();
    oupy.setOutput(ojob, OutputJobInfo.create(null, out, new HashMap<String, String>()));

    // Test HCatContext

    System.err.println("HCatContext INSTANCE is present : " + HCatContext.INSTANCE.getConf().isPresent());
    if (HCatContext.INSTANCE.getConf().isPresent()) {
        System.err.println("HCatContext tinyint->int promotion says " + HCatContext.INSTANCE.getConf().get()
                .getBoolean(HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION,
                        HCatConstants.HCAT_DATA_TINY_SMALL_INT_PROMOTION_DEFAULT));
    }

    HCatSchema tableSchema = inpy.getTableSchema(ijob.getConfiguration());
    System.err.println("Copying from [" + in + "] to [" + out + "] with schema : " + tableSchema.toString());
    oupy.setSchema(ojob, tableSchema);
    oupy.checkOutputSpecs(ojob);
    OutputCommitter oc = oupy.getOutputCommitter(createTaskAttemptContext(ojob.getConfiguration()));
    oc.setupJob(ojob);

    for (InputSplit split : inpy.getSplits(ijob)) {

        TaskAttemptContext rtaskContext = createTaskAttemptContext(ijob.getConfiguration());
        TaskAttemptContext wtaskContext = createTaskAttemptContext(ojob.getConfiguration());

        RecordReader<WritableComparable, HCatRecord> rr = inpy.createRecordReader(split, rtaskContext);
        rr.initialize(split, rtaskContext);

        OutputCommitter taskOc = oupy.getOutputCommitter(wtaskContext);
        taskOc.setupTask(wtaskContext);
        RecordWriter<WritableComparable<?>, HCatRecord> rw = oupy.getRecordWriter(wtaskContext);

        while (rr.nextKeyValue()) {
            rw.write(rr.getCurrentKey(), rr.getCurrentValue());
        }
        rw.close(wtaskContext);
        taskOc.commitTask(wtaskContext);
        rr.close();
    }

    oc.commitJob(ojob);
}

From source file:org.apache.hcatalog.rcfile.TestRCFileMapReduceInputFormat.java

License:Apache License

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber,
        long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.data.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(RCFile.RECORD_INTERVAL_CONF_STR, intervalRecordCount);

    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = null;
        cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();

    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    context.getConfiguration().setLong("mapred.max.split.size", maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber);
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = HCatHadoopShims.Instance.get().createTaskAttemptContext(jonconf,
                new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i),
                tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}

From source file:org.apache.hive.hcatalog.data.transfer.impl.HCatInputFormatReader.java

License:Apache License

@Override
public Iterator<HCatRecord> read() throws HCatException {

    HCatInputFormat inpFmt = new HCatInputFormat();
    RecordReader<WritableComparable, HCatRecord> rr;
    try {
        TaskAttemptContext cntxt = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(conf,
                new TaskAttemptID());
        rr = inpFmt.createRecordReader(split, cntxt);
        rr.initialize(split, cntxt);
    } catch (IOException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    } catch (InterruptedException e) {
        throw new HCatException(ErrorType.ERROR_NOT_INITIALIZED, e);
    }
    return new HCatRecordItr(rr);
}

From source file:org.apache.hive.hcatalog.rcfile.TestRCFileMapReduceInputFormat.java

License:Apache License

private void writeThenReadByRecordReader(int intervalRecordCount, int writeCount, int splitNumber,
        long maxSplitSize, CompressionCodec codec) throws IOException, InterruptedException {
    Path testDir = new Path(System.getProperty("test.tmp.dir", ".") + "/mapred/testsmallfirstsplit");
    Path testFile = new Path(testDir, "test_rcfile");
    fs.delete(testFile, true);
    Configuration cloneConf = new Configuration(conf);
    RCFileOutputFormat.setColumnNumber(cloneConf, bytesArray.length);
    cloneConf.setInt(HiveConf.ConfVars.HIVE_RCFILE_RECORD_INTERVAL.varname, intervalRecordCount);

    RCFile.Writer writer = new RCFile.Writer(fs, cloneConf, testFile, null, codec);

    BytesRefArrayWritable bytes = new BytesRefArrayWritable(bytesArray.length);
    for (int i = 0; i < bytesArray.length; i++) {
        BytesRefWritable cu = null;
        cu = new BytesRefWritable(bytesArray[i], 0, bytesArray[i].length);
        bytes.set(i, cu);
    }
    for (int i = 0; i < writeCount; i++) {
        writer.append(bytes);
    }
    writer.close();

    RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable> inputFormat = new RCFileMapReduceInputFormat<LongWritable, BytesRefArrayWritable>();
    Configuration jonconf = new Configuration(cloneConf);
    jonconf.set("mapred.input.dir", testDir.toString());
    JobContext context = new Job(jonconf);
    HiveConf.setLongVar(context.getConfiguration(), HiveConf.ConfVars.MAPREDMAXSPLITSIZE, maxSplitSize);
    List<InputSplit> splits = inputFormat.getSplits(context);
    assertEquals("splits length should be " + splitNumber, splits.size(), splitNumber);
    int readCount = 0;
    for (int i = 0; i < splits.size(); i++) {
        TaskAttemptContext tac = ShimLoader.getHadoopShims().getHCatShim().createTaskAttemptContext(jonconf,
                new TaskAttemptID());
        RecordReader<LongWritable, BytesRefArrayWritable> rr = inputFormat.createRecordReader(splits.get(i),
                tac);
        rr.initialize(splits.get(i), tac);
        while (rr.nextKeyValue()) {
            readCount++;
        }
    }
    assertEquals("readCount should be equal to writeCount", readCount, writeCount);
}

From source file:org.apache.hyracks.dataflow.hadoop.mapreduce.MapperOperatorDescriptor.java

License:Apache License

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final HadoopHelper helper = new HadoopHelper(config);
    final Configuration conf = helper.getConfiguration();
    final Mapper<K1, V1, K2, V2> mapper = helper.getMapper();
    final InputFormat<K1, V1> inputFormat = helper.getInputFormat();
    final IInputSplitProvider isp = factory.createInputSplitProvider(partition);
    final TaskAttemptID taId = new TaskAttemptID("foo", jobId, true, partition, 0);
    final TaskAttemptContext taskAttemptContext = helper.createTaskAttemptContext(taId);

    final int framesLimit = helper.getSortFrameLimit(ctx);
    final IBinaryComparatorFactory[] comparatorFactories = helper.getSortComparatorFactories();

    class SortingRecordWriter extends RecordWriter<K2, V2> {
        private final ArrayTupleBuilder tb;
        private final IFrame frame;
        private final FrameTupleAppender fta;
        private ExternalSortRunGenerator runGen;
        private int blockId;

        public SortingRecordWriter() throws HyracksDataException {
            tb = new ArrayTupleBuilder(2);
            frame = new VSizeFrame(ctx);
            fta = new FrameTupleAppender(frame);
        }

        public void initBlock(int blockId) throws HyracksDataException {
            runGen = new ExternalSortRunGenerator(ctx, new int[] { 0 }, null, comparatorFactories,
                    helper.getMapOutputRecordDescriptorWithoutExtraFields(), Algorithm.MERGE_SORT, framesLimit);
            this.blockId = blockId;
        }

        @Override
        public void close(TaskAttemptContext arg0) throws IOException, InterruptedException {
        }

        @Override
        public void write(K2 key, V2 value) throws IOException, InterruptedException {
            DataOutput dos = tb.getDataOutput();
            tb.reset();
            key.write(dos);
            tb.addFieldEndOffset();
            value.write(dos);
            tb.addFieldEndOffset();
            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                runGen.nextFrame(frame.getBuffer());
                fta.reset(frame, true);
                if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                    throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size ("
                            + frame.getBuffer().capacity() + ")");
                }
            }
        }

        public void sortAndFlushBlock(final IFrameWriter writer) throws HyracksDataException {
            if (fta.getTupleCount() > 0) {
                runGen.nextFrame(frame.getBuffer());
                fta.reset(frame, true);
            }
            runGen.close();
            IFrameWriter delegatingWriter = new IFrameWriter() {
                private final FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
                private final FrameTupleAccessor fta = new FrameTupleAccessor(
                        helper.getMapOutputRecordDescriptorWithoutExtraFields());
                private final ArrayTupleBuilder tb = new ArrayTupleBuilder(3);

                @Override
                public void open() throws HyracksDataException {
                }

                @Override
                public void nextFrame(ByteBuffer buffer) throws HyracksDataException {
                    fta.reset(buffer);
                    int n = fta.getTupleCount();
                    for (int i = 0; i < n; ++i) {
                        tb.reset();
                        tb.addField(fta, i, 0);
                        tb.addField(fta, i, 1);
                        try {
                            tb.getDataOutput().writeInt(blockId);
                        } catch (IOException e) {
                            throw new HyracksDataException(e);
                        }
                        tb.addFieldEndOffset();
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            appender.flush(writer, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }
                }

                @Override
                public void close() throws HyracksDataException {
                    appender.flush(writer, true);
                }

                @Override
                public void fail() throws HyracksDataException {
                    // TODO Auto-generated method stub

                }
            };
            if (helper.hasCombiner()) {
                Reducer<K2, V2, K2, V2> combiner = helper.getCombiner();
                TaskAttemptID ctaId = new TaskAttemptID("foo", jobId, true, partition, 0);
                TaskAttemptContext ctaskAttemptContext = helper.createTaskAttemptContext(taId);
                final IFrameWriter outputWriter = delegatingWriter;
                RecordWriter<K2, V2> recordWriter = new RecordWriter<K2, V2>() {
                    private final FrameTupleAppender fta = new FrameTupleAppender(new VSizeFrame(ctx));
                    private final ArrayTupleBuilder tb = new ArrayTupleBuilder(2);

                    {
                        outputWriter.open();
                    }

                    @Override
                    public void write(K2 key, V2 value) throws IOException, InterruptedException {
                        DataOutput dos = tb.getDataOutput();
                        tb.reset();
                        key.write(dos);
                        tb.addFieldEndOffset();
                        value.write(dos);
                        tb.addFieldEndOffset();
                        if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            fta.flush(outputWriter, true);
                            if (!fta.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new IllegalStateException();
                            }
                        }
                    }

                    @Override
                    public void close(TaskAttemptContext context) throws IOException, InterruptedException {
                        fta.flush(outputWriter, true);
                    }
                };
                delegatingWriter = new ReduceWriter<K2, V2, K2, V2>(ctx, helper,
                        new int[] { HadoopHelper.KEY_FIELD_INDEX }, helper.getGroupingComparatorFactories(),
                        helper.getMapOutputRecordDescriptorWithoutExtraFields(), combiner, recordWriter, ctaId,
                        ctaskAttemptContext);
            }
            IBinaryComparator[] comparators = new IBinaryComparator[comparatorFactories.length];
            for (int i = 0; i < comparatorFactories.length; ++i) {
                comparators[i] = comparatorFactories[i].createBinaryComparator();
            }
            ExternalSortRunMerger merger = new ExternalSortRunMerger(ctx, runGen.getSorter(), runGen.getRuns(),
                    new int[] { 0 }, comparators, null, helper.getMapOutputRecordDescriptorWithoutExtraFields(),
                    framesLimit, delegatingWriter);
            merger.process();
        }
    }

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            try {
                writer.open();
                SortingRecordWriter recordWriter = new SortingRecordWriter();
                InputSplit split = null;
                int blockId = 0;
                while ((split = isp.next()) != null) {
                    try {
                        RecordReader<K1, V1> recordReader = inputFormat.createRecordReader(split,
                                taskAttemptContext);
                        ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
                        try {
                            Thread.currentThread().setContextClassLoader(getClass().getClassLoader());
                            recordReader.initialize(split, taskAttemptContext);
                        } finally {
                            Thread.currentThread().setContextClassLoader(ctxCL);
                        }
                        recordWriter.initBlock(blockId);
                        Mapper<K1, V1, K2, V2>.Context mCtx = new MRContextUtil().createMapContext(conf, taId,
                                recordReader, recordWriter, null, null, split);
                        mapper.run(mCtx);
                        recordReader.close();
                        recordWriter.sortAndFlushBlock(writer);
                        ++blockId;
                    } catch (IOException e) {
                        throw new HyracksDataException(e);
                    } catch (InterruptedException e) {
                        throw new HyracksDataException(e);
                    }
                }
            } catch (Throwable th) {
                writer.fail();
                throw th;
            } finally {
                writer.close();
            }
        }
    };
}

From source file:org.apache.hyracks.hdfs2.dataflow.HDFSReadOperatorDescriptor.java

License:Apache License

@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, final int partition, final int nPartitions)
        throws HyracksDataException {
    final List<FileSplit> inputSplits = splitsFactory.getSplits();

    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        private String nodeName = ctx.getJobletContext().getApplicationContext().getNodeId();
        private ContextFactory ctxFactory = new ContextFactory();

        @SuppressWarnings("unchecked")
        @Override
        public void initialize() throws HyracksDataException {
            ClassLoader ctxCL = Thread.currentThread().getContextClassLoader();
            try {
                writer.open();
                Thread.currentThread().setContextClassLoader(ctx.getJobletContext().getClassLoader());
                Job job = confFactory.getConf();
                job.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                IKeyValueParser parser = tupleParserFactory.createKeyValueParser(ctx);
                InputFormat inputFormat = ReflectionUtils.newInstance(job.getInputFormatClass(),
                        job.getConfiguration());
                int size = inputSplits.size();
                for (int i = 0; i < size; i++) {
                    /**
                     * read all the partitions scheduled to the current node
                     */
                    if (scheduledLocations[i].equals(nodeName)) {
                        /**
                         * pick an unread split to read; synchronize among
                         * simultaneous partitions on the same machine
                         */
                        synchronized (executed) {
                            if (executed[i] == false) {
                                executed[i] = true;
                            } else {
                                continue;
                            }
                        }

                        /**
                         * read the split
                         */
                        TaskAttemptContext context = ctxFactory.createContext(job.getConfiguration(), i);
                        context.getConfiguration().setClassLoader(ctx.getJobletContext().getClassLoader());
                        RecordReader reader = inputFormat.createRecordReader(inputSplits.get(i), context);
                        reader.initialize(inputSplits.get(i), context);
                        while (reader.nextKeyValue() == true) {
                            parser.parse(reader.getCurrentKey(), reader.getCurrentValue(), writer,
                                    inputSplits.get(i).toString());
                        }
                    }
                }
                parser.close(writer);
            } catch (Throwable th) {
                writer.fail();
                throw new HyracksDataException(th);
            } finally {
                writer.close();
                Thread.currentThread().setContextClassLoader(ctxCL);
            }
        }
    };
}

From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(HadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    if (taskCtx.taskInfo().hasMapperIndex())
        HadoopMapperUtils.mapperIndex(taskCtx.taskInfo().mapperIndex());
    else
        HadoopMapperUtils.clearMapperIndex();

    try {
        HadoopV2Context hadoopCtx = hadoopContext();

        InputSplit nativeSplit = hadoopCtx.getInputSplit();

        if (nativeSplit == null)
            throw new IgniteCheckedException("Input split cannot be null.");

        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
                hadoopCtx.getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopCtx);

        reader.initialize(nativeSplit, hadoopCtx);

        hadoopCtx.reader(reader);

        HadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(), hadoopCtx.getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopCtx));

            taskCtx.onMapperFinished();
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        HadoopMapperUtils.clearMapperIndex();

        if (err != null)
            abort(outputFormat);
    }
}

From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2MapTask.java

License:Apache License

/** {@inheritDoc} */
@SuppressWarnings({ "ConstantConditions", "unchecked" })
@Override
public void run0(GridHadoopV2TaskContext taskCtx) throws IgniteCheckedException {
    GridHadoopInputSplit split = info().inputSplit();

    InputSplit nativeSplit;

    if (split instanceof GridHadoopFileBlock) {
        GridHadoopFileBlock block = (GridHadoopFileBlock) split;

        nativeSplit = new FileSplit(new Path(block.file().toString()), block.start(), block.length(), null);
    } else
        nativeSplit = (InputSplit) taskCtx.getNativeSplit(split);

    assert nativeSplit != null;

    OutputFormat outputFormat = null;
    Exception err = null;

    JobContextImpl jobCtx = taskCtx.jobContext();

    try {
        InputFormat inFormat = ReflectionUtils.newInstance(jobCtx.getInputFormatClass(),
                hadoopContext().getConfiguration());

        RecordReader reader = inFormat.createRecordReader(nativeSplit, hadoopContext());

        reader.initialize(nativeSplit, hadoopContext());

        hadoopContext().reader(reader);

        GridHadoopJobInfo jobInfo = taskCtx.job().info();

        outputFormat = jobInfo.hasCombiner() || jobInfo.hasReducer() ? null : prepareWriter(jobCtx);

        Mapper mapper = ReflectionUtils.newInstance(jobCtx.getMapperClass(),
                hadoopContext().getConfiguration());

        try {
            mapper.run(new WrappedMapper().getMapContext(hadoopContext()));
        } finally {
            closeWriter();
        }

        commit(outputFormat);
    } catch (InterruptedException e) {
        err = e;

        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    } catch (Exception e) {
        err = e;

        throw new IgniteCheckedException(e);
    } finally {
        if (err != null)
            abort(outputFormat);
    }
}