Example usage for org.apache.hadoop.mapreduce JobContext getInputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapreduce.JobContext.getInputFormatClass(), collected from open-source projects.

Prototype

public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException;

Document

Get the InputFormat class for the job.
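
The examples under Usage below share a common pattern: resolve the job's configured InputFormat reflectively, then use it to compute splits or create record readers. The following minimal sketch illustrates that pattern; the class and method names are illustrative and are not taken from any of the projects listed below.

import java.io.IOException;
import java.util.List;

import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.util.ReflectionUtils;

public class InputFormatResolver {

    /**
     * Instantiates the job's configured InputFormat and asks it for splits.
     * ReflectionUtils.newInstance is used (rather than a bare newInstance call)
     * so that Configurable input formats receive the job configuration.
     */
    public static List<InputSplit> resolveSplits(JobContext context)
            throws IOException, InterruptedException {
        try {
            InputFormat<?, ?> inputFormat = ReflectionUtils.newInstance(
                    context.getInputFormatClass(), context.getConfiguration());
            return inputFormat.getSplits(context);
        } catch (ClassNotFoundException e) {
            // The configured input format class is not on this classpath.
            throw new IOException("Input format class not found", e);
        }
    }
}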

Usage

From source file: com.linkedin.cubert.io.CubertInputFormat.java

License: Open Source License
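
Cubert's wrapper input format resolves the job's actual InputFormat with a plain newInstance() call and rejects the case where the configured class is CubertInputFormat itself, which would mean no underlying format was set.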

private InputFormat<K, V> getActualInputFormat(JobContext context) {
    try {
        InputFormat<K, V> actualInputFormat = (InputFormat<K, V>) context.getInputFormatClass().newInstance();
        if (actualInputFormat instanceof CubertInputFormat)
            throw new RuntimeException("No actual input format specified");

        return actualInputFormat;
    } catch (InstantiationException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }
    return null;
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License: Apache License
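
The Hyracks mapper operator builds a record reader for either MapReduce API: with the new API it instantiates the format returned by getInputFormatClass() and calls createRecordReader(), while the old API path goes through JobConf.getInputFormat().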

private Object getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, new TaskAttemptID());
        return inputFormat.createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit,
                taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        return inputFormat.getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf,
                super.createReporter());
    }
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java

License: Apache License
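
The read operator's helper follows the same dual-API pattern, returning a RecordReader produced by either the new-API input format or the deprecated mapred one.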

private RecordReader getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    RecordReader hadoopRecordReader = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
        hadoopRecordReader = (RecordReader) inputFormat
                .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        hadoopRecordReader = (RecordReader) inputFormat
                .getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
    }
    return hadoopRecordReader;
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java

License: Apache License
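
This larger example embeds the same record-reader construction in a Hyracks push runtime: it resolves the input format for the partition's split, then streams the key/value pairs it reads into output frames.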

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            try {
                JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap);
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                conf.setClassLoader(this.getClass().getClassLoader());
                RecordReader hadoopRecordReader;
                Object key;
                Object value;
                Object[] splits = inputSplitsProxy.toInputSplits(conf);
                Object inputSplit = splits[partition];

                if (conf.getUseNewMapper()) {
                    JobContext context = new ContextFactory().createJobContext(conf);
                    org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                            .newInstance(context.getInputFormatClass(), conf);
                    TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
                    hadoopRecordReader = (RecordReader) inputFormat.createRecordReader(
                            (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
                } else {
                    Class inputFormatClass = conf.getInputFormat().getClass();
                    InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
                    hadoopRecordReader = (RecordReader) inputFormat.getRecordReader(
                            (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
                }

                Class inputKeyClass;
                Class inputValueClass;
                if (hadoopRecordReader instanceof SequenceFileRecordReader) {
                    inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass();
                    inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass();
                } else {
                    inputKeyClass = hadoopRecordReader.createKey().getClass();
                    inputValueClass = hadoopRecordReader.createValue().getClass();
                }

                key = hadoopRecordReader.createKey();
                value = hadoopRecordReader.createValue();
                ByteBuffer outBuffer = ctx.allocateFrame();
                FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
                appender.reset(outBuffer, true);
                RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(),
                        (Class<? extends Writable>) hadoopRecordReader.createValue().getClass());
                int nFields = outputRecordDescriptor.getFieldCount();
                ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields);
                writer.open();
                try {
                    while (hadoopRecordReader.next(key, value)) {
                        tb.reset();
                        switch (nFields) {
                        case 2:
                            tb.addField(outputRecordDescriptor.getFields()[0], key);
                        case 1:
                            tb.addField(outputRecordDescriptor.getFields()[1], value);
                        }
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(outBuffer, writer);
                            appender.reset(outBuffer, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new HyracksDataException("Record size (" + tb.getSize()
                                        + ") larger than frame size (" + outBuffer.capacity() + ")");
                            }
                        }
                    }
                    if (appender.getTupleCount() > 0) {
                        FrameUtils.flushFrame(outBuffer, writer);
                    }
                } catch (Exception e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                } finally {
                    writer.close();
                }
                hadoopRecordReader.close();
            } catch (InstantiationException e) {
                throw new HyracksDataException(e);
            } catch (IllegalAccessException e) {
                throw new HyracksDataException(e);
            } catch (ClassNotFoundException e) {
                throw new HyracksDataException(e);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}

From source file: org.apache.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java

License: Apache License
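
The same operator under the org.apache.hyracks package; the input-format handling is unchanged, while frame output goes through VSizeFrame and FrameUtils.appendToWriter.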

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            try {
                JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap);
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                conf.setClassLoader(this.getClass().getClassLoader());
                RecordReader hadoopRecordReader;
                Object key;
                Object value;
                Object[] splits = inputSplitsProxy.toInputSplits(conf);
                Object inputSplit = splits[partition];

                if (conf.getUseNewMapper()) {
                    JobContext context = new ContextFactory().createJobContext(conf);
                    org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                            .newInstance(context.getInputFormatClass(), conf);
                    TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
                    hadoopRecordReader = (RecordReader) inputFormat.createRecordReader(
                            (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
                } else {
                    Class inputFormatClass = conf.getInputFormat().getClass();
                    InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
                    hadoopRecordReader = (RecordReader) inputFormat.getRecordReader(
                            (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
                }

                Class inputKeyClass;
                Class inputValueClass;
                if (hadoopRecordReader instanceof SequenceFileRecordReader) {
                    inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass();
                    inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass();
                } else {
                    inputKeyClass = hadoopRecordReader.createKey().getClass();
                    inputValueClass = hadoopRecordReader.createValue().getClass();
                }

                key = hadoopRecordReader.createKey();
                value = hadoopRecordReader.createValue();
                FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
                RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(),
                        (Class<? extends Writable>) hadoopRecordReader.createValue().getClass());
                int nFields = outputRecordDescriptor.getFieldCount();
                ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields);
                writer.open();
                try {
                    while (hadoopRecordReader.next(key, value)) {
                        tb.reset();
                        switch (nFields) {
                        case 2:
                            tb.addField(outputRecordDescriptor.getFields()[0], key);
                        case 1:
                            tb.addField(outputRecordDescriptor.getFields()[1], value);
                        }
                        FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(),
                                0, tb.getSize());
                    }
                    appender.flush(writer, true);
                } catch (Exception e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                } finally {
                    writer.close();
                }
                hadoopRecordReader.close();
            } catch (InstantiationException e) {
                throw new HyracksDataException(e);
            } catch (IllegalAccessException e) {
                throw new HyracksDataException(e);
            } catch (ClassNotFoundException e) {
                throw new HyracksDataException(e);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}

From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Splitter.java

License: Apache License
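
Ignite's splitter instantiates the job's input format, asks it for splits, and maps each one to an Ignite split description: HadoopFileBlock for file splits, a wrapped split otherwise.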

/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(),
                ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(),
                        s.getLength()));
            } else
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}

From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Splitter.java

License: Apache License
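
A Grid-prefixed variant of the same Ignite splitter, with identical input-format handling.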

/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(),
                ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(),
                        s.getLength()));
            } else
                res.add(GridHadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}

From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java

License: Apache License
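
Tez resolves the real input format from the job context and, when split grouping is enabled, wraps it in TezGroupedSplitsInputFormat before computing and size-sorting the splits.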

@SuppressWarnings({ "rawtypes", "unchecked" })
private static org.apache.hadoop.mapreduce.InputSplit[] generateNewSplits(JobContext jobContext,
        boolean groupSplits, int numTasks) throws ClassNotFoundException, IOException, InterruptedException {
    Configuration conf = jobContext.getConfiguration();

    // This is the real input format.
    org.apache.hadoop.mapreduce.InputFormat<?, ?> inputFormat = null;
    try {
        inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), conf);
    } catch (ClassNotFoundException e) {
        throw new TezUncheckedException(e);
    }

    org.apache.hadoop.mapreduce.InputFormat<?, ?> finalInputFormat = inputFormat;

    // For grouping, the underlying InputFormat class is passed in as a parameter.
    // JobContext has this set up as TezGroupedSplitsInputFormat.
    if (groupSplits) {
        org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat groupedFormat = new org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat();
        groupedFormat.setConf(conf);
        groupedFormat.setInputFormat(inputFormat);
        groupedFormat.setDesiredNumberOfSplits(numTasks);
        finalInputFormat = groupedFormat;
    } else {
        finalInputFormat = inputFormat;
    }

    List<org.apache.hadoop.mapreduce.InputSplit> array = finalInputFormat.getSplits(jobContext);
    org.apache.hadoop.mapreduce.InputSplit[] splits = (org.apache.hadoop.mapreduce.InputSplit[]) array
            .toArray(new org.apache.hadoop.mapreduce.InputSplit[array.size()]);

    // sort the splits into order based on size, so that the biggest
    // go first
    Arrays.sort(splits, new InputSplitComparator());
    return splits;
}

From source file: org.goldenorb.io.input.VertexInput.java

License: Apache License
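
GoldenOrb deserializes a raw input split and then uses getInputFormatClass() to build and initialize the matching record reader for that split.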

/**
* 
*/
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }

}

From source file: org.goldenorb.io.InputSplitAllocator.java

License: Apache License
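
Here the input format is used on the coordinator side: the splits it produces are serialized into RawSplit objects before being assigned to partitions.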

/**
 * This method gets the raw splits and calls another method to assign them.
 * @return Map
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);

        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}
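
For reference, the class that getInputFormatClass() returns is whatever was registered on the job through Job.setInputFormatClass(); with the new MapReduce API it defaults to TextInputFormat when nothing is set. A small, self-contained round trip, assuming only a standard Hadoop client on the classpath:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;

public class InputFormatRoundTrip {
    public static void main(String[] args) throws Exception {
        // Build a job around a fresh configuration.
        Job job = Job.getInstance(new Configuration());

        // Register an input format on the job...
        job.setInputFormatClass(KeyValueTextInputFormat.class);

        // ...and read it back through the JobContext view of the job.
        // Prints: class org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat
        System.out.println(job.getInputFormatClass());
    }
}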