List of usage examples for org.apache.hadoop.mapreduce.JobContext.getInputFormatClass()
public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException;
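Most of the examples below follow the same pattern: resolve the configured class with getInputFormatClass() and instantiate it through ReflectionUtils so the new instance picks up the job Configuration. The following is a minimal sketch of that pattern, not taken from any of the listed projects; the InputFormatLookup class and its resolve method are illustrative names.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.util.ReflectionUtils;

    public class InputFormatLookup {

        // Resolves the InputFormat class configured on the job and instantiates it.
        // ReflectionUtils.newInstance also injects the Configuration if the class is Configurable.
        public static InputFormat<?, ?> resolve(JobContext context) throws ClassNotFoundException {
            Configuration conf = context.getConfiguration();
            return ReflectionUtils.newInstance(context.getInputFormatClass(), conf);
        }

        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration());
            job.setInputFormatClass(TextInputFormat.class);
            // Job implements JobContext, so it can be passed directly.
            InputFormat<?, ?> format = resolve(job);
            System.out.println(format.getClass().getName());
        }
    }

getInputFormatClass() throws ClassNotFoundException when the configured class (in recent Hadoop versions, the mapreduce.job.inputformat.class property, defaulting to TextInputFormat) cannot be loaded, which is why the callers below either propagate or wrap that exception.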
From source file:com.linkedin.cubert.io.CubertInputFormat.java
License:Open Source License
private InputFormat<K, V> getActualInputFormat(JobContext context) {
    try {
        // Instantiate the InputFormat configured on the job; it must not be CubertInputFormat itself.
        InputFormat<K, V> actualInputFormat =
                (InputFormat<K, V>) context.getInputFormatClass().newInstance();
        if (actualInputFormat instanceof CubertInputFormat)
            throw new RuntimeException("No actual input format specified");
        return actualInputFormat;
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
    return null;
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
private Object getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat =
                (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                        .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, new TaskAttemptID());
        return inputFormat.createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit,
                taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        return inputFormat.getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf,
                super.createReporter());
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License:Apache License
private RecordReader getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    RecordReader hadoopRecordReader = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat =
                (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                        .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
        hadoopRecordReader = (RecordReader) inputFormat
                .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        hadoopRecordReader = (RecordReader) inputFormat
                .getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
    }
    return hadoopRecordReader;
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License:Apache License
@SuppressWarnings("deprecation") @Override//from w w w . ja v a 2 s . co m public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryOutputSourceOperatorNodePushable() { @Override public void initialize() throws HyracksDataException { try { JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf.setClassLoader(this.getClass().getClassLoader()); RecordReader hadoopRecordReader; Object key; Object value; Object[] splits = inputSplitsProxy.toInputSplits(conf); Object inputSplit = splits[partition]; if (conf.getUseNewMapper()) { JobContext context = new ContextFactory().createJobContext(conf); org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils .newInstance(context.getInputFormatClass(), conf); TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null); hadoopRecordReader = (RecordReader) inputFormat.createRecordReader( (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext); } else { Class inputFormatClass = conf.getInputFormat().getClass(); InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf); hadoopRecordReader = (RecordReader) inputFormat.getRecordReader( (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter()); } Class inputKeyClass; Class inputValueClass; if (hadoopRecordReader instanceof SequenceFileRecordReader) { inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass(); inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass(); } else { inputKeyClass = hadoopRecordReader.createKey().getClass(); inputValueClass = hadoopRecordReader.createValue().getClass(); } key = hadoopRecordReader.createKey(); value = hadoopRecordReader.createValue(); ByteBuffer outBuffer = ctx.allocateFrame(); FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize()); appender.reset(outBuffer, true); RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor( (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(), (Class<? 
extends Writable>) hadoopRecordReader.createValue().getClass()); int nFields = outputRecordDescriptor.getFieldCount(); ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields); writer.open(); try { while (hadoopRecordReader.next(key, value)) { tb.reset(); switch (nFields) { case 2: tb.addField(outputRecordDescriptor.getFields()[0], key); case 1: tb.addField(outputRecordDescriptor.getFields()[1], value); } if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { FrameUtils.flushFrame(outBuffer, writer); appender.reset(outBuffer, true); if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + outBuffer.capacity() + ")"); } } } if (appender.getTupleCount() > 0) { FrameUtils.flushFrame(outBuffer, writer); } } catch (Exception e) { writer.fail(); throw new HyracksDataException(e); } finally { writer.close(); } hadoopRecordReader.close(); } catch (InstantiationException e) { throw new HyracksDataException(e); } catch (IllegalAccessException e) { throw new HyracksDataException(e); } catch (ClassNotFoundException e) { throw new HyracksDataException(e); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } }; }
From source file:org.apache.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License:Apache License
@SuppressWarnings("deprecation") @Override/* w ww .j av a2s.c o m*/ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryOutputSourceOperatorNodePushable() { @Override public void initialize() throws HyracksDataException { try { JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf.setClassLoader(this.getClass().getClassLoader()); RecordReader hadoopRecordReader; Object key; Object value; Object[] splits = inputSplitsProxy.toInputSplits(conf); Object inputSplit = splits[partition]; if (conf.getUseNewMapper()) { JobContext context = new ContextFactory().createJobContext(conf); org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils .newInstance(context.getInputFormatClass(), conf); TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null); hadoopRecordReader = (RecordReader) inputFormat.createRecordReader( (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext); } else { Class inputFormatClass = conf.getInputFormat().getClass(); InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf); hadoopRecordReader = (RecordReader) inputFormat.getRecordReader( (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter()); } Class inputKeyClass; Class inputValueClass; if (hadoopRecordReader instanceof SequenceFileRecordReader) { inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass(); inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass(); } else { inputKeyClass = hadoopRecordReader.createKey().getClass(); inputValueClass = hadoopRecordReader.createValue().getClass(); } key = hadoopRecordReader.createKey(); value = hadoopRecordReader.createValue(); FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx)); RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor( (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(), (Class<? extends Writable>) hadoopRecordReader.createValue().getClass()); int nFields = outputRecordDescriptor.getFieldCount(); ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields); writer.open(); try { while (hadoopRecordReader.next(key, value)) { tb.reset(); switch (nFields) { case 2: tb.addField(outputRecordDescriptor.getFields()[0], key); case 1: tb.addField(outputRecordDescriptor.getFields()[1], value); } FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize()); } appender.flush(writer, true); } catch (Exception e) { writer.fail(); throw new HyracksDataException(e); } finally { writer.close(); } hadoopRecordReader.close(); } catch (InstantiationException e) { throw new HyracksDataException(e); } catch (IllegalAccessException e) { throw new HyracksDataException(e); } catch (ClassNotFoundException e) { throw new HyracksDataException(e); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } }; }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Splitter.java
License:Apache License
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            } else
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Splitter.java
License:Apache License
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            } else
                res.add(GridHadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private static org.apache.hadoop.mapreduce.InputSplit[] generateNewSplits(JobContext jobContext, boolean groupSplits, int numTasks) throws ClassNotFoundException, IOException, InterruptedException { Configuration conf = jobContext.getConfiguration(); // This is the real input format. org.apache.hadoop.mapreduce.InputFormat<?, ?> inputFormat = null; try {//from ww w . ja v a 2 s. c o m inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), conf); } catch (ClassNotFoundException e) { throw new TezUncheckedException(e); } org.apache.hadoop.mapreduce.InputFormat<?, ?> finalInputFormat = inputFormat; // For grouping, the underlying InputFormatClass class is passed in as a parameter. // JobContext has this setup as TezGroupedSplitInputFormat if (groupSplits) { org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat groupedFormat = new org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat(); groupedFormat.setConf(conf); groupedFormat.setInputFormat(inputFormat); groupedFormat.setDesiredNumberOfSplits(numTasks); finalInputFormat = groupedFormat; } else { finalInputFormat = inputFormat; } List<org.apache.hadoop.mapreduce.InputSplit> array = finalInputFormat.getSplits(jobContext); org.apache.hadoop.mapreduce.InputSplit[] splits = (org.apache.hadoop.mapreduce.InputSplit[]) array .toArray(new org.apache.hadoop.mapreduce.InputSplit[array.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(splits, new InputSplitComparator()); return splits; }
From source file:org.goldenorb.io.input.VertexInput.java
License:Apache License
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.goldenorb.io.InputSplitAllocator.java
License:Apache License
/**
 * This method gets the raw splits and calls another method to assign them.
 *
 * @return Map
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}