Example usage for org.apache.hadoop.mapred JobConf getUseNewMapper

Introduction

This page collects example usages of the org.apache.hadoop.mapred.JobConf.getUseNewMapper() method from open source projects.

Prototype

public boolean getUseNewMapper() 

Document

Should the framework use the new context-object code for running the mapper?
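
The flag controls whether the mapper runs through the new context-object (org.apache.hadoop.mapreduce) API or the old org.apache.hadoop.mapred API. It defaults to false and is typically set for you when a job is configured through the new API, but it can also be toggled directly with JobConf.setUseNewMapper(boolean). A minimal sketch of reading and setting the flag (the class name is illustrative only):

import org.apache.hadoop.mapred.JobConf;

public class UseNewMapperExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Defaults to false: the old org.apache.hadoop.mapred mapper API is assumed.
        System.out.println("use new mapper API: " + conf.getUseNewMapper());

        // Opt in to the new context-object (org.apache.hadoop.mapreduce) mapper API.
        conf.setUseNewMapper(true);
        System.out.println("use new mapper API: " + conf.getUseNewMapper());
    }
}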

Usage

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License: Apache License

private Object getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, new TaskAttemptID());
        return inputFormat.createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit,
                taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        return inputFormat.getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf,
                super.createReporter());
    }
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java

License: Apache License

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {

    JobConf conf = getJobConf();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
    try {
        if (selfRead) {
            RecordDescriptor recordDescriptor = null;
            if (inputSplits == null) {
                inputSplits = inputSplitsProxy.toInputSplits(conf);
            }
            Object reader = getRecordReader(conf, inputSplits[partition]);
            if (conf.getUseNewMapper()) {
                org.apache.hadoop.mapreduce.RecordReader newReader = (org.apache.hadoop.mapreduce.RecordReader) reader;
                newReader.initialize((org.apache.hadoop.mapreduce.InputSplit) inputSplits[partition],
                        new ContextFactory().createContext(conf, new TaskAttemptID()));
                newReader.nextKeyValue();
                Object key = newReader.getCurrentKey();
                Class keyClass = null;
                if (key == null) {
                    keyClass = Class.forName("org.apache.hadoop.io.NullWritable");
                }
                recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) keyClass,
                        (Class<? extends Writable>) newReader.getCurrentValue().getClass());
            } else {
                RecordReader oldReader = (RecordReader) reader;
                recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) oldReader.createKey().getClass(),
                        (Class<? extends Writable>) oldReader.createValue().getClass());
            }
            return createSelfReadingMapper(ctx, recordDescriptor, partition);
        } else {
            return new DeserializedOperatorNodePushable(ctx, new MapperOperator(partition),
                    recordDescProvider.getInputRecordDescriptor(this.activityNodeId, 0));
        }
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java

License: Apache License

private RecordReader getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    RecordReader hadoopRecordReader = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, null);
        hadoopRecordReader = (RecordReader) inputFormat
                .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        hadoopRecordReader = (RecordReader) inputFormat
                .getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
    }
    return hadoopRecordReader;
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java

License: Apache License

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            try {
                JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap);
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                conf.setClassLoader(this.getClass().getClassLoader());
                RecordReader hadoopRecordReader;
                Object key;
                Object value;
                Object[] splits = inputSplitsProxy.toInputSplits(conf);
                Object inputSplit = splits[partition];

                if (conf.getUseNewMapper()) {
                    JobContext context = new ContextFactory().createJobContext(conf);
                    org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                            .newInstance(context.getInputFormatClass(), conf);
                    TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, null);
                    hadoopRecordReader = (RecordReader) inputFormat.createRecordReader(
                            (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
                } else {
                    Class inputFormatClass = conf.getInputFormat().getClass();
                    InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
                    hadoopRecordReader = (RecordReader) inputFormat.getRecordReader(
                            (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
                }

                Class inputKeyClass;
                Class inputValueClass;
                if (hadoopRecordReader instanceof SequenceFileRecordReader) {
                    inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass();
                    inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass();
                } else {
                    inputKeyClass = hadoopRecordReader.createKey().getClass();
                    inputValueClass = hadoopRecordReader.createValue().getClass();
                }

                key = hadoopRecordReader.createKey();
                value = hadoopRecordReader.createValue();
                ByteBuffer outBuffer = ctx.allocateFrame();
                FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
                appender.reset(outBuffer, true);
                RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(),
                        (Class<? extends Writable>) hadoopRecordReader.createValue().getClass());
                int nFields = outputRecordDescriptor.getFieldCount();
                ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields);
                writer.open();
                try {
                    while (hadoopRecordReader.next(key, value)) {
                        tb.reset();
                        switch (nFields) {
                        case 2:
                            tb.addField(outputRecordDescriptor.getFields()[0], key);
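                            // intentional fall-through: with two fields, the key is written here and the value below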
                        case 1:
                            tb.addField(outputRecordDescriptor.getFields()[1], value);
                        }
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(outBuffer, writer);
                            appender.reset(outBuffer, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new HyracksDataException("Record size (" + tb.getSize()
                                        + ") larger than frame size (" + outBuffer.capacity() + ")");
                            }
                        }
                    }
                    if (appender.getTupleCount() > 0) {
                        FrameUtils.flushFrame(outBuffer, writer);
                    }
                } catch (Exception e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                } finally {
                    writer.close();
                }
                hadoopRecordReader.close();
            } catch (InstantiationException e) {
                throw new HyracksDataException(e);
            } catch (IllegalAccessException e) {
                throw new HyracksDataException(e);
            } catch (ClassNotFoundException e) {
                throw new HyracksDataException(e);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java

License: Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;

    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }

    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopWriteOperatorDescriptor.java

License: Apache License

private static FileSplit[] getOutputSplits(JobConf conf, int noOfMappers) throws ClassNotFoundException {
    int numOutputters = conf.getNumReduceTasks() != 0 ? conf.getNumReduceTasks() : noOfMappers;
    Object outputFormat = null;
    if (conf.getUseNewMapper()) {
        outputFormat = ReflectionUtils
                .newInstance(new ContextFactory().createJobContext(conf).getOutputFormatClass(), conf);
    } else {
        outputFormat = conf.getOutputFormat();
    }
    if (outputFormat instanceof NullOutputFormat) {
        FileSplit[] outputFileSplits = new FileSplit[numOutputters];
        for (int i = 0; i < numOutputters; i++) {
            String outputPath = "/tmp/" + System.currentTimeMillis() + i;
            outputFileSplits[i] = new FileSplit("localhost", new FileReference(new File(outputPath)));
        }
        return outputFileSplits;
    } else {

        FileSplit[] outputFileSplits = new FileSplit[numOutputters];
        String absolutePath = FileOutputFormat.getOutputPath(conf).toString();
        for (int index = 0; index < numOutputters; index++) {
            String suffix = new String("part-00000");
            suffix = new String(suffix.substring(0, suffix.length() - ("" + index).length()));
            suffix = suffix + index;
            String outputPath = absolutePath + "/" + suffix;
            outputFileSplits[index] = new FileSplit("localhost", outputPath);
        }
        return outputFileSplits;
    }
}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.util.InputSplitsProxy.java

License: Apache License

public InputSplitsProxy(JobConf conf, Object[] inputSplits) throws IOException {
    isClasses = new Class[inputSplits.length];
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    if (conf.getUseNewMapper()) {
        for (int i = 0; i < inputSplits.length; ++i) {
            isClasses[i] = ((org.apache.hadoop.mapreduce.InputSplit) inputSplits[i]).getClass();
            ((Writable) inputSplits[i]).write(dos);
        }
    } else {
        for (int i = 0; i < inputSplits.length; ++i) {
            isClasses[i] = ((org.apache.hadoop.mapred.InputSplit) inputSplits[i]).getClass();
            ((Writable) inputSplits[i]).write(dos);
        }
    }
    dos.close();
    bytes = baos.toByteArray();

}

From source file: edu.uci.ics.hyracks.dataflow.hadoop.util.InputSplitsProxy.java

License: Apache License

public Object[] toInputSplits(JobConf jobConf)
        throws InstantiationException, IllegalAccessException, IOException {
    Object[] splits = new Object[isClasses.length];
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
    for (int i = 0; i < splits.length; ++i) {
        splits[i] = ReflectionUtils.newInstance(isClasses[i], jobConf);
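        // both the old- and new-API splits used here implement Writable, so the two branches read the bytes identically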
        if (jobConf.getUseNewMapper()) {
            ((Writable) splits[i]).readFields(dis);
        } else {
            ((Writable) splits[i]).readFields(dis);
        }
    }
    return splits;
}

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private Object[] getInputSplits(JobConf conf) throws IOException, ClassNotFoundException, InterruptedException {
    if (conf.getUseNewMapper()) {
        return getNewInputSplits(conf);
    } else {
        return getOldInputSplits(conf);
    }
}

From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License: Apache License

private long getInputSize(Object[] splits, JobConf conf) throws IOException, InterruptedException {
    long totalInputSize = 0;
    if (conf.getUseNewMapper()) {
        for (org.apache.hadoop.mapreduce.InputSplit split : (org.apache.hadoop.mapreduce.InputSplit[]) splits) {
            totalInputSize += split.getLength();
        }
    } else {
        for (InputSplit split : (InputSplit[]) splits) {
            totalInputSize += split.getLength();
        }
    }
    return totalInputSize;
}