List of usage examples for org.apache.hadoop.mapred.JobConf.getUseNewMapper()
public boolean getUseNewMapper()
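getUseNewMapper() reports whether the job is configured to run its map phase through the new org.apache.hadoop.mapreduce API rather than the old org.apache.hadoop.mapred API; every example below branches on it to pick the matching InputFormat, RecordReader, or split type. Before the examples, a minimal, self-contained sketch (not taken from the sources listed below) of reading and toggling the flag directly on a JobConf:

import org.apache.hadoop.mapred.JobConf;

public class UseNewMapperCheck {
    public static void main(String[] args) {
        JobConf conf = new JobConf();
        // Defaults to false: the job is assumed to use the old mapred API.
        System.out.println("new mapper API? " + conf.getUseNewMapper());

        // The flag is normally set by the new-API job client at submission time,
        // but it can also be flipped explicitly.
        conf.setUseNewMapper(true);
        System.out.println("new mapper API? " + conf.getUseNewMapper());
    }
}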
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License: Apache License

private Object getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, new TaskAttemptID());
        return inputFormat.createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit,
                taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        return inputFormat.getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf,
                super.createReporter());
    }
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License: Apache License

@Override
public IOperatorNodePushable createPushRuntime(IHyracksTaskContext ctx,
        IRecordDescriptorProvider recordDescProvider, int partition, int nPartitions)
        throws HyracksDataException {
    JobConf conf = getJobConf();
    Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
    try {
        if (selfRead) {
            RecordDescriptor recordDescriptor = null;
            if (inputSplits == null) {
                inputSplits = inputSplitsProxy.toInputSplits(conf);
            }
            Object reader = getRecordReader(conf, inputSplits[partition]);
            if (conf.getUseNewMapper()) {
                org.apache.hadoop.mapreduce.RecordReader newReader = (org.apache.hadoop.mapreduce.RecordReader) reader;
                newReader.initialize((org.apache.hadoop.mapreduce.InputSplit) inputSplits[partition],
                        new ContextFactory().createContext(conf, new TaskAttemptID()));
                newReader.nextKeyValue();
                Object key = newReader.getCurrentKey();
                // A null current key is represented as NullWritable; otherwise use the key's own class.
                Class keyClass;
                if (key == null) {
                    keyClass = Class.forName("org.apache.hadoop.io.NullWritable");
                } else {
                    keyClass = key.getClass();
                }
                recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) keyClass,
                        (Class<? extends Writable>) newReader.getCurrentValue().getClass());
            } else {
                RecordReader oldReader = (RecordReader) reader;
                recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) oldReader.createKey().getClass(),
                        (Class<? extends Writable>) oldReader.createValue().getClass());
            }
            return createSelfReadingMapper(ctx, recordDescriptor, partition);
        } else {
            return new DeserializedOperatorNodePushable(ctx, new MapperOperator(partition),
                    recordDescProvider.getInputRecordDescriptor(this.activityNodeId, 0));
        }
    } catch (Exception e) {
        throw new HyracksDataException(e);
    }
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License: Apache License

private RecordReader getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    RecordReader hadoopRecordReader = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
        hadoopRecordReader = (RecordReader) inputFormat
                .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        hadoopRecordReader = (RecordReader) inputFormat
                .getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
    }
    return hadoopRecordReader;
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License: Apache License

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            try {
                JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap);
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                conf.setClassLoader(this.getClass().getClassLoader());
                RecordReader hadoopRecordReader;
                Object key;
                Object value;
                Object[] splits = inputSplitsProxy.toInputSplits(conf);
                Object inputSplit = splits[partition];
                if (conf.getUseNewMapper()) {
                    JobContext context = new ContextFactory().createJobContext(conf);
                    org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                            .newInstance(context.getInputFormatClass(), conf);
                    TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
                    hadoopRecordReader = (RecordReader) inputFormat.createRecordReader(
                            (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
                } else {
                    Class inputFormatClass = conf.getInputFormat().getClass();
                    InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
                    hadoopRecordReader = (RecordReader) inputFormat.getRecordReader(
                            (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
                }
                Class inputKeyClass;
                Class inputValueClass;
                if (hadoopRecordReader instanceof SequenceFileRecordReader) {
                    inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass();
                    inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass();
                } else {
                    inputKeyClass = hadoopRecordReader.createKey().getClass();
                    inputValueClass = hadoopRecordReader.createValue().getClass();
                }
                key = hadoopRecordReader.createKey();
                value = hadoopRecordReader.createValue();
                ByteBuffer outBuffer = ctx.allocateFrame();
                FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize());
                appender.reset(outBuffer, true);
                RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(),
                        (Class<? extends Writable>) hadoopRecordReader.createValue().getClass());
                int nFields = outputRecordDescriptor.getFieldCount();
                ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields);
                writer.open();
                try {
                    while (hadoopRecordReader.next(key, value)) {
                        tb.reset();
                        switch (nFields) {
                            case 2:
                                tb.addField(outputRecordDescriptor.getFields()[0], key);
                            case 1:
                                tb.addField(outputRecordDescriptor.getFields()[1], value);
                        }
                        if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                            FrameUtils.flushFrame(outBuffer, writer);
                            appender.reset(outBuffer, true);
                            if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) {
                                throw new HyracksDataException("Record size (" + tb.getSize()
                                        + ") larger than frame size (" + outBuffer.capacity() + ")");
                            }
                        }
                    }
                    if (appender.getTupleCount() > 0) {
                        FrameUtils.flushFrame(outBuffer, writer);
                    }
                } catch (Exception e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                } finally {
                    writer.close();
                }
                hadoopRecordReader.close();
            } catch (InstantiationException e) {
                throw new HyracksDataException(e);
            } catch (IllegalAccessException e) {
                throw new HyracksDataException(e);
            } catch (ClassNotFoundException e) {
                throw new HyracksDataException(e);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopReducerOperatorDescriptor.java
License: Apache License

public static RecordDescriptor getRecordDescriptor(JobConf conf, IHadoopClassFactory classFactory) {
    String outputKeyClassName = null;
    String outputValueClassName = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        outputKeyClassName = context.getOutputKeyClass().getName();
        outputValueClassName = context.getOutputValueClass().getName();
    } else {
        outputKeyClassName = conf.getOutputKeyClass().getName();
        outputValueClassName = conf.getOutputValueClass().getName();
    }
    RecordDescriptor recordDescriptor = null;
    try {
        if (classFactory == null) {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) Class.forName(outputKeyClassName),
                    (Class<? extends Writable>) Class.forName(outputValueClassName));
        } else {
            recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                    (Class<? extends Writable>) classFactory.loadClass(outputKeyClassName),
                    (Class<? extends Writable>) classFactory.loadClass(outputValueClassName));
        }
    } catch (Exception e) {
        e.printStackTrace();
        return null;
    }
    return recordDescriptor;
}
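For orientation, a minimal sketch (hypothetical driver code, not part of the Hyracks sources; assumes a Hadoop 2.x dependency for Job.getInstance) of where the output key/value classes read by each branch above are typically configured:

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;

public class OutputClassesExample {
    public static void main(String[] args) throws Exception {
        // Old API: the classes live directly on the JobConf that
        // getRecordDescriptor reads via getOutputKeyClass()/getOutputValueClass().
        JobConf oldConf = new JobConf();
        oldConf.setOutputKeyClass(Text.class);
        oldConf.setOutputValueClass(IntWritable.class);

        // New API: the classes are set on a mapreduce.Job whose underlying
        // configuration is what the JobContext branch above reads back.
        Job job = Job.getInstance(new JobConf());
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
    }
}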
From source file: edu.uci.ics.hyracks.dataflow.hadoop.HadoopWriteOperatorDescriptor.java
License: Apache License

private static FileSplit[] getOutputSplits(JobConf conf, int noOfMappers) throws ClassNotFoundException {
    int numOutputters = conf.getNumReduceTasks() != 0 ? conf.getNumReduceTasks() : noOfMappers;
    Object outputFormat = null;
    if (conf.getUseNewMapper()) {
        outputFormat = ReflectionUtils
                .newInstance(new ContextFactory().createJobContext(conf).getOutputFormatClass(), conf);
    } else {
        outputFormat = conf.getOutputFormat();
    }
    if (outputFormat instanceof NullOutputFormat) {
        FileSplit[] outputFileSplits = new FileSplit[numOutputters];
        for (int i = 0; i < numOutputters; i++) {
            String outputPath = "/tmp/" + System.currentTimeMillis() + i;
            outputFileSplits[i] = new FileSplit("localhost", new FileReference(new File(outputPath)));
        }
        return outputFileSplits;
    } else {
        FileSplit[] outputFileSplits = new FileSplit[numOutputters];
        String absolutePath = FileOutputFormat.getOutputPath(conf).toString();
        for (int index = 0; index < numOutputters; index++) {
            String suffix = new String("part-00000");
            suffix = new String(suffix.substring(0, suffix.length() - ("" + index).length()));
            suffix = suffix + index;
            String outputPath = absolutePath + "/" + suffix;
            outputFileSplits[index] = new FileSplit("localhost", outputPath);
        }
        return outputFileSplits;
    }
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.util.InputSplitsProxy.java
License: Apache License

public InputSplitsProxy(JobConf conf, Object[] inputSplits) throws IOException {
    isClasses = new Class[inputSplits.length];
    ByteArrayOutputStream baos = new ByteArrayOutputStream();
    DataOutputStream dos = new DataOutputStream(baos);
    if (conf.getUseNewMapper()) {
        for (int i = 0; i < inputSplits.length; ++i) {
            isClasses[i] = ((org.apache.hadoop.mapreduce.InputSplit) inputSplits[i]).getClass();
            ((Writable) inputSplits[i]).write(dos);
        }
    } else {
        for (int i = 0; i < inputSplits.length; ++i) {
            isClasses[i] = ((org.apache.hadoop.mapred.InputSplit) inputSplits[i]).getClass();
            ((Writable) inputSplits[i]).write(dos);
        }
    }
    dos.close();
    bytes = baos.toByteArray();
}
From source file: edu.uci.ics.hyracks.dataflow.hadoop.util.InputSplitsProxy.java
License: Apache License

public Object[] toInputSplits(JobConf jobConf)
        throws InstantiationException, IllegalAccessException, IOException {
    Object[] splits = new Object[isClasses.length];
    DataInputStream dis = new DataInputStream(new ByteArrayInputStream(bytes));
    for (int i = 0; i < splits.length; ++i) {
        splits[i] = ReflectionUtils.newInstance(isClasses[i], jobConf);
        if (jobConf.getUseNewMapper()) {
            ((Writable) splits[i]).readFields(dis);
        } else {
            ((Writable) splits[i]).readFields(dis);
        }
    }
    return splits;
}
From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License: Apache License

private Object[] getInputSplits(JobConf conf) throws IOException, ClassNotFoundException, InterruptedException {
    if (conf.getUseNewMapper()) {
        return getNewInputSplits(conf);
    } else {
        return getOldInputSplits(conf);
    }
}
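The getNewInputSplits and getOldInputSplits helpers are not shown above. As a rough sketch of what each branch typically does, here is a hypothetical helper using stock Hadoop APIs instead of the Hyracks ContextFactory (assumes a Hadoop 2.x dependency for Job.getInstance):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.ReflectionUtils;

public class SplitListingSketch {
    // Hypothetical helper mirroring the new-API/old-API split-listing branch.
    static Object[] listSplits(JobConf conf) throws Exception {
        if (conf.getUseNewMapper()) {
            // New API: the mapreduce InputFormat returns a List<InputSplit>.
            Job job = Job.getInstance(conf);
            org.apache.hadoop.mapreduce.InputFormat<?, ?> format =
                    ReflectionUtils.newInstance(job.getInputFormatClass(), conf);
            return format.getSplits(job).toArray();
        } else {
            // Old API: the mapred InputFormat returns an InputSplit[] directly.
            return conf.getInputFormat().getSplits(conf, conf.getNumMapTasks());
        }
    }
}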
From source file: edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License: Apache License

private long getInputSize(Object[] splits, JobConf conf) throws IOException, InterruptedException {
    long totalInputSize = 0;
    if (conf.getUseNewMapper()) {
        for (org.apache.hadoop.mapreduce.InputSplit split : (org.apache.hadoop.mapreduce.InputSplit[]) splits) {
            totalInputSize += split.getLength();
        }
    } else {
        for (InputSplit split : (InputSplit[]) splits) {
            totalInputSize += split.getLength();
        }
    }
    return totalInputSize;
}