List of usage examples for org.apache.hadoop.mapreduce.JobContext.getInputFormatClass()
public Class<? extends InputFormat<?, ?>> getInputFormatClass() throws ClassNotFoundException;
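Most of the examples below follow the same pattern: resolve the configured class with getInputFormatClass() and instantiate it through ReflectionUtils so the new instance picks up the job Configuration. The following is a minimal sketch of that pattern, not taken from any of the listed projects; the InputFormatLookup class and its resolve method are illustrative names.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.mapreduce.InputFormat;
    import org.apache.hadoop.mapreduce.Job;
    import org.apache.hadoop.mapreduce.JobContext;
    import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
    import org.apache.hadoop.util.ReflectionUtils;

    public class InputFormatLookup {

        // Resolves the InputFormat class configured on the job and instantiates it.
        // ReflectionUtils.newInstance also injects the Configuration if the class is Configurable.
        public static InputFormat<?, ?> resolve(JobContext context) throws ClassNotFoundException {
            Configuration conf = context.getConfiguration();
            return ReflectionUtils.newInstance(context.getInputFormatClass(), conf);
        }

        public static void main(String[] args) throws Exception {
            Job job = Job.getInstance(new Configuration());
            job.setInputFormatClass(TextInputFormat.class);
            // Job implements JobContext, so it can be passed directly.
            InputFormat<?, ?> format = resolve(job);
            System.out.println(format.getClass().getName());
        }
    }

getInputFormatClass() throws ClassNotFoundException when the configured class (in recent Hadoop versions, the mapreduce.job.inputformat.class property, defaulting to TextInputFormat) cannot be loaded, which is why the callers below either propagate or wrap that exception.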
From source file:com.linkedin.cubert.io.CubertInputFormat.java
License:Open Source License
private InputFormat<K, V> getActualInputFormat(JobContext context) {
    try {
        // Instantiate the InputFormat configured on the job; it must not be CubertInputFormat itself.
        InputFormat<K, V> actualInputFormat =
                (InputFormat<K, V>) context.getInputFormatClass().newInstance();
        if (actualInputFormat instanceof CubertInputFormat)
            throw new RuntimeException("No actual input format specified");
        return actualInputFormat;
    } catch (InstantiationException e) {
        e.printStackTrace();
    } catch (IllegalAccessException e) {
        e.printStackTrace();
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
    }
    return null;
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopMapperOperatorDescriptor.java
License:Apache License
private Object getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat =
                (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                        .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(conf, new TaskAttemptID());
        return inputFormat.createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit,
                taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        return inputFormat.getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf,
                super.createReporter());
    }
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License:Apache License
private RecordReader getRecordReader(JobConf conf, Object inputSplit)
        throws ClassNotFoundException, IOException, InterruptedException {
    RecordReader hadoopRecordReader = null;
    if (conf.getUseNewMapper()) {
        JobContext context = new ContextFactory().createJobContext(conf);
        org.apache.hadoop.mapreduce.InputFormat inputFormat =
                (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                        .newInstance(context.getInputFormatClass(), conf);
        TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
        hadoopRecordReader = (RecordReader) inputFormat
                .createRecordReader((org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
    } else {
        Class inputFormatClass = conf.getInputFormat().getClass();
        InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
        hadoopRecordReader = (RecordReader) inputFormat
                .getRecordReader((org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
    }
    return hadoopRecordReader;
}
From source file:edu.uci.ics.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License:Apache License
@SuppressWarnings("deprecation") @Override//from w w w . ja v a 2 s . co m public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryOutputSourceOperatorNodePushable() { @Override public void initialize() throws HyracksDataException { try { JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf.setClassLoader(this.getClass().getClassLoader()); RecordReader hadoopRecordReader; Object key; Object value; Object[] splits = inputSplitsProxy.toInputSplits(conf); Object inputSplit = splits[partition]; if (conf.getUseNewMapper()) { JobContext context = new ContextFactory().createJobContext(conf); org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils .newInstance(context.getInputFormatClass(), conf); TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null); hadoopRecordReader = (RecordReader) inputFormat.createRecordReader( (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext); } else { Class inputFormatClass = conf.getInputFormat().getClass(); InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf); hadoopRecordReader = (RecordReader) inputFormat.getRecordReader( (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter()); } Class inputKeyClass; Class inputValueClass; if (hadoopRecordReader instanceof SequenceFileRecordReader) { inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass(); inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass(); } else { inputKeyClass = hadoopRecordReader.createKey().getClass(); inputValueClass = hadoopRecordReader.createValue().getClass(); } key = hadoopRecordReader.createKey(); value = hadoopRecordReader.createValue(); ByteBuffer outBuffer = ctx.allocateFrame(); FrameTupleAppender appender = new FrameTupleAppender(ctx.getFrameSize()); appender.reset(outBuffer, true); RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor( (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(), (Class<? 
extends Writable>) hadoopRecordReader.createValue().getClass()); int nFields = outputRecordDescriptor.getFieldCount(); ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields); writer.open(); try { while (hadoopRecordReader.next(key, value)) { tb.reset(); switch (nFields) { case 2: tb.addField(outputRecordDescriptor.getFields()[0], key); case 1: tb.addField(outputRecordDescriptor.getFields()[1], value); } if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { FrameUtils.flushFrame(outBuffer, writer); appender.reset(outBuffer, true); if (!appender.append(tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize())) { throw new HyracksDataException("Record size (" + tb.getSize() + ") larger than frame size (" + outBuffer.capacity() + ")"); } } } if (appender.getTupleCount() > 0) { FrameUtils.flushFrame(outBuffer, writer); } } catch (Exception e) { writer.fail(); throw new HyracksDataException(e); } finally { writer.close(); } hadoopRecordReader.close(); } catch (InstantiationException e) { throw new HyracksDataException(e); } catch (IllegalAccessException e) { throw new HyracksDataException(e); } catch (ClassNotFoundException e) { throw new HyracksDataException(e); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } }; }
From source file:org.apache.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License:Apache License
@SuppressWarnings("deprecation") @Override/* w ww .j av a2s.c o m*/ public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx, final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions) throws HyracksDataException { return new AbstractUnaryOutputSourceOperatorNodePushable() { @Override public void initialize() throws HyracksDataException { try { JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap); Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader()); conf.setClassLoader(this.getClass().getClassLoader()); RecordReader hadoopRecordReader; Object key; Object value; Object[] splits = inputSplitsProxy.toInputSplits(conf); Object inputSplit = splits[partition]; if (conf.getUseNewMapper()) { JobContext context = new ContextFactory().createJobContext(conf); org.apache.hadoop.mapreduce.InputFormat inputFormat = (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils .newInstance(context.getInputFormatClass(), conf); TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null); hadoopRecordReader = (RecordReader) inputFormat.createRecordReader( (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext); } else { Class inputFormatClass = conf.getInputFormat().getClass(); InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf); hadoopRecordReader = (RecordReader) inputFormat.getRecordReader( (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter()); } Class inputKeyClass; Class inputValueClass; if (hadoopRecordReader instanceof SequenceFileRecordReader) { inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass(); inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass(); } else { inputKeyClass = hadoopRecordReader.createKey().getClass(); inputValueClass = hadoopRecordReader.createValue().getClass(); } key = hadoopRecordReader.createKey(); value = hadoopRecordReader.createValue(); FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx)); RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor( (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(), (Class<? extends Writable>) hadoopRecordReader.createValue().getClass()); int nFields = outputRecordDescriptor.getFieldCount(); ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields); writer.open(); try { while (hadoopRecordReader.next(key, value)) { tb.reset(); switch (nFields) { case 2: tb.addField(outputRecordDescriptor.getFields()[0], key); case 1: tb.addField(outputRecordDescriptor.getFields()[1], value); } FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(), 0, tb.getSize()); } appender.flush(writer, true); } catch (Exception e) { writer.fail(); throw new HyracksDataException(e); } finally { writer.close(); } hadoopRecordReader.close(); } catch (InstantiationException e) { throw new HyracksDataException(e); } catch (IllegalAccessException e) { throw new HyracksDataException(e); } catch (ClassNotFoundException e) { throw new HyracksDataException(e); } catch (InterruptedException e) { throw new HyracksDataException(e); } catch (IOException e) { throw new HyracksDataException(e); } } }; }
From source file:org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2Splitter.java
License:Apache License
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<HadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<HadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new HadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            } else
                res.add(HadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
From source file:org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2Splitter.java
License:Apache License
/**
 * @param ctx Job context.
 * @return Collection of mapped splits.
 * @throws IgniteCheckedException If mapping failed.
 */
public static Collection<GridHadoopInputSplit> splitJob(JobContext ctx) throws IgniteCheckedException {
    try {
        InputFormat<?, ?> format = ReflectionUtils.newInstance(ctx.getInputFormatClass(), ctx.getConfiguration());

        assert format != null;

        List<InputSplit> splits = format.getSplits(ctx);

        Collection<GridHadoopInputSplit> res = new ArrayList<>(splits.size());

        int id = 0;

        for (InputSplit nativeSplit : splits) {
            if (nativeSplit instanceof FileSplit) {
                FileSplit s = (FileSplit) nativeSplit;

                res.add(new GridHadoopFileBlock(s.getLocations(), s.getPath().toUri(), s.getStart(), s.getLength()));
            } else
                res.add(GridHadoopUtils.wrapSplit(id, nativeSplit, nativeSplit.getLocations()));

            id++;
        }

        return res;
    } catch (IOException | ClassNotFoundException e) {
        throw new IgniteCheckedException(e);
    } catch (InterruptedException e) {
        Thread.currentThread().interrupt();

        throw new IgniteInterruptedCheckedException(e);
    }
}
From source file:org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License:Apache License
@SuppressWarnings({ "rawtypes", "unchecked" }) private static org.apache.hadoop.mapreduce.InputSplit[] generateNewSplits(JobContext jobContext, boolean groupSplits, int numTasks) throws ClassNotFoundException, IOException, InterruptedException { Configuration conf = jobContext.getConfiguration(); // This is the real input format. org.apache.hadoop.mapreduce.InputFormat<?, ?> inputFormat = null; try {//from ww w . ja v a 2 s. c o m inputFormat = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), conf); } catch (ClassNotFoundException e) { throw new TezUncheckedException(e); } org.apache.hadoop.mapreduce.InputFormat<?, ?> finalInputFormat = inputFormat; // For grouping, the underlying InputFormatClass class is passed in as a parameter. // JobContext has this setup as TezGroupedSplitInputFormat if (groupSplits) { org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat groupedFormat = new org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat(); groupedFormat.setConf(conf); groupedFormat.setInputFormat(inputFormat); groupedFormat.setDesiredNumberOfSplits(numTasks); finalInputFormat = groupedFormat; } else { finalInputFormat = inputFormat; } List<org.apache.hadoop.mapreduce.InputSplit> array = finalInputFormat.getSplits(jobContext); org.apache.hadoop.mapreduce.InputSplit[] splits = (org.apache.hadoop.mapreduce.InputSplit[]) array .toArray(new org.apache.hadoop.mapreduce.InputSplit[array.size()]); // sort the splits into order based on size, so that the biggest // go first Arrays.sort(splits, new InputSplitComparator()); return splits; }
From source file:org.goldenorb.io.input.VertexInput.java
License:Apache License
@SuppressWarnings("unchecked")
public void initialize() {
    // rebuild the input split
    org.apache.hadoop.mapreduce.InputSplit split = null;
    DataInputBuffer splitBuffer = new DataInputBuffer();
    splitBuffer.reset(rawSplit.getBytes(), 0, rawSplit.getLength());
    SerializationFactory factory = new SerializationFactory(orbConf);
    Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit> deserializer;
    try {
        deserializer = (Deserializer<? extends org.apache.hadoop.mapreduce.InputSplit>) factory
                .getDeserializer(orbConf.getClassByName(splitClass));
        deserializer.open(splitBuffer);
        split = deserializer.deserialize(null);
        JobConf job = new JobConf(orbConf);
        JobContext jobContext = new JobContext(job, new JobID(getOrbConf().getJobNumber(), 0));
        InputFormat<INPUT_KEY, INPUT_VALUE> inputFormat;
        inputFormat = (InputFormat<INPUT_KEY, INPUT_VALUE>) ReflectionUtils
                .newInstance(jobContext.getInputFormatClass(), orbConf);
        TaskAttemptContext tao = new TaskAttemptContext(job,
                new TaskAttemptID(new TaskID(jobContext.getJobID(), true, partitionID), 0));
        recordReader = inputFormat.createRecordReader(split, tao);
        recordReader.initialize(split, tao);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } catch (IOException e) {
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        throw new RuntimeException(e);
    }
}
From source file:org.goldenorb.io.InputSplitAllocator.java
License:Apache License
/**
 * This method gets the raw splits and calls another method to assign them.
 *
 * @return Map
 */
@SuppressWarnings({ "deprecation", "rawtypes", "unchecked" })
public Map<OrbPartitionMember, List<RawSplit>> assignInputSplits() {
    List<RawSplit> rawSplits = null;
    JobConf job = new JobConf(orbConf);
    LOG.debug(orbConf.getJobNumber().toString());
    JobContext jobContext = new JobContext(job, new JobID(orbConf.getJobNumber(), 0));
    org.apache.hadoop.mapreduce.InputFormat<?, ?> input;
    try {
        input = ReflectionUtils.newInstance(jobContext.getInputFormatClass(), orbConf);
        List<org.apache.hadoop.mapreduce.InputSplit> splits = input.getSplits(jobContext);
        rawSplits = new ArrayList<RawSplit>(splits.size());
        DataOutputBuffer buffer = new DataOutputBuffer();
        SerializationFactory factory = new SerializationFactory(orbConf);
        Serializer serializer = factory.getSerializer(splits.get(0).getClass());
        serializer.open(buffer);
        for (int i = 0; i < splits.size(); i++) {
            buffer.reset();
            serializer.serialize(splits.get(i));
            RawSplit rawSplit = new RawSplit();
            rawSplit.setClassName(splits.get(i).getClass().getName());
            rawSplit.setDataLength(splits.get(i).getLength());
            rawSplit.setBytes(buffer.getData(), 0, buffer.getLength());
            rawSplit.setLocations(splits.get(i).getLocations());
            rawSplits.add(rawSplit);
        }
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (IOException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    } catch (InterruptedException e) {
        e.printStackTrace();
        throw new RuntimeException(e);
    }
    return assignInputSplits(rawSplits);
}