Usage examples for org.apache.hadoop.mapred.JobConf#getUseNewMapper()
public boolean getUseNewMapper()
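getUseNewMapper() reports whether the job is configured to use the new-style (org.apache.hadoop.mapreduce) mapper API; it reads the boolean job property mapred.mapper.new-api, which setUseNewMapper(boolean) sets. Before the project examples below, here is a minimal illustrative sketch of the typical check; the class name and printed messages are made up for illustration and are not taken from any of the source files listed.

import org.apache.hadoop.mapred.JobConf;

public class UseNewMapperExample {
    public static void main(String[] args) {
        JobConf jobConf = new JobConf();

        // setUseNewMapper(true) sets "mapred.mapper.new-api",
        // the property that getUseNewMapper() reads (default: false).
        jobConf.setUseNewMapper(true);

        if (jobConf.getUseNewMapper()) {
            // Configure the job with org.apache.hadoop.mapreduce.* (new API) classes.
            System.out.println("Job uses the new mapreduce mapper API");
        } else {
            // Configure the job with org.apache.hadoop.mapred.* (old API) classes.
            System.out.println("Job uses the old mapred mapper API");
        }
    }
}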
From source file: org.apache.hyracks.dataflow.hadoop.HadoopReadOperatorDescriptor.java
License: Apache License

@SuppressWarnings("deprecation")
@Override
public IOperatorNodePushable createPushRuntime(final IHyracksTaskContext ctx,
        final IRecordDescriptorProvider recordDescProvider, final int partition, int nPartitions)
        throws HyracksDataException {
    return new AbstractUnaryOutputSourceOperatorNodePushable() {
        @Override
        public void initialize() throws HyracksDataException {
            try {
                JobConf conf = DatatypeHelper.map2JobConf((HashMap) jobConfMap);
                Thread.currentThread().setContextClassLoader(this.getClass().getClassLoader());
                conf.setClassLoader(this.getClass().getClassLoader());
                RecordReader hadoopRecordReader;
                Object key;
                Object value;
                Object[] splits = inputSplitsProxy.toInputSplits(conf);
                Object inputSplit = splits[partition];

                if (conf.getUseNewMapper()) {
                    JobContext context = new ContextFactory().createJobContext(conf);
                    org.apache.hadoop.mapreduce.InputFormat inputFormat =
                            (org.apache.hadoop.mapreduce.InputFormat) ReflectionUtils
                                    .newInstance(context.getInputFormatClass(), conf);
                    TaskAttemptContext taskAttemptContext = new ContextFactory().createContext(jobConf, null);
                    hadoopRecordReader = (RecordReader) inputFormat.createRecordReader(
                            (org.apache.hadoop.mapreduce.InputSplit) inputSplit, taskAttemptContext);
                } else {
                    Class inputFormatClass = conf.getInputFormat().getClass();
                    InputFormat inputFormat = (InputFormat) ReflectionUtils.newInstance(inputFormatClass, conf);
                    hadoopRecordReader = (RecordReader) inputFormat.getRecordReader(
                            (org.apache.hadoop.mapred.InputSplit) inputSplit, conf, createReporter());
                }

                Class inputKeyClass;
                Class inputValueClass;
                if (hadoopRecordReader instanceof SequenceFileRecordReader) {
                    inputKeyClass = ((SequenceFileRecordReader) hadoopRecordReader).getKeyClass();
                    inputValueClass = ((SequenceFileRecordReader) hadoopRecordReader).getValueClass();
                } else {
                    inputKeyClass = hadoopRecordReader.createKey().getClass();
                    inputValueClass = hadoopRecordReader.createValue().getClass();
                }

                key = hadoopRecordReader.createKey();
                value = hadoopRecordReader.createValue();
                FrameTupleAppender appender = new FrameTupleAppender(new VSizeFrame(ctx));
                RecordDescriptor outputRecordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(
                        (Class<? extends Writable>) hadoopRecordReader.createKey().getClass(),
                        (Class<? extends Writable>) hadoopRecordReader.createValue().getClass());
                int nFields = outputRecordDescriptor.getFieldCount();
                ArrayTupleBuilder tb = new ArrayTupleBuilder(nFields);
                writer.open();
                try {
                    while (hadoopRecordReader.next(key, value)) {
                        tb.reset();
                        switch (nFields) {
                            case 2:
                                tb.addField(outputRecordDescriptor.getFields()[0], key);
                            case 1:
                                tb.addField(outputRecordDescriptor.getFields()[1], value);
                        }
                        FrameUtils.appendToWriter(writer, appender, tb.getFieldEndOffsets(), tb.getByteArray(),
                                0, tb.getSize());
                    }
                    appender.flush(writer, true);
                } catch (Exception e) {
                    writer.fail();
                    throw new HyracksDataException(e);
                } finally {
                    writer.close();
                }
                hadoopRecordReader.close();
            } catch (InstantiationException e) {
                throw new HyracksDataException(e);
            } catch (IllegalAccessException e) {
                throw new HyracksDataException(e);
            } catch (ClassNotFoundException e) {
                throw new HyracksDataException(e);
            } catch (InterruptedException e) {
                throw new HyracksDataException(e);
            } catch (IOException e) {
                throw new HyracksDataException(e);
            }
        }
    };
}
From source file: org.apache.ignite.internal.processors.hadoop.GridHadoopUtils.java
License: Apache License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}
From source file: org.apache.ignite.internal.processors.hadoop.HadoopUtils.java
License: Apache License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws IgniteCheckedException If failed.
 */
public static HadoopDefaultJobInfo createJobInfo(Configuration cfg) throws IgniteCheckedException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new HadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}
From source file: org.apache.ignite.internal.processors.hadoop.impl.v2.HadoopV2TaskContext.java
License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput for read JobConf.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJobEx job, HadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before create JobConf instance we should set new context class loader.
    ClassLoader oldLdr = HadoopCommonUtils.setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        initializePartiallyRawComparator(jobConf);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        HadoopCommonUtils.restoreContextClassLoader(oldLdr);
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.GridHadoopV2TaskContext.java
License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput for read JobConf.
 */
public GridHadoopV2TaskContext(GridHadoopTaskInfo taskInfo, GridHadoopJob job, GridHadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before create JobConf instance we should set new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
From source file: org.apache.ignite.internal.processors.hadoop.v2.HadoopV2TaskContext.java
License: Apache License

/**
 * @param taskInfo Task info.
 * @param job Job.
 * @param jobId Job ID.
 * @param locNodeId Local node ID.
 * @param jobConfDataInput DataInput for read JobConf.
 */
public HadoopV2TaskContext(HadoopTaskInfo taskInfo, HadoopJob job, HadoopJobId jobId,
        @Nullable UUID locNodeId, DataInput jobConfDataInput) throws IgniteCheckedException {
    super(taskInfo, job);

    this.locNodeId = locNodeId;

    // Before create JobConf instance we should set new context class loader.
    Thread.currentThread().setContextClassLoader(getClass().getClassLoader());

    try {
        JobConf jobConf = new JobConf();

        try {
            jobConf.readFields(jobConfDataInput);
        } catch (IOException e) {
            throw new IgniteCheckedException(e);
        }

        // For map-reduce jobs prefer local writes.
        jobConf.setBooleanIfUnset(PARAM_IGFS_PREFER_LOCAL_WRITES, true);

        jobCtx = new JobContextImpl(jobConf, new JobID(jobId.globalId().toString(), jobId.localId()));

        useNewMapper = jobConf.getUseNewMapper();
        useNewReducer = jobConf.getUseNewReducer();
        useNewCombiner = jobConf.getCombinerClass() == null;
    } finally {
        Thread.currentThread().setContextClassLoader(null);
    }
}
From source file: org.apache.tez.mapreduce.common.MRInputSplitDistributor.java
License: Apache License

@Override
public List<Event> initialize() throws IOException {
    Stopwatch sw = null;
    if (LOG.isDebugEnabled()) {
        sw = new Stopwatch().start();
    }
    MRInputUserPayloadProto userPayloadProto = MRInputHelpers
            .parseMRInputPayload(getContext().getInputUserPayload());
    if (LOG.isDebugEnabled()) {
        sw.stop();
        LOG.debug("Time to parse MRInput payload into prot: " + sw.elapsedMillis());
    }

    Configuration conf = TezUtils.createConfFromByteString(userPayloadProto.getConfigurationBytes());
    JobConf jobConf = new JobConf(conf);
    boolean useNewApi = jobConf.getUseNewMapper();
    sendSerializedEvents = conf.getBoolean(MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD,
            MRJobConfig.MR_TEZ_INPUT_INITIALIZER_SERIALIZE_EVENT_PAYLOAD_DEFAULT);
    LOG.info("Emitting serialized splits: " + sendSerializedEvents);

    this.splitsProto = userPayloadProto.getSplits();

    MRInputUserPayloadProto.Builder updatedPayloadBuilder = MRInputUserPayloadProto.newBuilder(userPayloadProto);
    updatedPayloadBuilder.clearSplits();

    List<Event> events = Lists.newArrayListWithCapacity(this.splitsProto.getSplitsCount() + 1);
    InputUpdatePayloadEvent updatePayloadEvent = InputUpdatePayloadEvent
            .create(updatedPayloadBuilder.build().toByteString().asReadOnlyByteBuffer());

    events.add(updatePayloadEvent);
    int count = 0;

    for (MRSplitProto mrSplit : this.splitsProto.getSplitsList()) {
        InputDataInformationEvent diEvent;

        if (sendSerializedEvents) {
            // Unnecessary array copy, can be avoided by using ByteBuffer instead of
            // a raw array.
            diEvent = InputDataInformationEvent.createWithSerializedPayload(count++,
                    mrSplit.toByteString().asReadOnlyByteBuffer());
        } else {
            if (useNewApi) {
                org.apache.hadoop.mapreduce.InputSplit newInputSplit = MRInputUtils
                        .getNewSplitDetailsFromEvent(mrSplit, conf);
                diEvent = InputDataInformationEvent.createWithObjectPayload(count++, newInputSplit);
            } else {
                org.apache.hadoop.mapred.InputSplit oldInputSplit = MRInputUtils
                        .getOldSplitDetailsFromEvent(mrSplit, conf);
                diEvent = InputDataInformationEvent.createWithObjectPayload(count++, oldInputSplit);
            }
        }
        events.add(diEvent);
    }

    return events;
}
From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License: Apache License

/**
 * Generates Input splits and stores them in a {@link org.apache.tez.mapreduce.protos.MRRuntimeProtos.MRSplitsProto} instance.
 *
 * Returns an instance of {@link InputSplitInfoMem}.
 *
 * With grouping enabled, the eventual configuration used by the tasks will have
 * the user-specified InputFormat replaced by either {@link org.apache.hadoop.mapred.split.TezGroupedSplitsInputFormat}
 * or {@link org.apache.hadoop.mapreduce.split.TezGroupedSplitsInputFormat}.
 *
 * @param conf an instance of Configuration which is used to determine whether
 *          the mapred or mapreduce API is being used. This Configuration
 *          instance should also contain adequate information to be able to
 *          generate splits - like the InputFormat being used and related
 *          configuration.
 * @param groupSplits whether to group the splits or not
 * @param targetTasks the number of target tasks if grouping is enabled. Specify as 0 otherwise.
 * @return an instance of {@link InputSplitInfoMem} which supports a subset of
 *         the APIs defined on {@link InputSplitInfo}
 * @throws IOException
 * @throws ClassNotFoundException
 * @throws InterruptedException
 */
@InterfaceStability.Unstable
public static InputSplitInfoMem generateInputSplitsToMem(Configuration conf, boolean groupSplits,
        int targetTasks) throws IOException, ClassNotFoundException, InterruptedException {
    InputSplitInfoMem splitInfoMem = null;
    JobConf jobConf = new JobConf(conf);
    if (jobConf.getUseNewMapper()) {
        LOG.info("Generating mapreduce api input splits");
        Job job = Job.getInstance(conf);
        org.apache.hadoop.mapreduce.InputSplit[] splits = generateNewSplits(job, groupSplits, targetTasks);
        splitInfoMem = new InputSplitInfoMem(splits, createTaskLocationHintsFromSplits(splits), splits.length,
                job.getCredentials(), job.getConfiguration());
    } else {
        LOG.info("Generating mapred api input splits");
        org.apache.hadoop.mapred.InputSplit[] splits = generateOldSplits(jobConf, groupSplits, targetTasks);
        splitInfoMem = new InputSplitInfoMem(splits, createTaskLocationHintsFromSplits(splits), splits.length,
                jobConf.getCredentials(), jobConf);
    }
    LOG.info("NumSplits: " + splitInfoMem.getNumTasks() + ", SerializedSize: "
            + splitInfoMem.getSplitsProto().getSerializedSize());
    return splitInfoMem;
}
From source file: org.apache.tez.mapreduce.hadoop.MRInputHelpers.java
License: Apache License

/**
 * Helper api to generate splits.
 * @param conf Configuration with all necessary information set to generate
 * splits. The following are required at a minimum:
 *
 *   - mapred.mapper.new-api: determines whether mapred.InputFormat or
 *     mapreduce.InputFormat is to be used
 *   - mapred.input.format.class or mapreduce.job.inputformat.class:
 *     determines the InputFormat class to be used
 *
 * In addition to this, all the configs needed by the InputFormat class also
 * have to be set. For example, FileInputFormat needs the input directory
 * paths to be set in the config.
 *
 * @param inputSplitsDir Directory in which the splits file and meta info file
 * will be generated. job.split and job.splitmetainfo files in this directory
 * will be overwritten. Should be a fully-qualified path.
 *
 * @return InputSplitInfo containing the split files' information and the
 * number of splits generated to be used to determining parallelism of
 * the map stage.
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
private static InputSplitInfoDisk generateInputSplits(Configuration conf, Path inputSplitsDir)
        throws IOException, InterruptedException, ClassNotFoundException {
    Job job = Job.getInstance(conf);
    JobConf jobConf = new JobConf(conf);
    conf.setBoolean(MRJobConfig.MR_TEZ_SPLITS_VIA_EVENTS, false);
    if (jobConf.getUseNewMapper()) {
        LOG.info("Generating new input splits" + ", splitsDir=" + inputSplitsDir.toString());
        return writeNewSplits(job, inputSplitsDir);
    } else {
        LOG.info("Generating old input splits" + ", splitsDir=" + inputSplitsDir.toString());
        return writeOldSplits(jobConf, inputSplitsDir);
    }
}
From source file: org.gridgain.grid.kernal.processors.hadoop.GridHadoopUtils.java
License: Open Source License

/**
 * Creates JobInfo from hadoop configuration.
 *
 * @param cfg Hadoop configuration.
 * @return Job info.
 * @throws GridException If failed.
 */
public static GridHadoopDefaultJobInfo createJobInfo(Configuration cfg) throws GridException {
    JobConf jobConf = new JobConf(cfg);

    boolean hasCombiner = jobConf.get("mapred.combiner.class") != null
            || jobConf.get(MRJobConfig.COMBINE_CLASS_ATTR) != null;

    int numReduces = jobConf.getNumReduceTasks();

    jobConf.setBooleanIfUnset("mapred.mapper.new-api", jobConf.get(OLD_MAP_CLASS_ATTR) == null);

    if (jobConf.getUseNewMapper()) {
        String mode = "new map API";

        ensureNotSet(jobConf, "mapred.input.format.class", mode);
        ensureNotSet(jobConf, OLD_MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, "mapred.partitioner.class", mode);
        else
            ensureNotSet(jobConf, "mapred.output.format.class", mode);
    } else {
        String mode = "map compatibility";

        ensureNotSet(jobConf, MRJobConfig.INPUT_FORMAT_CLASS_ATTR, mode);
        ensureNotSet(jobConf, MRJobConfig.MAP_CLASS_ATTR, mode);

        if (numReduces != 0)
            ensureNotSet(jobConf, MRJobConfig.PARTITIONER_CLASS_ATTR, mode);
        else
            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
    }

    if (numReduces != 0) {
        jobConf.setBooleanIfUnset("mapred.reducer.new-api", jobConf.get(OLD_REDUCE_CLASS_ATTR) == null);

        if (jobConf.getUseNewReducer()) {
            String mode = "new reduce API";

            ensureNotSet(jobConf, "mapred.output.format.class", mode);
            ensureNotSet(jobConf, OLD_REDUCE_CLASS_ATTR, mode);
        } else {
            String mode = "reduce compatibility";

            ensureNotSet(jobConf, MRJobConfig.OUTPUT_FORMAT_CLASS_ATTR, mode);
            ensureNotSet(jobConf, MRJobConfig.REDUCE_CLASS_ATTR, mode);
        }
    }

    Map<String, String> props = new HashMap<>();

    for (Map.Entry<String, String> entry : jobConf)
        props.put(entry.getKey(), entry.getValue());

    return new GridHadoopDefaultJobInfo(jobConf.getJobName(), jobConf.getUser(), hasCombiner, numReduces, props);
}