List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputValueClass
public Class<?> getMapOutputValueClass()
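The method returns the value class configured for the map output records; when no map-specific class has been set, it falls back to the job's output value class. Before the project examples below, here is a minimal stand-alone sketch of that behavior (our own illustration, not taken from any of the listed source files; the class name MapOutputValueClassExample is hypothetical):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class MapOutputValueClassExample {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No explicit map output value class: the call falls back to the
        // job's output value class.
        conf.setOutputValueClass(Text.class);
        System.out.println(conf.getMapOutputValueClass().getName()); // org.apache.hadoop.io.Text

        // An explicitly set map output value class takes precedence.
        conf.setMapOutputValueClass(IntWritable.class);
        System.out.println(conf.getMapOutputValueClass().getName()); // org.apache.hadoop.io.IntWritable
    }
}

The examples that follow show the same call used to build sort operators, partitioners, and mapper configuration in real projects.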
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    InMemorySortOperatorDescriptor inMemorySortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return inMemorySortOp;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    ExternalSortOperatorDescriptor externalSortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    externalSortOp = new ExternalSortOperatorDescriptor(spec,
            conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT), new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return externalSortOp;
}
From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java
License:Apache License
public static MToNPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
        IConnectorDescriptorRegistry spec) {
    Class mapOutputKeyClass = conf.getMapOutputKeyClass();
    Class mapOutputValueClass = conf.getMapOutputValueClass();
    MToNPartitioningConnectorDescriptor connectorDescriptor = null;
    ITuplePartitionComputerFactory factory = null;
    conf.getMapOutputKeyClass();
    if (conf.getPartitionerClass() != null
            && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
        Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
        factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner,
                DatatypeHelper.createSerializerDeserializer(mapOutputKeyClass),
                DatatypeHelper.createSerializerDeserializer(mapOutputValueClass));
    } else {
        RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
                mapOutputValueClass);
        ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
                .createSerializerDeserializer(mapOutputKeyClass);
        factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
    }
    connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, factory);
    return connectorDescriptor;
}
From source file:org.apache.sysml.runtime.matrix.mapred.GMRMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    mapperID = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");
    _filterEmptyInputBlocks = allowsFilterEmptyInputBlocks();

    //assign the temporary variables
    try {
        // System.out.println(valueClass.getName());
        // System.out.println(MatrixCell.class.getName());
        if (job.getMapOutputValueClass().equals(TaggedMatrixPackedCell.class))
            taggedValueBuffer = TaggedMatrixValue.createObject(MatrixPackedCell.class);
        else
            taggedValueBuffer = TaggedMatrixValue.createObject(valueClass);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    //decide whether it is a map-only job
    mapOnlyJob = (job.getNumReduceTasks() <= 0);
    if (!mapOnlyJob)
        return;

    //get the indexes of the final output matrices
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    //initialize SystemML Counters (defined in MRJobConfiguration)
    resultsNonZeros = new long[resultIndexes.length];
    resultsMaxRowDims = new long[resultIndexes.length];
    resultsMaxColDims = new long[resultIndexes.length];

    tagMapping = new HashMap<>();
    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        ArrayList<Integer> vec = tagMapping.get(output);
        if (vec == null) {
            vec = new ArrayList<>();
            tagMapping.put(output, vec);
        }
        vec.add(i);
    }

    //for a map-only job, get the map output converters
    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);
}
From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java
License:Apache License
/**
 * Pulls in specific keys from the base configuration, if they are not set at
 * the stage level. An explicit list of keys is copied over (not all), which
 * require translation to tez keys.
 */
private static void setStageKeysFromBaseConf(Configuration conf, Configuration baseConf, String stage) {
    // Don't clobber explicit tez config.
    JobConf jobConf = null;
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) {
        // If this is set, but the comparator is not set, and their types differ -
        // the job will break.
        if (conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS) == null) {
            // Pull this in from the baseConf
            // Create jobConf only if required.
            jobConf = new JobConf(baseConf);
            conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, jobConf.getMapOutputKeyClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS));
            }
        }
    }
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS) == null) {
        if (conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS) == null) {
            if (jobConf == null) {
                // Create jobConf if not already created
                jobConf = new JobConf(baseConf);
            }
            conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, jobConf.getMapOutputValueClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS));
            }
        }
    }
}
From source file:org.dkpro.bigdata.hadoop.DkproMapper.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        // create an output writable of the appropriate type
        outValue = (CASWritable) job.getMapOutputValueClass().newInstance();
        docLanguage = job.get("dkpro.document.language");
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java
License:Open Source License
@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }
        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);
    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }
}
From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java
License:Apache License
@SuppressWarnings("unchecked") @Override/*from w w w .ja v a 2 s . co m*/ public void configure(JobConf job) { super.configure(job); debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$ transMapXml = job.get("transformation-map-xml"); transCombinerXml = job.get("transformation-combiner-xml"); transReduceXml = job.get("transformation-reduce-xml"); mapInputStepName = job.get("transformation-map-input-stepname"); mapOutputStepName = job.get("transformation-map-output-stepname"); combinerInputStepName = job.get("transformation-combiner-input-stepname"); combinerOutputStepName = job.get("transformation-combiner-output-stepname"); combineSingleThreaded = isCombinerSingleThreaded(job); reduceInputStepName = job.get("transformation-reduce-input-stepname"); reduceOutputStepName = job.get("transformation-reduce-output-stepname"); reduceSingleThreaded = isReducerSingleThreaded(job); String xmlVariableSpace = job.get("variableSpace"); if (!Const.isEmpty(xmlVariableSpace)) { setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: "); // deserialize from xml to variable space XStream xStream = new XStream(); if (xStream != null) { setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: "); variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace); for (String variableName : variableSpace.listVariables()) { if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(variableName, variableSpace.getVariable(variableName)); } } } } else { setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration."); variableSpace = new Variables(); } // Check for environment variables in the userDefined variables Iterator<Entry<String, String>> iter = job.iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) { System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue()); } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(entry.getKey(), entry.getValue()); } } MRUtil.passInformationToTransformation(variableSpace, job); switch (mrOperation) { case Combine: outClassK = (Class<K>) job.getMapOutputKeyClass(); outClassV = (Class<V>) job.getMapOutputValueClass(); break; case Reduce: outClassK = (Class<K>) job.getOutputKeyClass(); outClassV = (Class<V>) job.getOutputValueClass(); break; default: throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation); } if (debug) { System.out.println("Job configuration>"); System.out.println("Output key class: " + outClassK.getName()); System.out.println("Output value class: " + outClassV.getName()); } // set the log level to what the level of the job is String stringLogLevel = job.get("logLevel"); if (!Const.isEmpty(stringLogLevel)) { logLevel = LogLevel.valueOf(stringLogLevel); setDebugStatus("Log level set to " + stringLogLevel); } else { System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set."); } createTrans(job); }
From source file:org.pentaho.hadoop.mapreduce.PentahoMapRunnable.java
License:Apache License
public void configure(JobConf job) {
    pluginWaitTimeout = TimeUnit.MINUTES.toMillis(5);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    String xmlVariableSpace = job.get("variableSpace");

    outClassK = (Class<K2>) job.getMapOutputKeyClass();
    outClassV = (Class<V2>) job.getMapOutputValueClass();

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapRunnable(): variableSpace was retrieved from the job. The contents: ");
        setDebugStatus(xmlVariableSpace);

        // deserialize from xml to variable space
        XStream xStream = new XStream();

        setDebugStatus("PentahoMapRunnable(): Setting classes variableSpace property.: ");
        variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

        for (String variableName : variableSpace.listVariables()) {
            if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(variableName, variableSpace.getVariable(variableName));
            }
            if (KETTLE_PMR_PLUGIN_TIMEOUT.equals(variableName)) {
                try {
                    pluginWaitTimeout = Long.parseLong(variableSpace.getVariable(variableName));
                } catch (Exception e) {
                    System.out.println("Unable to parse plugin wait timeout, defaulting to 5 minutes");
                }
            }
        }
    } else {
        setDebugStatus("PentahoMapRunnable(): The PDI Job's variable space was not sent.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    setDebugStatus("Job configuration");
    setDebugStatus("Output key class: " + outClassK.getName());
    setDebugStatus("Output value class: " + outClassV.getName());

    // set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration. logLevel will not be set.");
    }

    long deadline = 0;
    boolean first = true;
    while (true) {
        createTrans(job);
        if (first) {
            deadline = pluginWaitTimeout + System.currentTimeMillis();
            System.out.println(PentahoMapRunnable.class + ": Trans creation checking starting now "
                    + new Date().toString());
            first = false;
        }
        List<MissingTrans> missingTranses = new ArrayList<MissingTrans>();
        for (StepMeta stepMeta : trans.getTransMeta().getSteps()) {
            StepMetaInterface stepMetaInterface = stepMeta.getStepMetaInterface();
            if (stepMetaInterface instanceof MissingTrans) {
                MissingTrans missingTrans = (MissingTrans) stepMetaInterface;
                System.out.println(MissingTrans.class + "{stepName: " + missingTrans.getStepName()
                        + ", missingPluginId: " + missingTrans.getMissingPluginId() + "}");
                missingTranses.add(missingTrans);
            }
        }
        if (missingTranses.size() == 0) {
            System.out.println(
                    PentahoMapRunnable.class + ": Done waiting on plugins now " + new Date().toString());
            break;
        } else {
            if (System.currentTimeMillis() > deadline) {
                StringBuilder stringBuilder = new StringBuilder("Failed to initialize plugins: ");
                for (MissingTrans missingTrans : missingTranses) {
                    stringBuilder.append(missingTrans.getMissingPluginId());
                    stringBuilder.append(" on step ").append(missingTrans.getStepName());
                    stringBuilder.append(", ");
                }
                stringBuilder.setLength(stringBuilder.length() - 2);
                throw new RuntimeException(stringBuilder.toString());
            } else {
                try {
                    Thread.sleep(Math.min(100, deadline - System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputValueClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}