Example usage for org.apache.hadoop.mapred JobConf getMapOutputValueClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getMapOutputValueClass.

Prototype

public Class<?> getMapOutputValueClass() 

Document

Get the value class for the map output data.
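
If no map output value class has been set explicitly, getMapOutputValueClass() falls back to the job's (final) output value class. The sketch below is a minimal, self-contained illustration of the typical pattern and is not taken from any of the projects listed under Usage: a mapper written against the old org.apache.hadoop.mapred API reads the configured value class in configure() and instantiates a reusable output buffer through ReflectionUtils. The class name GenericValueMapper and the field valueBuffer are illustrative only.

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.util.ReflectionUtils;

public class GenericValueMapper extends MapReduceBase
        implements Mapper<LongWritable, Text, Text, Writable> {

    // Reusable output value, created from whatever class the job configured
    // via JobConf.setMapOutputValueClass(...).
    private Writable valueBuffer;

    @Override
    public void configure(JobConf job) {
        // getMapOutputValueClass() returns the map output value class, or the
        // job's output value class when no map-specific class was set.
        Class<?> valueClass = job.getMapOutputValueClass();
        valueBuffer = (Writable) ReflectionUtils.newInstance(valueClass, job);
    }

    public void map(LongWritable key, Text value, OutputCollector<Text, Writable> output,
            Reporter reporter) throws IOException {
        // ... populate valueBuffer from the input record, then emit it.
        output.collect(new Text(key.toString()), valueBuffer);
    }
}

A driver would pair this with a call such as conf.setMapOutputValueClass(IntWritable.class); leaving it unset makes getMapOutputValueClass() return the class passed to setOutputValueClass instead.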

Usage

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static InMemorySortOperatorDescriptor getInMemorySorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    InMemorySortOperatorDescriptor inMemorySortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    inMemorySortOp = new InMemorySortOperatorDescriptor(spec, new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return inMemorySortOp;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static ExternalSortOperatorDescriptor getExternalSorter(JobConf conf, IOperatorDescriptorRegistry spec) {
    ExternalSortOperatorDescriptor externalSortOp = null;
    RecordDescriptor recordDescriptor = getHadoopRecordDescriptor(conf.getMapOutputKeyClass().getName(),
            conf.getMapOutputValueClass().getName());
    Class<? extends RawComparator> rawComparatorClass = null;
    WritableComparator writableComparator = WritableComparator
            .get(conf.getMapOutputKeyClass().asSubclass(WritableComparable.class));
    WritableComparingBinaryComparatorFactory comparatorFactory = new WritableComparingBinaryComparatorFactory(
            writableComparator.getClass());
    externalSortOp = new ExternalSortOperatorDescriptor(spec,
            conf.getInt(HYRACKS_EX_SORT_FRAME_LIMIT, DEFAULT_EX_SORT_FRAME_LIMIT), new int[] { 0 },
            new IBinaryComparatorFactory[] { comparatorFactory }, recordDescriptor);
    return externalSortOp;
}

From source file:edu.uci.ics.hyracks.hadoop.compat.util.HadoopAdapter.java

License:Apache License

public static MToNPartitioningConnectorDescriptor getMtoNHashPartitioningConnector(JobConf conf,
        IConnectorDescriptorRegistry spec) {

    Class mapOutputKeyClass = conf.getMapOutputKeyClass();
    Class mapOutputValueClass = conf.getMapOutputValueClass();

    MToNPartitioningConnectorDescriptor connectorDescriptor = null;
    ITuplePartitionComputerFactory factory = null;
    conf.getMapOutputKeyClass();
    if (conf.getPartitionerClass() != null
            && !conf.getPartitionerClass().getName().startsWith("org.apache.hadoop")) {
        Class<? extends Partitioner> partitioner = conf.getPartitionerClass();
        factory = new HadoopPartitionerTuplePartitionComputerFactory(partitioner,
                DatatypeHelper.createSerializerDeserializer(mapOutputKeyClass),
                DatatypeHelper.createSerializerDeserializer(mapOutputValueClass));
    } else {
        RecordDescriptor recordDescriptor = DatatypeHelper.createKeyValueRecordDescriptor(mapOutputKeyClass,
                mapOutputValueClass);
        ISerializerDeserializer mapOutputKeySerializerDerserializer = DatatypeHelper
                .createSerializerDeserializer(mapOutputKeyClass);
        factory = new HadoopHashTuplePartitionComputerFactory(mapOutputKeySerializerDerserializer);
    }
    connectorDescriptor = new MToNPartitioningConnectorDescriptor(spec, factory);
    return connectorDescriptor;
}

From source file:org.apache.sysml.runtime.matrix.mapred.GMRMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);

    mapperID = job.get(MRConfigurationNames.MR_TASK_ATTEMPT_ID);
    dimsUnknownFilePrefix = job.get("dims.unknown.file.prefix");

    _filterEmptyInputBlocks = allowsFilterEmptyInputBlocks();

    //assign the temporary variables
    try {
        //   System.out.println(valueClass.getName());
        //   System.out.println(MatrixCell.class.getName());
        if (job.getMapOutputValueClass().equals(TaggedMatrixPackedCell.class))
            taggedValueBuffer = TaggedMatrixValue.createObject(MatrixPackedCell.class);
        else
            taggedValueBuffer = TaggedMatrixValue.createObject(valueClass);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }

    //decide whether it is a maponly job
    mapOnlyJob = (job.getNumReduceTasks() <= 0);
    if (!mapOnlyJob)
        return;

    //get the indexes of the final output matrices
    resultIndexes = MRJobConfiguration.getResultIndexes(job);
    resultDimsUnknown = MRJobConfiguration.getResultDimsUnknown(job);

    //initialize SystemML Counters (defined in MRJobConfiguration)
    resultsNonZeros = new long[resultIndexes.length];
    resultsMaxRowDims = new long[resultIndexes.length];
    resultsMaxColDims = new long[resultIndexes.length];

    tagMapping = new HashMap<>();
    for (int i = 0; i < resultIndexes.length; i++) {
        byte output = resultIndexes[i];
        ArrayList<Integer> vec = tagMapping.get(output);
        if (vec == null) {
            vec = new ArrayList<>();
            tagMapping.put(output, vec);
        }
        vec.add(i);
    }
    //for map only job, get the map output converters 
    collectFinalMultipleOutputs = MRJobConfiguration.getMultipleConvertedOutputs(job);
}

From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java

License:Apache License

/**
 * Pulls in specific keys from the base configuration, if they are not set at
 * the stage level. An explicit list of keys is copied over (not all), which
 * require translation to tez keys.
 */
private static void setStageKeysFromBaseConf(Configuration conf, Configuration baseConf, String stage) {
    // Don't clobber explicit tez config.
    JobConf jobConf = null;
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) {
        // If this is set, but the comparator is not set, and their types differ -
        // the job will break.
        if (conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS) == null) {
            // Pull this in from the baseConf
            // Create jobConf only if required.
            jobConf = new JobConf(baseConf);
            conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, jobConf.getMapOutputKeyClass().getName());

            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS));
            }
        }
    }

    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS) == null) {
        if (conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS) == null) {
            if (jobConf == null) {
                // Create jobConf if not already created
                jobConf = new JobConf(baseConf);
            }
            conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, jobConf.getMapOutputValueClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS));
            }
        }
    }
}

From source file:org.dkpro.bigdata.hadoop.DkproMapper.java

License:Apache License

@Override
public void configure(JobConf job) {
    super.configure(job);
    try {
        // create an output writable of the appropriate type
        outValue = (CASWritable) job.getMapOutputValueClass().newInstance();
        docLanguage = job.get("dkpro.document.language");
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}

From source file:org.dkpro.bigdata.hadoop.UIMAMapReduceBase.java

License:Open Source License

@Override
public void configure(JobConf job) {
    try {
        this.job = job;
        this.inputName = job.get("mapred.input.dir");
        this.taskId = job.get("mapred.task.id");
        this.mapOutputValueClass = job.getMapOutputValueClass();
        this.outputValueClass = job.getOutputValueClass();
        this.samplingPropability = job.getInt("dkpro.map.samplingratio", 100);
        final EngineFactory engineFactory = (EngineFactory) Class
                .forName(job.get("dkpro.uima.factory", DkproHadoopDriver.class.getName())).newInstance();
        engineFactory.configure(job);

        final AnalysisEngineDescription engineDescription = getEngineDescription(engineFactory, job);

        // replace the $dir variable within the configuration.
        this.fs = FileSystem.get(job);
        this.localFS = FileSystem.getLocal(job);
        if (job.getBoolean("dkpro.output.onedirpertask", true)) {
            this.working_dir = new Path("uima_output_" + job.get("mapred.task.id"));
        } else {
            this.working_dir = new Path("uima_output");
        }
        final Path outputPath = FileOutputFormat.getOutputPath(job);
        this.results_dir = this.fs.startLocalOutput(outputPath, job.getLocalPath(this.working_dir.getName()));
        this.localFS.mkdirs(this.results_dir);
        final String[] resources = job.get("dkpro.resources", "").split(",");
        sLogger.info("Writing local data to: " + this.results_dir);
        this.resourceURIs = new TreeMap<String, URL>();
        for (final String resource : resources) {
            final URL r = job.getResource(resource);
            if (r != null && !resource.isEmpty()) {
                this.resourceURIs.put(resource, r);
            }

        }
        Map<String, String> variableValues = new HashMap<String, String>();
        variableValues.put("\\$dir", this.results_dir.toString());
        variableValues.put("\\$input", this.inputName);
        variableValues.put("\\$taskid", this.taskId);
        Path[] cacheFiles = DistributedCache.getLocalCacheFiles(job);
        if (cacheFiles != null) {
            for (Path cacheFile : cacheFiles) {
                variableValues.put("^\\$cache/" + cacheFile.getName(), cacheFile.toUri().getPath());
            }
        }
        for (final Entry<String, URL> resource : this.resourceURIs.entrySet()) {
            variableValues.put("\\$" + resource, resource.getValue().toString());
        }
        AnalysisEngineUtil.replaceVariables(engineDescription, variableValues);
        this.engine = createEngine(engineDescription);

    } catch (final Exception e) {
        sLogger.fatal("Error while configuring pipeline", e);
        e.printStackTrace();
        throw new RuntimeException(e);
    }

}

From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transCombinerXml = job.get("transformation-combiner-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    combinerInputStepName = job.get("transformation-combiner-input-stepname");
    combinerOutputStepName = job.get("transformation-combiner-output-stepname");
    combineSingleThreaded = isCombinerSingleThreaded(job);
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    reduceSingleThreaded = isReducerSingleThreaded(job);
    String xmlVariableSpace = job.get("variableSpace");

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job.  The contents: ");

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        if (xStream != null) {
            setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: ");
            variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

            for (String variableName : variableSpace.listVariables()) {
                if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                    System.setProperty(variableName, variableSpace.getVariable(variableName));
                }
            }
        }
    } else {
        setDebugStatus(
                "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    switch (mrOperation) {
    case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
    case Reduce:
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
    default:
        throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation);
    }

    if (debug) {
        System.out.println("Job configuration>");
        System.out.println("Output key class: " + outClassK.getName());
        System.out.println("Output value class: " + outClassV.getName());
    }

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    createTrans(job);
}

From source file:org.pentaho.hadoop.mapreduce.PentahoMapRunnable.java

License:Apache License

public void configure(JobConf job) {
    pluginWaitTimeout = TimeUnit.MINUTES.toMillis(5);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    String xmlVariableSpace = job.get("variableSpace");

    outClassK = (Class<K2>) job.getMapOutputKeyClass();
    outClassV = (Class<V2>) job.getMapOutputValueClass();

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapRunnable(): variableSpace was retrieved from the job.  The contents: ");
        setDebugStatus(xmlVariableSpace);

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        setDebugStatus("PentahoMapRunnable(): Setting classes variableSpace property.: ");
        variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

        for (String variableName : variableSpace.listVariables()) {
            if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(variableName, variableSpace.getVariable(variableName));
            }
            if (KETTLE_PMR_PLUGIN_TIMEOUT.equals(variableName)) {
                try {
                    pluginWaitTimeout = Long.parseLong(variableSpace.getVariable(variableName));
                } catch (Exception e) {
                    System.out.println("Unable to parse plugin wait timeout, defaulting to 5 minutes");
                }
            }
        }
    } else {
        setDebugStatus("PentahoMapRunnable(): The PDI Job's variable space was not sent.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    setDebugStatus("Job configuration");
    setDebugStatus("Output key class: " + outClassK.getName());
    setDebugStatus("Output value class: " + outClassV.getName());

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    long deadline = 0;
    boolean first = true;
    while (true) {
        createTrans(job);

        if (first) {
            deadline = pluginWaitTimeout + System.currentTimeMillis();
            System.out.println(PentahoMapRunnable.class + ": Trans creation checking starting now "
                    + new Date().toString());
            first = false;
        }

        List<MissingTrans> missingTranses = new ArrayList<MissingTrans>();
        for (StepMeta stepMeta : trans.getTransMeta().getSteps()) {
            StepMetaInterface stepMetaInterface = stepMeta.getStepMetaInterface();
            if (stepMetaInterface instanceof MissingTrans) {
                MissingTrans missingTrans = (MissingTrans) stepMetaInterface;
                System.out.println(MissingTrans.class + "{stepName: " + missingTrans.getStepName()
                        + ", missingPluginId: " + missingTrans.getMissingPluginId() + "}");
                missingTranses.add(missingTrans);
            }
        }

        if (missingTranses.size() == 0) {
            System.out.println(
                    PentahoMapRunnable.class + ": Done waiting on plugins now " + new Date().toString());
            break;
        } else {
            if (System.currentTimeMillis() > deadline) {
                StringBuilder stringBuilder = new StringBuilder("Failed to initialize plugins: ");
                for (MissingTrans missingTrans : missingTranses) {
                    stringBuilder.append(missingTrans.getMissingPluginId());
                    stringBuilder.append(" on step ").append(missingTrans.getStepName());
                    stringBuilder.append(", ");
                }
                stringBuilder.setLength(stringBuilder.length() - 2);
                throw new RuntimeException(stringBuilder.toString());
            } else {
                try {
                    Thread.sleep(Math.min(100, deadline - System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}

From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License:Open Source License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}