List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputKeyClass
public Class<?> getMapOutputKeyClass()
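A minimal usage sketch before the real-world sources below (the class choices are illustrative, not taken from any of them): getMapOutputKeyClass() returns the key class registered with setMapOutputKeyClass(); if no map output key class was ever set, it falls back to the job's final output key class.

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class GetMapOutputKeyClassSketch {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // No map output key class set yet: the getter falls back to the job's output key class.
        conf.setOutputKeyClass(LongWritable.class);
        System.out.println(conf.getMapOutputKeyClass()); // class org.apache.hadoop.io.LongWritable

        // An explicit intermediate (map output) key class takes precedence.
        conf.setMapOutputKeyClass(Text.class);
        System.out.println(conf.getMapOutputKeyClass()); // class org.apache.hadoop.io.Text
    }
}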
From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java
License:Apache License
/**
 * Pulls in specific keys from the base configuration, if they are not set at
 * the stage level. An explicit list of keys is copied over (not all), which
 * require translation to tez keys.
 */
private static void setStageKeysFromBaseConf(Configuration conf, Configuration baseConf, String stage) {
    // Don't clobber explicit tez config.
    JobConf jobConf = null;
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) {
        // If this is set, but the comparator is not set, and their types differ -
        // the job will break.
        if (conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS) == null) {
            // Pull this in from the baseConf
            // Create jobConf only if required.
            jobConf = new JobConf(baseConf);
            conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, jobConf.getMapOutputKeyClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS + " for stage: " + stage
                    + " based on job level configuration. Value: " + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS));
            }
        }
    }
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS) == null) {
        if (conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS) == null) {
            if (jobConf == null) {
                // Create jobConf if not already created
                jobConf = new JobConf(baseConf);
            }
            conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, jobConf.getMapOutputValueClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS + " for stage: " + stage
                    + " based on job level configuration. Value: " + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS));
            }
        }
    }
}
From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java
License:Apache License
@SuppressWarnings("unchecked") @Override//ww w .java 2 s. co m public void configure(JobConf job) { super.configure(job); debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$ transMapXml = job.get("transformation-map-xml"); transCombinerXml = job.get("transformation-combiner-xml"); transReduceXml = job.get("transformation-reduce-xml"); mapInputStepName = job.get("transformation-map-input-stepname"); mapOutputStepName = job.get("transformation-map-output-stepname"); combinerInputStepName = job.get("transformation-combiner-input-stepname"); combinerOutputStepName = job.get("transformation-combiner-output-stepname"); combineSingleThreaded = isCombinerSingleThreaded(job); reduceInputStepName = job.get("transformation-reduce-input-stepname"); reduceOutputStepName = job.get("transformation-reduce-output-stepname"); reduceSingleThreaded = isReducerSingleThreaded(job); String xmlVariableSpace = job.get("variableSpace"); if (!Const.isEmpty(xmlVariableSpace)) { setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job. The contents: "); // deserialize from xml to variable space XStream xStream = new XStream(); if (xStream != null) { setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: "); variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace); for (String variableName : variableSpace.listVariables()) { if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(variableName, variableSpace.getVariable(variableName)); } } } } else { setDebugStatus( "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration."); variableSpace = new Variables(); } // Check for environment variables in the userDefined variables Iterator<Entry<String, String>> iter = job.iterator(); while (iter.hasNext()) { Entry<String, String> entry = iter.next(); if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) { System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue()); } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) { System.setProperty(entry.getKey(), entry.getValue()); } } MRUtil.passInformationToTransformation(variableSpace, job); switch (mrOperation) { case Combine: outClassK = (Class<K>) job.getMapOutputKeyClass(); outClassV = (Class<V>) job.getMapOutputValueClass(); break; case Reduce: outClassK = (Class<K>) job.getOutputKeyClass(); outClassV = (Class<V>) job.getOutputValueClass(); break; default: throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation); } if (debug) { System.out.println("Job configuration>"); System.out.println("Output key class: " + outClassK.getName()); System.out.println("Output value class: " + outClassV.getName()); } // set the log level to what the level of the job is String stringLogLevel = job.get("logLevel"); if (!Const.isEmpty(stringLogLevel)) { logLevel = LogLevel.valueOf(stringLogLevel); setDebugStatus("Log level set to " + stringLogLevel); } else { System.out.println( "Could not retrieve the log level from the job configuration. logLevel will not be set."); } createTrans(job); }
From source file:org.pentaho.hadoop.mapreduce.PentahoMapRunnable.java
License:Apache License
public void configure(JobConf job) {
    pluginWaitTimeout = TimeUnit.MINUTES.toMillis(5);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    String xmlVariableSpace = job.get("variableSpace");

    outClassK = (Class<K2>) job.getMapOutputKeyClass();
    outClassV = (Class<V2>) job.getMapOutputValueClass();

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapRunnable(): variableSpace was retrieved from the job. The contents: ");
        setDebugStatus(xmlVariableSpace);
        // deserialize from xml to variable space
        XStream xStream = new XStream();
        setDebugStatus("PentahoMapRunnable(): Setting classes variableSpace property.: ");
        variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);
        for (String variableName : variableSpace.listVariables()) {
            if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(variableName, variableSpace.getVariable(variableName));
            }
            if (KETTLE_PMR_PLUGIN_TIMEOUT.equals(variableName)) {
                try {
                    pluginWaitTimeout = Long.parseLong(variableSpace.getVariable(variableName));
                } catch (Exception e) {
                    System.out.println("Unable to parse plugin wait timeout, defaulting to 5 minutes");
                }
            }
        }
    } else {
        setDebugStatus("PentahoMapRunnable(): The PDI Job's variable space was not sent.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()), entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    setDebugStatus("Job configuration");
    setDebugStatus("Output key class: " + outClassK.getName());
    setDebugStatus("Output value class: " + outClassV.getName());

    // set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println("Could not retrieve the log level from the job configuration. logLevel will not be set.");
    }

    long deadline = 0;
    boolean first = true;
    while (true) {
        createTrans(job);
        if (first) {
            deadline = pluginWaitTimeout + System.currentTimeMillis();
            System.out.println(PentahoMapRunnable.class + ": Trans creation checking starting now " + new Date().toString());
            first = false;
        }
        List<MissingTrans> missingTranses = new ArrayList<MissingTrans>();
        for (StepMeta stepMeta : trans.getTransMeta().getSteps()) {
            StepMetaInterface stepMetaInterface = stepMeta.getStepMetaInterface();
            if (stepMetaInterface instanceof MissingTrans) {
                MissingTrans missingTrans = (MissingTrans) stepMetaInterface;
                System.out.println(MissingTrans.class + "{stepName: " + missingTrans.getStepName()
                    + ", missingPluginId: " + missingTrans.getMissingPluginId() + "}");
                missingTranses.add(missingTrans);
            }
        }
        if (missingTranses.size() == 0) {
            System.out.println(PentahoMapRunnable.class + ": Done waiting on plugins now " + new Date().toString());
            break;
        } else {
            if (System.currentTimeMillis() > deadline) {
                StringBuilder stringBuilder = new StringBuilder("Failed to initialize plugins: ");
                for (MissingTrans missingTrans : missingTranses) {
                    stringBuilder.append(missingTrans.getMissingPluginId());
                    stringBuilder.append(" on step ").append(missingTrans.getStepName());
                    stringBuilder.append(", ");
                }
                stringBuilder.setLength(stringBuilder.length() - 2);
                throw new RuntimeException(stringBuilder.toString());
            } else {
                try {
                    Thread.sleep(Math.min(100, deadline - System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}
From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java
License:Open Source License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
        "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    // Final output classes deliberately differ from the map output classes above.
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();
    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java
License:Apache License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
        "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    // Final output classes deliberately differ from the map output classes above.
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();
    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java
License:Apache License
@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
        "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    // Final output classes deliberately differ from the map output classes above.
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();
    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}
From source file:sg.edu.astar.dsi.mergespill.App.java
public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    // TODO code application logic here
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);

    //SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();

    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList();
        ArrayList<Path> spillFileIndex = new ArrayList();

        App myApp;
        myApp = new App();
        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = (Path) itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { //sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);

        //FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        //ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());

            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");

        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
            new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
            spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");

        //write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
            TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");

        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");
    } else {
        System.out.println("argument is not a directory! : " + directory);
    }
}
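Because getMapOutputKeyClass() is declared to return a raw Class&lt;?&gt;, callers that need a typed Class reference, as the merge code above does for its Segment and Writer generics, have to cast the result. A minimal sketch of that unchecked cast, assuming the classes were set beforehand (class choices here are illustrative):

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class TypedKeyClassSketch {
    @SuppressWarnings("unchecked")
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // The getters return Class<?>; the casts are unchecked but safe here
        // because the classes were set explicitly a few lines above.
        Class<Text> keyClass = (Class<Text>) job.getMapOutputKeyClass();
        Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
        System.out.println(keyClass.getName() + " / " + valClass.getName());
    }
}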