Example usage for org.apache.hadoop.mapred JobConf getMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapred JobConf getMapOutputKeyClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapred JobConf getMapOutputKeyClass.

Prototype

public Class<?> getMapOutputKeyClass() 

Source Link

Document

Get the key class for the map output data.
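
Before the examples, a minimal, self-contained sketch (the class name GetMapOutputKeyClassExample is made up for illustration) of the typical round trip: set the map output key class on a JobConf and read it back with getMapOutputKeyClass(). If no map output key class has been set, the method falls back to the job's output key class.

import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public class GetMapOutputKeyClassExample {
    public static void main(String[] args) {
        JobConf job = new JobConf();
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        // Prints org.apache.hadoop.io.Text
        System.out.println("Map output key class: " + job.getMapOutputKeyClass().getName());

        // Prints org.apache.hadoop.io.IntWritable
        System.out.println("Map output value class: " + job.getMapOutputValueClass().getName());
    }
}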

Usage

From source file:org.apache.tez.mapreduce.hadoop.MRHelpers.java

License:Apache License

/**
 * Pulls in specific keys from the base configuration, if they are not set at
 * the stage level. An explicit list of keys is copied over (not all), which
 * require translation to tez keys.
 */
private static void setStageKeysFromBaseConf(Configuration conf, Configuration baseConf, String stage) {
    // Don't clobber explicit tez config.
    JobConf jobConf = null;
    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_KEY_CLASS) == null) {
        // If this is set, but the comparator is not set, and their types differ -
        // the job will break.
        if (conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS) == null) {
            // Pull this in from the baseConf
            // Create jobConf only if required.
            jobConf = new JobConf(baseConf);
            conf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, jobConf.getMapOutputKeyClass().getName());

            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_KEY_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_KEY_CLASS));
            }
        }
    }

    if (conf.get(TezRuntimeConfiguration.TEZ_RUNTIME_VALUE_CLASS) == null) {
        if (conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS) == null) {
            if (jobConf == null) {
                // Create jobConf if not already created
                jobConf = new JobConf(baseConf);
            }
            conf.set(MRJobConfig.MAP_OUTPUT_VALUE_CLASS, jobConf.getMapOutputValueClass().getName());
            if (LOG.isDebugEnabled()) {
                LOG.debug("Setting " + MRJobConfig.MAP_OUTPUT_VALUE_CLASS + " for stage: " + stage
                        + " based on job level configuration. Value: "
                        + conf.get(MRJobConfig.MAP_OUTPUT_VALUE_CLASS));
            }
        }
    }
}
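
The helper above is private to MRHelpers, so it cannot be called directly from user code. Below is a hedged, self-contained sketch (the class name BaseConfLookupSketch is invented) of the lookup it relies on: wrapping the base configuration in a JobConf lets getMapOutputKeyClass() resolve the job-level mapreduce.map.output.key.class setting.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapreduce.MRJobConfig;

public class BaseConfLookupSketch {
    public static void main(String[] args) {
        Configuration baseConf = new Configuration();
        baseConf.set(MRJobConfig.MAP_OUTPUT_KEY_CLASS, Text.class.getName());

        // Wrapping the base configuration in a JobConf, as setStageKeysFromBaseConf does,
        // lets getMapOutputKeyClass() return the class named by the job-level setting.
        JobConf jobConf = new JobConf(baseConf);
        System.out.println(jobConf.getMapOutputKeyClass().getName()); // org.apache.hadoop.io.Text
    }
}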

From source file:org.pentaho.hadoop.mapreduce.PentahoMapReduceBase.java

License:Apache License

@SuppressWarnings("unchecked")
@Override
public void configure(JobConf job) {
    super.configure(job);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transCombinerXml = job.get("transformation-combiner-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    combinerInputStepName = job.get("transformation-combiner-input-stepname");
    combinerOutputStepName = job.get("transformation-combiner-output-stepname");
    combineSingleThreaded = isCombinerSingleThreaded(job);
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    reduceSingleThreaded = isReducerSingleThreaded(job);
    String xmlVariableSpace = job.get("variableSpace");

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapReduceBase. variableSpace was retrieved from the job.  The contents: ");

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        if (xStream != null) {
            setDebugStatus("PentahoMapReduceBase: Setting classes variableSpace property.: ");
            variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

            for (String variableName : variableSpace.listVariables()) {
                if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                    System.setProperty(variableName, variableSpace.getVariable(variableName));
                }
            }
        }
    } else {
        setDebugStatus(
                "PentahoMapReduceBase: The PDI Job's variable space was not found in the job configuration.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    switch (mrOperation) {
    case Combine:
        outClassK = (Class<K>) job.getMapOutputKeyClass();
        outClassV = (Class<V>) job.getMapOutputValueClass();
        break;
    case Reduce:
        outClassK = (Class<K>) job.getOutputKeyClass();
        outClassV = (Class<V>) job.getOutputValueClass();
        break;
    default:
        throw new IllegalArgumentException("Unsupported MapReduce operation: " + mrOperation);
    }

    if (debug) {
        System.out.println("Job configuration>");
        System.out.println("Output key class: " + outClassK.getName());
        System.out.println("Output value class: " + outClassV.getName());
    }

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    createTrans(job);
}

From source file:org.pentaho.hadoop.mapreduce.PentahoMapRunnable.java

License:Apache License

public void configure(JobConf job) {
    pluginWaitTimeout = TimeUnit.MINUTES.toMillis(5);

    debug = "true".equalsIgnoreCase(job.get("debug")); //$NON-NLS-1$

    transMapXml = job.get("transformation-map-xml");
    transReduceXml = job.get("transformation-reduce-xml");
    mapInputStepName = job.get("transformation-map-input-stepname");
    mapOutputStepName = job.get("transformation-map-output-stepname");
    reduceInputStepName = job.get("transformation-reduce-input-stepname");
    reduceOutputStepName = job.get("transformation-reduce-output-stepname");
    String xmlVariableSpace = job.get("variableSpace");

    outClassK = (Class<K2>) job.getMapOutputKeyClass();
    outClassV = (Class<V2>) job.getMapOutputValueClass();

    if (!Const.isEmpty(xmlVariableSpace)) {
        setDebugStatus("PentahoMapRunnable(): variableSpace was retrieved from the job.  The contents: ");
        setDebugStatus(xmlVariableSpace);

        //  deserialize from xml to variable space
        XStream xStream = new XStream();

        setDebugStatus("PentahoMapRunnable(): Setting classes variableSpace property.: ");
        variableSpace = (VariableSpace) xStream.fromXML(xmlVariableSpace);

        for (String variableName : variableSpace.listVariables()) {
            if (variableName.startsWith(KETTLE_VARIABLE_PREFIX)) {
                System.setProperty(variableName, variableSpace.getVariable(variableName));
            }
            if (KETTLE_PMR_PLUGIN_TIMEOUT.equals(variableName)) {
                try {
                    pluginWaitTimeout = Long.parseLong(variableSpace.getVariable(variableName));
                } catch (Exception e) {
                    System.out.println("Unable to parse plugin wait timeout, defaulting to 5 minutes");
                }
            }
        }
    } else {
        setDebugStatus("PentahoMapRunnable(): The PDI Job's variable space was not sent.");
        variableSpace = new Variables();
    }

    // Check for environment variables in the userDefined variables
    Iterator<Entry<String, String>> iter = job.iterator();
    while (iter.hasNext()) {
        Entry<String, String> entry = iter.next();
        if (entry.getKey().startsWith(ENVIRONMENT_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey().substring(ENVIRONMENT_VARIABLE_PREFIX.length()),
                    entry.getValue());
        } else if (entry.getKey().startsWith(KETTLE_VARIABLE_PREFIX)) {
            System.setProperty(entry.getKey(), entry.getValue());
        }
    }

    MRUtil.passInformationToTransformation(variableSpace, job);

    setDebugStatus("Job configuration");
    setDebugStatus("Output key class: " + outClassK.getName());
    setDebugStatus("Output value class: " + outClassV.getName());

    //  set the log level to what the level of the job is
    String stringLogLevel = job.get("logLevel");
    if (!Const.isEmpty(stringLogLevel)) {
        logLevel = LogLevel.valueOf(stringLogLevel);
        setDebugStatus("Log level set to " + stringLogLevel);
    } else {
        System.out.println(
                "Could not retrieve the log level from the job configuration.  logLevel will not be set.");
    }

    long deadline = 0;
    boolean first = true;
    while (true) {
        createTrans(job);

        if (first) {
            deadline = pluginWaitTimeout + System.currentTimeMillis();
            System.out.println(PentahoMapRunnable.class + ": Trans creation checking starting now "
                    + new Date().toString());
            first = false;
        }

        List<MissingTrans> missingTranses = new ArrayList<MissingTrans>();
        for (StepMeta stepMeta : trans.getTransMeta().getSteps()) {
            StepMetaInterface stepMetaInterface = stepMeta.getStepMetaInterface();
            if (stepMetaInterface instanceof MissingTrans) {
                MissingTrans missingTrans = (MissingTrans) stepMetaInterface;
                System.out.println(MissingTrans.class + "{stepName: " + missingTrans.getStepName()
                        + ", missingPluginId: " + missingTrans.getMissingPluginId() + "}");
                missingTranses.add(missingTrans);
            }
        }

        if (missingTranses.size() == 0) {
            System.out.println(
                    PentahoMapRunnable.class + ": Done waiting on plugins now " + new Date().toString());
            break;
        } else {
            if (System.currentTimeMillis() > deadline) {
                StringBuilder stringBuilder = new StringBuilder("Failed to initialize plugins: ");
                for (MissingTrans missingTrans : missingTranses) {
                    stringBuilder.append(missingTrans.getMissingPluginId());
                    stringBuilder.append(" on step ").append(missingTrans.getStepName());
                    stringBuilder.append(", ");
                }
                stringBuilder.setLength(stringBuilder.length() - 2);
                throw new RuntimeException(stringBuilder.toString());
            } else {
                try {
                    Thread.sleep(Math.min(100, deadline - System.currentTimeMillis()));
                } catch (InterruptedException e) {
                    throw new RuntimeException(e);
                }
            }
        }
    }
}

From source file:org.pentaho.hadoop.mapreduce.test.MapperAndReducerTest.java

License:Open Source License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./test-res/wordcount-mapper.ktr", "./test-res/wordcount-reducer.ktr",
            "./test-res/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIntegrationTest.java

License:Apache License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/test/resources/wordcount-mapper.ktr",
            "./src/test/resources/wordcount-reducer.ktr", "./src/test/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file:org.pentaho.hadoop.mapreduce.test.PentahoMapReduceIT.java

License:Apache License

@Test
public void testCombinerOutputClasses() throws IOException, KettleException {
    JobConf jobConf = createJobConf("./src/it/resources/wordcount-mapper.ktr",
            "./src/it/resources/wordcount-reducer.ktr", "./src/it/resources/wordcount-reducer.ktr");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(LongWritable.class);

    GenericTransCombiner combiner = new GenericTransCombiner();

    combiner.configure(jobConf);

    assertEquals(jobConf.getMapOutputKeyClass(), combiner.getOutClassK());
    assertEquals(jobConf.getMapOutputValueClass(), combiner.getOutClassV());
}

From source file:sg.edu.astar.dsi.mergespill.App.java

public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    // TODO code application logic here
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);
    //SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList<>();
        ArrayList<Path> spillFileIndex = new ArrayList<>();

        App myApp;
        myApp = new App();

        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator<Path> itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { //sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);
        //FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        //ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());
            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                    theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");
        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");
        //write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");
        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength  : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");

    } else {
        System.out.println("argument is not a directory! : " + directory);
    }

}
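
A hypothetical invocation of the method above; the directory and spill count are placeholders, not values from the original source:

public class MergeSpillExample {
    public static void main(String[] args) throws Exception {
        // Hypothetical call: merge the spill files found under /tmp/spill-sample,
        // assuming 3 spills were written there.
        App.doProcess("/tmp/spill-sample", 3);
    }
}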