Example usage for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass)

Source Link

Document

Set the key class for the map output data.

Usage

From source file:PDI.Hadoop.Datamining.Tools.HistorianParser.java

/**
 * The main driver for historian map/reduce program. Invoke this method to
 * submit the map/reduce job./*  w w  w. ja  v  a2s .co  m*/
 * 
 * @throws IOException
 *         When there is communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {

    JobConf conf = new JobConf(getConf(), HistorianParser.class);
    JobClient jobClient = new JobClient(conf);

    List<String> sourcePaths = new ArrayList<String>();

    String destPath = "";
    String currentDate = DateUtils.getCurrentDateString();
    String startTS = "";
    String endTS = "";
    String pointIDS = "";
    String outputSize = "";

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(StandardPointFile.class);
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(ReduceClass.class);
    conf.setInputFormat(HistorianInputFormat.class);

    conf.set("compression", "no");
    conf.set("filePrefix", "devarchive_archive_");

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-startTS".equals(args[i])) {
                conf.set("startTS", args[++i]);
                startTS = args[i];
            } else if ("-endTS".equals(args[i])) {
                conf.set("endTS", args[++i]);
                endTS = args[i];
            } else if ("-pointIDS".equals(args[i])) {
                conf.set("pointIDS", args[++i]);
                pointIDS = args[i];
            } else if ("-outputMaxSize".equals(args[i])) {
                conf.set("outputSize", args[++i]);
                outputSize = args[i];
            } else if ("-sourcePATH".equals(args[i])) {
                String sourcePath = "" + args[++i];
                if (sourcePath.indexOf(',') == -1) {
                    sourcePaths.add(sourcePath);
                } else {
                    String[] paths = sourcePath.split(",");
                    for (int ii = 0; ii < paths.length; ii++) {
                        sourcePaths.add(paths[ii]);
                    }
                }
            } else if ("-destPATH".equals(args[i])) {
                destPath = "" + args[++i] + "/";
            } else if ("-compression".equals(args[i])) {
                conf.set("compression", args[++i]);
            } else if ("-filePrefix".equals(args[i])) {
                conf.set("filePrefix", args[++i]);
            } else if ("-v".equals(args[i])) {
                pdi_showVersion();
                return 0;
            } else if ("-verbose".equals(args[i])) {
                this.pdi_setVerbose(true);
            } else if ("-h".equals(args[i])) {
                return printUsage();
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Check for the user input parameters
    if ((0 == sourcePaths.size()) || destPath.equals("") || startTS.equals("") || endTS.equals("")
            || pointIDS.equals("") || outputSize.equals("") || (0 == conf.get("filePrefix").length())) {
        System.out.println("ERROR: Wrong input parameters.");
        return printUsage();
    }

    String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS);
    String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS);

    System.out.println("-------------------------------------------------------");
    System.out.println("jobName      : " + currentDate);
    System.out.println("filePrefix   : " + conf.get("filePrefix"));
    for (int i = 0; i < sourcePaths.size(); i++) {
        System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i));
    }
    System.out.println("destPath     : " + destPath);
    System.out.println("startTS      : " + startTS + " (" + startTime + ")");
    System.out.println("endTS        : " + endTS + " (" + endTime + ")");
    System.out.println("pointIDS     : " + pointIDS);
    System.out.println("outputMaxSize: " + outputSize + " MB");
    System.out.println("compression  : " + conf.get("compression"));
    System.out.println("-------------------------------------------------------");

    PathUtils utils = new PathUtils(this.pdi_isVerbose());
    if (false == utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) {
        return -1;
    }

    // set output path to current time
    FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate));

    // set jobName to current time
    //      conf.setJobName(date.toString());
    conf.setJobName(currentDate);
    JobClient.runJob(conf); // run the job

    //      mergeAndCopyToLocal(conf, destPath);

    return 0;
}

From source file:pegasus.column_joiner.JoinTablePegasus.java

License:Apache License

/**
 * Builds the job configuration for the first pass of the table join.
 *
 * <p>Every entry of {@code input_paths} is registered as a job input and also stored
 * in the configuration under {@code path1}, {@code path2}, ... in iteration order.
 * Any existing {@code output_path} is deleted recursively before the configuration
 * is returned.
 *
 * @return the configured {@link JobConf}
 * @throws Exception if the file system cannot be obtained or the delete fails
 */
protected JobConf configPass1() throws Exception {
    final JobConf conf = new JobConf(getConf(), JoinTablePegasus.class);
    conf.set("number_tables", "" + number_tables);
    conf.set("join_type", "" + join_type);

    conf.setJobName("JoinTablePegasus");

    conf.setMapperClass(MapPass1.class);
    conf.setReducerClass(RedPass1.class);

    // Path keys are 1-based: "path1", "path2", ...
    int i = 1;
    for (Iterator<Path> iter = input_paths.iterator(); iter.hasNext(); i++) {
        Path cur_path = iter.next();
        FileInputFormat.addInputPath(conf, cur_path);
        conf.set("path" + i, cur_path.toString());
    }
    FileOutputFormat.setOutputPath(conf, output_path);

    // Explicit recursive delete instead of the deprecated single-argument overload.
    final FileSystem fs = FileSystem.get(conf);
    fs.delete(output_path, true);

    conf.setNumReduceTasks(nreducer);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:ronchy.BigramCount.java

License:Apache License

/**
 * Runs this tool./*from  w  ww .j av a  2  s  .  c  o  m*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:sa.edu.kaust.fwindex.BuildIntDocVectorsForwardIndex.java

License:Apache License

/**
 * Runs this tool: submits the job that builds the forward index over the
 * IntDocVectors.
 *
 * @param args exactly two arguments: the IntDocVectors input path and the
 *             forward-index output path
 * @return -1 (after printing usage) when the argument count is not 2; otherwise 0,
 *         including the cases where the input is missing or the index already exists
 * @throws Exception if file-system access or the job itself fails
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }
    final String vectorsLocation = args[0];
    final String indexLocation = args[1];

    final JobConf job = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    final FileSystem fileSystem = FileSystem.get(job);

    sLogger.info("Tool: BuildIntDocVectorsIndex");

    // Nothing to do without input; this is reported but still returns 0.
    final boolean inputPresent = fileSystem.exists(new Path(vectorsLocation));
    if (!inputPresent) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // An existing index is left untouched.
    final boolean indexPresent = fileSystem.exists(new Path(indexLocation));
    if (indexPresent) {
        sLogger.info("IntDocVectorsForwardIndex already exists: skipping!");
        return 0;
    }

    job.set("ForwardIndexPath", indexLocation);
    job.setJobName("BuildIntDocVectorsForwardIndex");

    FileInputFormat.setInputPaths(job, new Path(vectorsLocation));

    // Ten map tasks feeding a single reducer.
    job.setNumMapTasks(10);
    job.setNumReduceTasks(1);

    job.set("mapred.child.java.opts", "-Xmx2048m");

    job.setInputFormat(SequenceFileInputFormat.class);
    job.setMapOutputKeyClass(TermDF.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputFormat(NullOutputFormat.class);

    job.setMapRunnerClass(MyMapRunner.class);
    job.setReducerClass(MyReducer.class);

    JobClient.runJob(job);

    return 0;
}

From source file:sa.edu.kaust.twitter.index.BuildPostingsForwardIndex.java

License:Apache License

/**
 * Runs this tool./*from w w  w.  j a  v a 2s .com*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildPostingsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;
    sLogger.info("Tool: PostingsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);

    conf.setJobName("BuildPostingsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java

License:Apache License

/**
 * Runs this tool./*from w  w  w .  j a v  a 2  s . com*/
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildTweetsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;
    sLogger.info("Tool: TweetsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);

    conf.setJobName("BuildTweetsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:sg.edu.astar.dsi.mergespill.App.java

/**
 * Merges the spill files found in {@code directory} into a single
 * {@code FINALOUTPUTFILE} and writes a matching {@code FINALOUTPUTFILE.index}.
 *
 * <p>The method first waits, calling {@code sleep(5000)} repeatedly, until
 * {@code directory} exists as a directory. Only partition 0 of every spill is
 * merged. Progress is reported on standard output.
 *
 * @param directory   directory that holds the spill files and receives the output
 * @param spillNumber passed through to {@code getSpillFilesAndIndices}
 *                    (presumably the number of spills to collect; confirm there)
 * @throws IOException          if reading the spills or writing the output fails
 * @throws InterruptedException if the wait for {@code directory} is interrupted
 */
public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);

    // SETUP
    JobConf job = new JobConf();
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    // Safe: these are exactly the classes set on the two lines above.
    @SuppressWarnings("unchecked")
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    @SuppressWarnings("unchecked")
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    FileSystem rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList<>();
        ArrayList<Path> spillFileIndex = new ArrayList<>();

        App myApp = new App();
        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        // Load every spill index and dump its per-partition records.
        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        for (Path indexPath : spillFileIndex) {
            numSpills++;
            System.out.println(indexPath);
            SpillRecord sr = new SpillRecord(indexPath, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            for (int i = 0; i < sr.size(); i++) { // sr.size() is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);

        // FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        long segmentStart;
        Writer<TextDsi, IntWritable> writer;
        // try-with-resources closes the output stream even when merging or writing throws.
        try (FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096)) {
            System.out.println("GOT HERE 1");

            // ONE PARTITION ONLY
            List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
            for (int i = 0; i < numSpills; i++) {
                IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
                String indexName = spillFileIndex.get(i).toString();
                // Drops the last 6 characters of the index file name to obtain the spill
                // file name (presumably the ".index" suffix; confirm against the producer).
                String spillName = indexName.substring(0, indexName.length() - 6);
                Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(spillName),
                        theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
                segmentList.add(s);
            }
            System.out.println("GOT HERE 2");
            // NOTE(review): the temporary merge directory is hard-coded to one user's home.
            RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                    new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                    spilledRecordsCounter, null, TaskType.MAP);
            System.out.println("GOT HERE 3");

            // write merged output to disk
            segmentStart = finalOut.getPos();
            FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
            writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                    TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
            System.out.println("GOT HERE 4");
            Merger.writeFile(kvIter, writer, null, job);
            writer.close();
        }
        System.out.println("GOT HERE 5");

        // Describe the single merged partition in the final index file.
        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength  : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");

    } else {
        System.out.println("argument is not a directory! : " + directory);
    }

}

From source file:thinkbig.hadoop.inputformat.TestDocumentInputFormat.java

License:Open Source License

/**
 * Configures and runs a job that reads its input through
 * {@code DocumentInputFormat}.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @return always 0 once the job has run
 * @throws Exception if the job fails
 */
@Override
public int run(String[] args) throws Exception {
    final JobConf conf = new JobConf();

    conf.setInputFormat(DocumentInputFormat.class);
    conf.set("docinput.prepend.key", "TRUE");

    final Path source = new Path(args[0]);
    DocumentInputFormat.addInputPath(conf, source);

    final Path target = new Path(args[1]);
    FileOutputFormat.setOutputPath(conf, target);

    conf.setMapOutputKeyClass(Text.class);

    JobClient.runJob(conf);
    return 0;
}

From source file:TVA.Hadoop.MapReduce.Development.Test_RecordReader_Alt.java

/**
 * Driver for the Test_RecordReader_Alt map/reduce job: parses the command
 * line, configures the job and submits it.
 *
 * @param args optional {@code -m <maps>} and {@code -r <reduces>} followed by
 *             exactly two positional arguments, the input path and the output path
 * @return 0 once the job has run, otherwise the value of {@code printUsage()}
 * @throws Exception if the job fails
 */
public int run(String[] args) throws Exception {
    final JobConf job = new JobConf(getConf(), Test_RecordReader_Alt.class);
    job.setJobName("Test_RecordReader_Alt");

    job.setMapOutputKeyClass(IntWritable.class);
    job.setMapOutputValueClass(StandardPointFile.class);

    // NOTE(review): database credentials are hard-coded in this connection string.
    job.set("gov.tva.mapreduce.AverageFrequency.connectionstring",
            "jdbc:sqlserver://rgocdsql:1433; databaseName=PhasorMeasurementData;user=NaspiApp;password=pw4site;");
    job.set("gov.tva.mapreduce.AverageFrequency.HistorianID", "2");

    job.setMapperClass(MapClass.class);
    job.setReducerClass(Reduce.class);

    job.setInputFormat(DatAware_InputFormat.class);

    // Everything that is not -m/-r (or one of their values) is a positional argument.
    final List<String> positional = new ArrayList<String>();
    int pos = 0;
    while (pos < args.length) {
        try {
            if ("-m".equals(args[pos])) {
                pos++;
                job.setNumMapTasks(Integer.parseInt(args[pos]));
            } else if ("-r".equals(args[pos])) {
                pos++;
                job.setNumReduceTasks(Integer.parseInt(args[pos]));
            } else {
                positional.add(args[pos]);
            }
        } catch (NumberFormatException badNumber) {
            // pos already points at the value that failed to parse.
            System.out.println("ERROR: Integer expected instead of " + args[pos]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException missingValue) {
            // pos ran past the end, so the previous element is the option lacking a value.
            System.out.println("ERROR: Required parameter missing from " + args[pos - 1]);
            return printUsage();
        }
        pos++;
    }

    // Exactly two positional arguments must remain: input and output.
    final int remaining = positional.size();
    if (remaining != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + remaining + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(job, positional.get(0));
    FileOutputFormat.setOutputPath(job, new Path(positional.get(1)));

    JobClient.runJob(job);
    return 0;
}

From source file:TVA.Hadoop.Samples.TestRecordReader.java

/**
 * The main driver for word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there is communication problems with the 
 *                     job tracker./* w w w. ja  v  a  2s  .  c om*/
 */
public int run(String[] args) throws Exception {

    JobConf conf = new JobConf(getConf(), TestRecordReader.class);
    conf.setJobName("TestRecordReader");

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(HistorianInputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);

    return 0;
}