Example usage for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

List of usage examples for org.apache.hadoop.mapred JobConf setMapOutputKeyClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred JobConf setMapOutputKeyClass.

Prototype

public void setMapOutputKeyClass(Class<?> theClass) 

Document

Set the key class for the map output data.
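
setMapOutputKeyClass only needs to be called when the map output key class differs from the job's final output key class (the value given to setOutputKeyClass), since the intermediate types default to the final output types. The sketch below illustrates that case with a minimal, hypothetical job (class names and paths are placeholders, not taken from the examples on this page): the mapper emits IntWritable/Text pairs while the reducer emits Text/Text pairs.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class LineLengthExample {

    // Maps each input line to (line length, line text).
    public static class LengthMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, IntWritable, Text> {
        public void map(LongWritable key, Text value,
                OutputCollector<IntWritable, Text> output, Reporter reporter) throws IOException {
            output.collect(new IntWritable(value.getLength()), value);
        }
    }

    // Emits (length as Text, number of lines with that length).
    public static class CountReducer extends MapReduceBase
            implements Reducer<IntWritable, Text, Text, Text> {
        public void reduce(IntWritable key, Iterator<Text> values,
                OutputCollector<Text, Text> output, Reporter reporter) throws IOException {
            int count = 0;
            while (values.hasNext()) {
                values.next();
                count++;
            }
            output.collect(new Text(key.toString()), new Text(Integer.toString(count)));
        }
    }

    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(LineLengthExample.class);
        conf.setJobName("LineLengthExample");

        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);

        // Final (reducer) output types.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(Text.class);

        // The map output key differs from the final output key (IntWritable vs. Text),
        // so it must be declared explicitly.
        conf.setMapOutputKeyClass(IntWritable.class);
        conf.setMapOutputValueClass(Text.class);

        conf.setMapperClass(LengthMapper.class);
        conf.setReducerClass(CountReducer.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}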

Usage

From source file:PDI.Hadoop.Datamining.Tools.HistorianParser.java

/**
 * The main driver for the historian map/reduce program. Invoke this method to
 * submit the map/reduce job.
 * 
 * @throws IOException
 *         When there are communication problems with the job tracker.
 */
public int run(String[] args) throws Exception {

    JobConf conf = new JobConf(getConf(), HistorianParser.class);
    JobClient jobClient = new JobClient(conf);

    List<String> sourcePaths = new ArrayList<String>();

    String destPath = "";
    String currentDate = DateUtils.getCurrentDateString();
    String startTS = "";
    String endTS = "";
    String pointIDS = "";
    String outputSize = "";

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(StandardPointFile.class);
    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(ReduceClass.class);
    conf.setInputFormat(HistorianInputFormat.class);

    conf.set("compression", "no");
    conf.set("filePrefix", "devarchive_archive_");

    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else if ("-startTS".equals(args[i])) {
                conf.set("startTS", args[++i]);
                startTS = args[i];
            } else if ("-endTS".equals(args[i])) {
                conf.set("endTS", args[++i]);
                endTS = args[i];
            } else if ("-pointIDS".equals(args[i])) {
                conf.set("pointIDS", args[++i]);
                pointIDS = args[i];
            } else if ("-outputMaxSize".equals(args[i])) {
                conf.set("outputSize", args[++i]);
                outputSize = args[i];
            } else if ("-sourcePATH".equals(args[i])) {
                String sourcePath = "" + args[++i];
                if (sourcePath.indexOf(',') == -1) {
                    sourcePaths.add(sourcePath);
                } else {
                    String[] paths = sourcePath.split(",");
                    for (int ii = 0; ii < paths.length; ii++) {
                        sourcePaths.add(paths[ii]);
                    }
                }
            } else if ("-destPATH".equals(args[i])) {
                destPath = "" + args[++i] + "/";
            } else if ("-compression".equals(args[i])) {
                conf.set("compression", args[++i]);
            } else if ("-filePrefix".equals(args[i])) {
                conf.set("filePrefix", args[++i]);
            } else if ("-v".equals(args[i])) {
                pdi_showVersion();
                return 0;
            } else if ("-verbose".equals(args[i])) {
                this.pdi_setVerbose(true);
            } else if ("-h".equals(args[i])) {
                return printUsage();
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }

    // Check for the user input parameters
    if ((0 == sourcePaths.size()) || destPath.equals("") || startTS.equals("") || endTS.equals("")
            || pointIDS.equals("") || outputSize.equals("") || (0 == conf.get("filePrefix").length())) {
        System.out.println("ERROR: Wrong input parameters.");
        return printUsage();
    }

    String startTime = DateUtils.unixTimestampToHumanReadableTime2(startTS);
    String endTime = DateUtils.unixTimestampToHumanReadableTime2(endTS);

    System.out.println("-------------------------------------------------------");
    System.out.println("jobName      : " + currentDate);
    System.out.println("filePrefix   : " + conf.get("filePrefix"));
    for (int i = 0; i < sourcePaths.size(); i++) {
        System.out.println("sourcePath[" + i + "]: " + sourcePaths.get(i));
    }
    System.out.println("destPath     : " + destPath);
    System.out.println("startTS      : " + startTS + " (" + startTime + ")");
    System.out.println("endTS        : " + endTS + " (" + endTime + ")");
    System.out.println("pointIDS     : " + pointIDS);
    System.out.println("outputMaxSize: " + outputSize + " MB");
    System.out.println("compression  : " + conf.get("compression"));
    System.out.println("-------------------------------------------------------");

    PathUtils utils = new PathUtils(this.pdi_isVerbose());
    if (false == utils.pdi_setRecursiveInputPaths(conf, sourcePaths, startTS, endTS)) {
        return -1;
    }

    // set output path to current time
    FileOutputFormat.setOutputPath(conf, utils.getOutputPath(destPath, currentDate));

    // set jobName to current time
    //      conf.setJobName(date.toString());
    conf.setJobName(currentDate);
    JobClient.runJob(conf); // run the job

    //      mergeAndCopyToLocal(conf, destPath);

    return 0;
}

From source file:pegasus.column_joiner.JoinTablePegasus.java

License:Apache License

protected JobConf configPass1() throws Exception {
    final JobConf conf = new JobConf(getConf(), JoinTablePegasus.class);
    conf.set("number_tables", "" + number_tables);
    conf.set("join_type", "" + join_type);

    conf.setJobName("JoinTablePegasus");

    conf.setMapperClass(MapPass1.class);
    conf.setReducerClass(RedPass1.class);

    int i = 1;
    Iterator<Path> iter = input_paths.iterator();
    while (iter.hasNext()) {
        Path cur_path = iter.next();
        FileInputFormat.addInputPath(conf, cur_path);
        conf.set("path" + i, cur_path.toString());
        i++;
    }
    FileOutputFormat.setOutputPath(conf, output_path);

    final FileSystem fs = FileSystem.get(conf);
    fs.delete(output_path);

    conf.setNumReduceTasks(nreducer);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}

From source file:ronchy.BigramCount.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     *  Note that these must match the Class arguments given in the mapper 
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}

From source file:sa.edu.kaust.fwindex.BuildIntDocVectorsForwardIndex.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }
    String inPath = args[0];
    String outPath = args[1];

    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;
    sLogger.info("Tool: BuildIntDocVectorsIndex");

    String intDocVectorsPath = inPath;
    String forwardIndexPath = outPath;

    if (!fs.exists(new Path(intDocVectorsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);

    conf.setJobName("BuildIntDocVectorsForwardIndex");

    Path inputPath = new Path(intDocVectorsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(TermDF.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:sa.edu.kaust.twitter.index.BuildPostingsForwardIndex.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildPostingsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;
    sLogger.info("Tool: PostingsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);

    conf.setJobName("BuildPostingsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java

License:Apache License

/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildTweetsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;
    sLogger.info("Tool: TweetsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);

    conf.setJobName("BuildTweetsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}

From source file:sg.edu.astar.dsi.mergespill.App.java

public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    // TODO code application logic here
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);
    // SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);
    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList<>();
        ArrayList<Path> spillFileIndex = new ArrayList<>();

        App myApp;
        myApp = new App();

        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator<Path> itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { //sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);
        //FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        //ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());
            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                    theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");
        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");
        //write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");
        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength  : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");

    } else {
        System.out.println("argument is not a directory! : " + directory);
    }

}

From source file:thinkbig.hadoop.inputformat.TestDocumentInputFormat.java

License:Open Source License

@Override
public int run(String[] args) throws Exception {
    JobConf job = new JobConf();
    job.setInputFormat(DocumentInputFormat.class);
    job.set("docinput.prepend.key", "TRUE");
    DocumentInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    job.setMapOutputKeyClass(Text.class);
    JobClient.runJob(job);
    return 0;
}

From source file:TVA.Hadoop.MapReduce.Development.Test_RecordReader_Alt.java

/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Test_RecordReader_Alt.class);
    conf.setJobName("Test_RecordReader_Alt");

    // the keys are words (strings)
    //conf.setOutputKeyClass(IntWritable.class);
    //conf.setOutputValueClass(DoubleWritable.class);

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.set("gov.tva.mapreduce.AverageFrequency.connectionstring",
            "jdbc:sqlserver://rgocdsql:1433; databaseName=PhasorMeasurementData;user=NaspiApp;password=pw4site;");
    conf.set("gov.tva.mapreduce.AverageFrequency.HistorianID", "2");

    conf.setMapperClass(MapClass.class);
    //conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(DatAware_InputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    /*
     * at this point, we need to check for a parameter that represents the id
     * of any other info we may need to view
     * --- then set the parameter in the job configuration
     *       ex: conf.set( "gov.tva.AvgFreq.Company.ID", other_args.get( n ) );
     */

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);
    return 0;
}

From source file:TVA.Hadoop.Samples.TestRecordReader.java

/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *                     job tracker.
 */
public int run(String[] args) throws Exception {

    JobConf conf = new JobConf(getConf(), TestRecordReader.class);
    conf.setJobName("TestRecordReader");

    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.setMapperClass(MapClass.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(HistorianInputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);

    return 0;
}