List of usage examples for org.apache.hadoop.mapred.JobConf.setMapOutputValueClass
public void setMapOutputValueClass(Class<?> theClass)
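Before the examples, a minimal sketch of the pattern most of them share: setMapOutputValueClass is only needed when the mapper's intermediate value type differs from the final output value type given to setOutputValueClass; otherwise the final value class is assumed for the map output as well. The driver below is not taken from any of the projects listed here; the class names, tokenizing logic, and input/output paths are made up for illustration.

import java.io.IOException;
import java.util.Iterator;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.DoubleWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;

public class MapOutputValueClassSketch {

    // Mapper emits Text values ("1" per token).
    public static class TokenMapper extends MapReduceBase
            implements Mapper<LongWritable, Text, Text, Text> {
        public void map(LongWritable key, Text value, OutputCollector<Text, Text> output,
                Reporter reporter) throws IOException {
            for (String token : value.toString().split("\\s+")) {
                if (!token.isEmpty()) {
                    output.collect(new Text(token), new Text("1"));
                }
            }
        }
    }

    // Reducer writes DoubleWritable values, so the final and map value classes differ.
    public static class CountReducer extends MapReduceBase
            implements Reducer<Text, Text, Text, DoubleWritable> {
        public void reduce(Text key, Iterator<Text> values, OutputCollector<Text, DoubleWritable> output,
                Reporter reporter) throws IOException {
            double count = 0;
            while (values.hasNext()) {
                count += Double.parseDouble(values.next().toString());
            }
            output.collect(key, new DoubleWritable(count));
        }
    }

    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(MapOutputValueClassSketch.class);
        conf.setJobName("MapOutputValueClassSketch");

        conf.setMapperClass(TokenMapper.class);
        conf.setReducerClass(CountReducer.class);

        // Final (reducer) output types.
        conf.setOutputKeyClass(Text.class);
        conf.setOutputValueClass(DoubleWritable.class);

        // Intermediate map output values are Text, so they must be declared explicitly;
        // without this call the framework would expect DoubleWritable from the mapper.
        conf.setMapOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

The map output key class is left at its default here because the mapper and reducer both key on Text; only the value classes diverge.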
From source file:pegasus.matvec.MatvecNaive.java
License:Apache License
protected JobConf configPass1() throws Exception {
    final JobConf conf = new JobConf(getConf(), MatvecNaive.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.set("makesym", "" + makesym);
    conf.set("transpose", "" + transpose);
    conf.set("ignore_weights", "" + ignore_weights);
    conf.setJobName("MatvecNaive_pass1");

    conf.setMapperClass(MapPass1.class);
    conf.setReducerClass(RedPass1.class);

    if (vector_path == null)
        FileInputFormat.setInputPaths(conf, edge_path);
    else
        FileInputFormat.setInputPaths(conf, edge_path, vector_path);
    FileOutputFormat.setOutputPath(conf, tempmv_path);

    conf.setNumReduceTasks(nreducer);

    // Map output values are Text, while the final (reduce) output values are DoubleWritable.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setOutputValueClass(DoubleWritable.class);
    conf.setMapOutputValueClass(Text.class);

    return conf;
}
From source file:pegasus.matvec.MatvecNaive.java
License:Apache License
protected JobConf configPass2() throws Exception {
    final JobConf conf = new JobConf(getConf(), MatvecNaive.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.setJobName("MatvecNaive_pass2");

    conf.setMapperClass(MapPass2.class);
    conf.setReducerClass(RedPass2.class);

    FileInputFormat.setInputPaths(conf, tempmv_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducer);

    // Here the map output values are DoubleWritable, while the final output values are Text.
    conf.setOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(DoubleWritable.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
From source file:pegasus.matvec.MatvecPrep.java
License:Apache License
protected JobConf configStage1(String out_prefix) throws Exception {
    final JobConf conf = new JobConf(getConf(), MatvecPrep.class);
    conf.set("block_size", "" + block_size);
    conf.set("matrix_row", "" + number_nodes);
    conf.set("out_prefix", "" + out_prefix);
    conf.set("makesym", "" + makesym);
    conf.setJobName("MatvecPrep_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileSystem fs = FileSystem.get(getConf());
    fs.delete(output_path, true);

    FileInputFormat.setInputPaths(conf, edge_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    int num_reduce_tasks = nreducer;
    conf.setNumReduceTasks(num_reduce_tasks);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    return conf;
}
From source file:pegasus.pagerank.PagerankInitVector.java
License:Apache License
protected JobConf configStage1() throws Exception {
    final JobConf conf = new JobConf(getConf(), PagerankInitVector.class);
    conf.set("number_nodes", "" + number_nodes);
    conf.setJobName("PagerankInitVector_Stage1");

    conf.setMapperClass(MapStage1.class);
    conf.setReducerClass(RedStage1.class);

    FileInputFormat.setInputPaths(conf, initial_prinput_path);
    FileOutputFormat.setOutputPath(conf, output_path);

    conf.setNumReduceTasks(nreducers);

    conf.setOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputValueClass(Text.class);

    return conf;
}
From source file:ronchy.BigramCount.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        return -1;
    }

    String inputPath = args[0];
    String outputPath = args[1];

    int mapTasks = Integer.parseInt(args[2]);
    int reduceTasks = Integer.parseInt(args[3]);

    sLogger.info("Tool: BigramCount");
    sLogger.info(" - input path: " + inputPath);
    sLogger.info(" - output path: " + outputPath);
    sLogger.info(" - number of mappers: " + mapTasks);
    sLogger.info(" - number of reducers: " + reduceTasks);

    JobConf conf = new JobConf(BigramCount.class);
    conf.setJobName("BigramCount");

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(reduceTasks);

    FileInputFormat.setInputPaths(conf, new Path(inputPath));
    FileOutputFormat.setOutputPath(conf, new Path(outputPath));
    FileOutputFormat.setCompressOutput(conf, false);

    /**
     * Note that these must match the Class arguments given in the mapper
     */
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setMapperClass(MyMapper.class);
    conf.setReducerClass(MyReducer.class);

    // Delete the output directory if it exists already
    Path outputDir = new Path(outputPath);
    FileSystem.get(outputDir.toUri(), conf).delete(outputDir, true);

    long startTime = System.currentTimeMillis();
    JobClient.runJob(conf);
    sLogger.info("Job Finished in " + (System.currentTimeMillis() - startTime) / 1000.0 + " seconds");

    return 0;
}
From source file:sa.edu.kaust.fwindex.BuildIntDocVectorsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }
    String inPath = args[0];
    String outPath = args[1];

    JobConf conf = new JobConf(getConf(), BuildIntDocVectorsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;

    sLogger.info("Tool: BuildIntDocVectorsIndex");

    String intDocVectorsPath = inPath;
    String forwardIndexPath = outPath;

    if (!fs.exists(new Path(intDocVectorsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("IntDocVectorsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildIntDocVectorsForwardIndex");

    Path inputPath = new Path(intDocVectorsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(TermDF.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sa.edu.kaust.twitter.index.BuildPostingsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildPostingsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;

    sLogger.info("Tool: PostingsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildPostingsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sa.edu.kaust.twitter.index.BuildTweetsForwardIndex.java
License:Apache License
/**
 * Runs this tool.
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        printUsage();
        return -1;
    }

    JobConf conf = new JobConf(BuildTweetsForwardIndex.class);
    FileSystem fs = FileSystem.get(conf);

    int mapTasks = 10;

    sLogger.info("Tool: TweetsForwardIndex");

    String postingsPath = args[0];
    String forwardIndexPath = args[1];

    if (!fs.exists(new Path(postingsPath))) {
        sLogger.info("Error: IntDocVectors don't exist!");
        return 0;
    }

    // delete the output directory if it exists already
    //FileSystem.get(conf).delete(new Path(forwardIndexPath), true);
    if (fs.exists(new Path(forwardIndexPath))) {
        sLogger.info("PostingsForwardIndex already exists: skipping!");
        return 0;
    }

    conf.set("ForwardIndexPath", forwardIndexPath);
    conf.setJobName("BuildTweetsForwardIndex");

    Path inputPath = new Path(postingsPath);
    FileInputFormat.setInputPaths(conf, inputPath);

    conf.setNumMapTasks(mapTasks);
    conf.setNumReduceTasks(1);

    conf.set("mapred.child.java.opts", "-Xmx2048m");

    conf.setInputFormat(SequenceFileInputFormat.class);
    conf.setMapOutputKeyClass(LongWritable.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setMapRunnerClass(MyMapRunner.class);
    conf.setReducerClass(MyReducer.class);

    JobClient.runJob(conf);

    return 0;
}
From source file:sg.edu.astar.dsi.mergespill.App.java
public synchronized static void doProcess(String directory, int spillNumber)
        throws IOException, InterruptedException {
    System.out.println("directory: " + directory);
    System.out.println("numberOfSpill: " + spillNumber);

    // SETUP
    JobConf job = new JobConf();
    //job.setMapOutputKeyClass(Text.class);
    job.setMapOutputKeyClass(TextDsi.class);
    job.setMapOutputValueClass(IntWritable.class);

    //Class<Text> keyClass = (Class<Text>)job.getMapOutputKeyClass();
    Class<TextDsi> keyClass = (Class<TextDsi>) job.getMapOutputKeyClass();
    Class<IntWritable> valClass = (Class<IntWritable>) job.getMapOutputValueClass();
    FileSystem rfs;
    CompressionCodec codec = null;
    Counters.Counter spilledRecordsCounter = null;
    rfs = ((LocalFileSystem) FileSystem.getLocal(job)).getRaw();

    while (!new File(directory).isDirectory()) {
        sleep(5000);
    }

    if (new File(directory).isDirectory()) {
        ArrayList<Path> spillFile = new ArrayList();
        ArrayList<Path> spillFileIndex = new ArrayList();

        App myApp;
        myApp = new App();

        myApp.getSpillFilesAndIndices(new File(directory), spillFile, spillFileIndex, spillNumber);

        ArrayList<SpillRecord> indexCacheList = new ArrayList<>();
        int numSpills = 0;

        Iterator itrSpillFileIndex = spillFileIndex.iterator();
        while (itrSpillFileIndex.hasNext()) {
            numSpills++;
            Path temp = (Path) itrSpillFileIndex.next();
            System.out.println(temp);
            SpillRecord sr = new SpillRecord(temp, job);
            indexCacheList.add(sr);

            System.out.println("indexFile partition size: " + sr.size());
            long startOffset = 0;
            for (int i = 0; i < sr.size(); i++) { // sr.size is the number of partitions
                IndexRecord ir = sr.getIndex(i);
                System.out.println("index[" + i + "] rawLength = " + ir.rawLength);
                System.out.println("index[" + i + "] partLength = " + ir.partLength);
                System.out.println("index[" + i + "] startOffset= " + ir.startOffset);
                startOffset = ir.startOffset;
            }
            System.out.println("========================================");
        }
        System.out.println("Number of spills: " + numSpills);

        // FinalOutputFile
        Path finalOutputFile = new Path(directory + File.separator + "FINALOUTPUTFILE");
        FSDataOutputStream finalOut = rfs.create(finalOutputFile, true, 4096);
        System.out.println("GOT HERE 1");
        Path finalIndexFile = new Path(directory + File.separator + "FINALOUTPUTFILE.index");

        // ONE PARTITION ONLY
        List<Segment<TextDsi, IntWritable>> segmentList = new ArrayList<>(numSpills);
        for (int i = 0; i < numSpills; i++) {
            IndexRecord theIndexRecord = indexCacheList.get(i).getIndex(0);
            Path temp = spillFileIndex.get(i);
            String temp1 = temp.toString();
            String temp2 = temp1.substring(0, temp1.length() - 6);
            //System.out.println(temp2);
            //System.out.println(new Path(temp2).getParent());
            //File myFile = new File(temp2);
            //System.out.println(myFile.getPath());

            Segment<TextDsi, IntWritable> s = new Segment<>(job, rfs, new Path(temp2),
                    theIndexRecord.startOffset, theIndexRecord.partLength, codec, true);
            segmentList.add(i, s);
        }
        System.out.println("GOT HERE 2");

        RawKeyValueIterator kvIter = Merger.merge(job, rfs, keyClass, valClass, null, segmentList, 4,
                new Path("/home/hduser/spillSample2/My"), job.getOutputKeyComparator(), null, false, null,
                spilledRecordsCounter, null, TaskType.MAP);
        System.out.println("GOT HERE 3");

        // write merged output to disk
        long segmentStart = finalOut.getPos();
        FSDataOutputStream finalPartitionOut = CryptoUtils.wrapIfNecessary(job, finalOut);
        Writer<TextDsi, IntWritable> writer = new Writer<TextDsi, IntWritable>(job, finalPartitionOut,
                TextDsi.class, IntWritable.class, codec, spilledRecordsCounter);
        System.out.println("GOT HERE 4");

        Merger.writeFile(kvIter, writer, null, job);
        writer.close();
        finalOut.close();
        System.out.println("GOT HERE 5");

        IndexRecord rec = new IndexRecord();
        final SpillRecord spillRec = new SpillRecord(1);
        rec.startOffset = segmentStart;
        rec.rawLength = writer.getRawLength() + CryptoUtils.cryptoPadding(job);
        rec.partLength = writer.getCompressedLength() + CryptoUtils.cryptoPadding(job);
        System.out.println("rec.startOffset: " + rec.startOffset);
        System.out.println("rec.rawLength : " + rec.rawLength);
        System.out.println("rec.partLength : " + rec.partLength);
        spillRec.putIndex(rec, 0);
        spillRec.writeToFile(finalIndexFile, job);
        System.out.println("GOT HERE 6");
    } else {
        System.out.println("argument is not a directory! : " + directory);
    }
}
From source file:TVA.Hadoop.MapReduce.Development.Test_RecordReader_Alt.java
/**
 * The main driver for the word count map/reduce program.
 * Invoke this method to submit the map/reduce job.
 * @throws IOException When there are communication problems with the
 *         job tracker.
 */
public int run(String[] args) throws Exception {
    JobConf conf = new JobConf(getConf(), Test_RecordReader_Alt.class);
    conf.setJobName("Test_RecordReader_Alt");

    // the keys are words (strings)
    //conf.setOutputKeyClass(IntWritable.class);
    //conf.setOutputValueClass(DoubleWritable.class);
    conf.setMapOutputKeyClass(IntWritable.class);
    conf.setMapOutputValueClass(StandardPointFile.class);

    conf.set("gov.tva.mapreduce.AverageFrequency.connectionstring",
            "jdbc:sqlserver://rgocdsql:1433; databaseName=PhasorMeasurementData;user=NaspiApp;password=pw4site;");
    conf.set("gov.tva.mapreduce.AverageFrequency.HistorianID", "2");

    conf.setMapperClass(MapClass.class);
    //conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    conf.setInputFormat(DatAware_InputFormat.class);

    List<String> other_args = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                conf.setNumMapTasks(Integer.parseInt(args[++i]));
            } else if ("-r".equals(args[i])) {
                conf.setNumReduceTasks(Integer.parseInt(args[++i]));
            } else {
                other_args.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage();
        }
    }
    // Make sure there are exactly 2 parameters left.
    if (other_args.size() != 2) {
        System.out.println("ERROR: Wrong number of parameters: " + other_args.size() + " instead of 2.");
        return printUsage();
    }

    /*
     * At this point, we need to check for a parameter that represents the id
     * of any other info we may need to view
     * --- then set the parameter in the job configuration
     * ex: conf.set( "gov.tva.AvgFreq.Company.ID", other_args.get( n ) );
     */
    FileInputFormat.setInputPaths(conf, other_args.get(0));
    FileOutputFormat.setOutputPath(conf, new Path(other_args.get(1)));

    JobClient.runJob(conf);

    return 0;
}