Example usage for org.apache.hadoop.mapred JobConf setMapOutputCompressorClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf.setMapOutputCompressorClass.

Prototype

public void setMapOutputCompressorClass(Class<? extends CompressionCodec> codecClass) 

Document

Set the given class as the CompressionCodec for the map outputs.
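
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the typical pattern: setMapOutputCompressorClass only selects the codec, so it is paired with setCompressMapOutput(true) to actually compress the intermediate map output. The identity mapper/reducer and the command-line paths are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class CompressedMapOutputExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(CompressedMapOutputExample.class);
        conf.setJobName("compressed-map-output-example");

        // Pass-through mapper and reducer; with the default TextInputFormat the
        // keys are byte offsets (LongWritable) and the values are lines (Text).
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Choosing the codec alone is not enough; compression of the
        // intermediate map output must also be switched on.
        conf.setCompressMapOutput(true);
        conf.setMapOutputCompressorClass(GzipCodec.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));   // input path (placeholder)
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));  // output path (placeholder)

        JobClient.runJob(conf);
    }
}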

Usage

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2AdjSetVertexTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, OutAdjVertex2AdjSetVertexTransformer.class);
    jobConf.setJobName("OutAdjVertex2AdjSetVertexTransformer");

    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(AdjSetVertex.class);
    jobConf.setMapperClass(MapClass.class);
    // no combiner is needed.
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2LabeledSetWithLabelTransformer.java

License: Apache License

@SuppressWarnings("deprecation")
@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2LabeledSetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2LabeledSetWithLabelTransformer");

    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(LabeledAdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
        System.out.println("Output Node Num. =" + this.runningJob.getCounters().getCounter(Counter.VertexNum));
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2SetWithLabelTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2SetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2AdjSetVertexWithLabelTransformer");

    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(AdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2StrongLabeledSWLTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2StrongLabeledSWLTransformer.class);
    jobConf.setJobName("OutAdjVertex2StrongLabeledSWLTransformer");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(LabeledAdjSetVertex.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(LabeledAdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2StrongSetWithLabelTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2StrongSetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2StrongSetWithLabelTransformer");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(LabeledAdjSetVertex.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(AdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.terrier.applications.HadoopIndexing.java

License: Mozilla Public License

/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    long time = System.currentTimeMillis();

    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.info("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return;
    } else if (args.length == 0) {
        logger.info("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    } else {
        logger.fatal(usage());
        return;
    }

    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return;
    }

    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJobName("terrierIndexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return;
    }

    boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);

    // Compress map output with gzip only when a real (non-local) job tracker is
    // in use; map output compression tends to fail with the local job tracker.
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }

    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);
    //parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));
    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        //for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }

    JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    if (ranOK) {
        if (!docPartitioned) {
            if (numberOfReducers > 1)
                mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        }

        Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
                docPartitioned ? numberOfReducers : 1, jf);
    }
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
}

From source file: org.terrier.utility.io.HadoopUtility.java

License: Mozilla Public License

/** Utility method to set MapOutputCompression if possible.
 * In general, I find that MapOutputCompression fails for
 * local job trackers, so this code checks the job tracker
 * location first.
 * @param conf JobConf of job.
 * @return true if MapOutputCompression was set.
 */
public static boolean setMapOutputCompression(JobConf conf) {
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
        return true;
    }
    return false;
}
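
A caller would typically invoke this helper on its JobConf before submitting the job. A hypothetical sketch (the surrounding job setup is assumed and is not part of Terrier's code):

    JobConf conf = jf.newJob(); // obtained from a HadoopPlugin.JobFactory, as in HadoopIndexing above
    // ... mapper, reducer, input and output configuration ...
    if (!HadoopUtility.setMapOutputCompression(conf)) {
        // local job tracker: map output is left uncompressed
    }
    RunningJob rj = JobClient.runJob(conf);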