Example usage for org.apache.hadoop.mapred JobConf setMapOutputCompressorClass

Introduction

On this page you can find example usage of org.apache.hadoop.mapred.JobConf.setMapOutputCompressorClass.

Prototype

public void setMapOutputCompressorClass(Class<? extends CompressionCodec> codecClass) 

Document

Set the given class as the CompressionCodec for the map outputs.
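
Before the project examples below, here is a minimal, self-contained sketch (not taken from any of the listed projects) of the typical pattern: setMapOutputCompressorClass only selects the codec, so it is paired with setCompressMapOutput(true) to actually compress the intermediate map output. The identity mapper/reducer and the command-line paths are placeholders.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class CompressedMapOutputExample {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf(CompressedMapOutputExample.class);
        conf.setJobName("compressed-map-output-example");

        // Pass-through mapper and reducer; with the default TextInputFormat the
        // keys are byte offsets (LongWritable) and the values are lines (Text).
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Choosing the codec alone is not enough; compression of the
        // intermediate map output must also be switched on.
        conf.setCompressMapOutput(true);
        conf.setMapOutputCompressorClass(GzipCodec.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));   // input path (placeholder)
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));  // output path (placeholder)

        JobClient.runJob(conf);
    }
}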

Usage

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2AdjSetVertexTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, OutAdjVertex2AdjSetVertexTransformer.class);
    jobConf.setJobName("OutAdjVertex2AdjSetVertexTransformer");

    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(AdjSetVertex.class);
    jobConf.setMapperClass(MapClass.class);
    // no combiner is needed.
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2LabeledSetWithLabelTransformer.java

License: Apache License

@SuppressWarnings("deprecation")
@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2LabeledSetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2LabeledSetWithLabelTransformer");

    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(LabeledAdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
        System.out.println("Output Node Num. =" + this.runningJob.getCounters().getCounter(Counter.VertexNum));
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2SetWithLabelTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2SetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2AdjSetVertexWithLabelTransformer");

    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(AdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2StrongLabeledSWLTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2StrongLabeledSWLTransformer.class);
    jobConf.setJobName("OutAdjVertex2StrongLabeledSWLTransformer");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(LabeledAdjSetVertex.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(LabeledAdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2StrongSetWithLabelTransformer.java

License: Apache License

@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2StrongSetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2StrongSetWithLabelTransformer");

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(LabeledAdjSetVertex.class);

    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(AdjSetVertexWithTwoHopLabel.class);

    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);

    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);

    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);

    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);

    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);

    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}

From source file: org.terrier.applications.HadoopIndexing.java

License: Mozilla Public License

/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    long time = System.currentTimeMillis();

    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.info("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return;
    } else if (args.length == 0) {
        logger.info("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    } else {
        logger.fatal(usage());
        return;
    }

    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return;
    }

    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJobName("terrierIndexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return;
    }

    boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);

    // Compress map output with gzip only when a real (non-local) job tracker is
    // in use; map output compression tends to fail with the local job tracker.
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }

    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);
    //parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));
    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        //for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }

    JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    if (ranOK) {
        if (!docPartitioned) {
            if (numberOfReducers > 1)
                mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        }

        Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
                docPartitioned ? numberOfReducers : 1, jf);
    }
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
}

From source file: org.terrier.utility.io.HadoopUtility.java

License: Mozilla Public License

/** Utility method to set MapOutputCompression if possible.
 * In general, I find that MapOutputCompression fails for
 * local job trackers, so this code checks the job tracker
 * location first.
 * @param conf JobConf of job.
 * @return true if MapOutputCompression was set.
 */
public static boolean setMapOutputCompression(JobConf conf) {
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
        return true;
    }
    return false;
}
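
A caller would typically invoke this helper on its JobConf before submitting the job. A hypothetical sketch (the surrounding job setup is assumed and is not part of Terrier's code):

    JobConf conf = jf.newJob(); // obtained from a HadoopPlugin.JobFactory, as in HadoopIndexing above
    // ... mapper, reducer, input and output configuration ...
    if (!HadoopUtility.setMapOutputCompression(conf)) {
        // local job tracker: map output is left uncompressed
    }
    RunningJob rj = JobClient.runJob(conf);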