List of usage examples for org.apache.hadoop.mapred.JobConf.setMapOutputCompressorClass
public void setMapOutputCompressorClass(Class<? extends CompressionCodec> codecClass)
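Before the longer real-world examples below, here is a minimal sketch of the idiom they all share: setCompressMapOutput(true) turns on compression of the intermediate map output, and setMapOutputCompressorClass selects the codec used for it. This is an assumed stand-alone driver (the class name, job name, and argument handling are illustrative, not taken from any of the sources below); it uses Hadoop's IdentityMapper and IdentityReducer so the compression settings stay in focus.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class CompressedMapOutputExample {
    public static void main(String[] args) throws IOException {
        JobConf conf = new JobConf(CompressedMapOutputExample.class);
        conf.setJobName("compressed-map-output-example");

        // Identity mapper/reducer: the job simply copies its input records,
        // which keeps the focus on the compression settings below.
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        // Compress the intermediate map output and pick the codec for it.
        // This affects only the map-side spill/shuffle data, not the final job output.
        conf.setCompressMapOutput(true);
        conf.setMapOutputCompressorClass(GzipCodec.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}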
From source file:org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2AdjSetVertexTransformer.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    JobConf jobConf = new JobConf(conf, OutAdjVertex2AdjSetVertexTransformer.class);
    jobConf.setJobName("OutAdjVertex2AdjSetVertexTransformer");
    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(AdjSetVertex.class);
    jobConf.setMapperClass(MapClass.class);
    // no combiner is needed.
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);
    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2LabeledSetWithLabelTransformer.java
License:Apache License
@SuppressWarnings("deprecation")
@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2LabeledSetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2LabeledSetWithLabelTransformer");
    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(LabeledAdjSetVertexWithTwoHopLabel.class);
    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);
    try {
        this.runningJob = JobClient.runJob(jobConf);
        System.out.println("Output Node Num. ="
                + this.runningJob.getCounters().getCounter(Counter.VertexNum));
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2SetWithLabelTransformer.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2SetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2AdjSetVertexWithLabelTransformer");
    // the keys are vertex identifiers (strings)
    jobConf.setOutputKeyClass(Text.class);
    // the values are adjacent vertexes with labels (Writable)
    jobConf.setOutputValueClass(AdjSetVertexWithTwoHopLabel.class);
    jobConf.setMapperClass(MapClass.class);
    jobConf.setCombinerClass(ReduceClass.class);
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);
    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2StrongLabeledSWLTransformer.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2StrongLabeledSWLTransformer.class);
    jobConf.setJobName("OutAdjVertex2StrongLabeledSWLTransformer");
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(LabeledAdjSetVertex.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(LabeledAdjSetVertexWithTwoHopLabel.class);
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);
    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.sf.xrime.algorithms.transform.vertex.OutAdjVertex2StrongSetWithLabelTransformer.java
License:Apache License
@Override
public void execute() throws ProcessorExecutionException {
    // Create a JobConf with default settings.
    JobConf jobConf = new JobConf(conf, OutAdjVertex2StrongSetWithLabelTransformer.class);
    jobConf.setJobName("OutAdjVertex2StrongSetWithLabelTransformer");
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(LabeledAdjSetVertex.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(AdjSetVertexWithTwoHopLabel.class);
    jobConf.setMapperClass(MapClass.class);
    jobConf.setReducerClass(ReduceClass.class);
    // makes the file format suitable for machine processing.
    jobConf.setInputFormat(SequenceFileInputFormat.class);
    jobConf.setOutputFormat(SequenceFileOutputFormat.class);
    // Enable compression.
    jobConf.setCompressMapOutput(true);
    jobConf.setMapOutputCompressorClass(GzipCodec.class);
    FileInputFormat.setInputPaths(jobConf, srcPath);
    FileOutputFormat.setOutputPath(jobConf, destPath);
    jobConf.setNumMapTasks(mapperNum);
    jobConf.setNumReduceTasks(reducerNum);
    try {
        this.runningJob = JobClient.runJob(jobConf);
    } catch (IOException e) {
        throw new ProcessorExecutionException(e);
    }
}
From source file:org.terrier.applications.HadoopIndexing.java
License:Mozilla Public License
/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public static void main(String[] args) throws Exception {
    long time = System.currentTimeMillis();
    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.info("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return;
    } else if (args.length == 0) {
        logger.info("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    } else {
        logger.fatal(usage());
        return;
    }
    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return;
    }
    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJobName("terrierIndexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return;
    }
    boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }
    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);
    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));
    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }
    JobID jobId = null;
    boolean ranOK = true;
    try {
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    if (ranOK) {
        if (!docPartitioned) {
            if (numberOfReducers > 1)
                mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        }
        Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
                docPartitioned ? numberOfReducers : 1, jf);
    }
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
}
From source file:org.terrier.utility.io.HadoopUtility.java
License:Mozilla Public License
/** Utility method to set MapOutputCompression if possible.
 * In general, I find that MapOutputCompression fails for
 * local job trackers, so this code checks the job tracker
 * location first.
 * @param conf JobConf of job.
 * @return true if MapOutputCompression was set.
 */
public static boolean setMapOutputCompression(JobConf conf) {
    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
        return true;
    }
    return false;
}
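A hypothetical caller of this utility might look like the sketch below; the driver class name and the printed messages are placeholders, not part of the Terrier source.

import org.apache.hadoop.mapred.JobConf;
import org.terrier.utility.io.HadoopUtility;

public class CompressionCheckExample {
    public static void main(String[] args) {
        // On a real cluster "mapred.job.tracker" points at the job tracker address,
        // while default/local configurations typically leave it set to "local".
        JobConf conf = new JobConf(CompressionCheckExample.class);
        boolean enabled = HadoopUtility.setMapOutputCompression(conf);
        System.out.println(enabled
                ? "Map output compression enabled with GzipCodec"
                : "Local job tracker detected; map outputs left uncompressed");
    }
}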