List of usage examples for org.apache.hadoop.mapreduce.Job#setOutputKeyClass
public void setOutputKeyClass(Class<?> theClass) throws IllegalStateException
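setOutputKeyClass declares the class of the job's final output keys and throws IllegalStateException if called after the job has been submitted. Before the project-specific examples, here is a minimal, self-contained sketch of where the call typically sits in driver setup; it is a basic word count, and the class names and path arguments are illustrative only, not taken from the projects below.

import java.io.IOException;
import java.util.StringTokenizer;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputKeyClassExample {

    public static class TokenMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
        private static final IntWritable ONE = new IntWritable(1);
        private final Text word = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            while (itr.hasMoreTokens()) {
                word.set(itr.nextToken());
                context.write(word, ONE);
            }
        }
    }

    public static class SumReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
        @Override
        protected void reduce(Text key, Iterable<IntWritable> values, Context context)
                throws IOException, InterruptedException {
            int sum = 0;
            for (IntWritable v : values) {
                sum += v.get();
            }
            context.write(key, new IntWritable(sum));
        }
    }

    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "output-key-class-example");
        job.setJarByClass(OutputKeyClassExample.class);

        job.setMapperClass(TokenMapper.class);
        job.setCombinerClass(SumReducer.class);
        job.setReducerClass(SumReducer.class);

        // Declare the types of the job's final output records. setOutputKeyClass
        // throws IllegalStateException if called after the job has been submitted.
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Note that when setMapOutputKeyClass and setMapOutputValueClass are not called, the map output types default to the job output types set here, which is why several of the examples below only call the output setters.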
From source file:com.lightboxtechnologies.spectrum.ExtentsExtractor.java
License:Apache License
public static int run(String imageID, String friendlyName, String outDir) throws Exception {
    Job job = SKJobFactory.createJob(imageID, friendlyName, "ExtentsExtractor");
    job.setJarByClass(ExtentsExtractor.class);
    job.setMapperClass(ExtentsExtractorMapper.class);
    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(JsonWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);

    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    System.out.println("Spinning off ExtentsExtractor Job...");
    job.waitForCompletion(true);
    return 0;
}
From source file:com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // The Job constructor copies the Configuration we pass it; get the real one.
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());

    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);
    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);

    if (result) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        // result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.FolderCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java
License:Apache License
public static int run(String imageID, String outpath, String[] command, Configuration conf)
        throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
    conf.setStrings("command", command);
    conf.setLong("timestamp", System.currentTimeMillis());

    final Job job = new Job(conf, "MRCoffeeJob");
    job.setJarByClass(MRCoffeeJob.class);

    job.setMapperClass(MRCoffeeMapper.class);
    // job.setReducerClass(KeyValueSortReducer.class);
    // job.setNumReduceTasks(1);
    job.setNumReduceTasks(0);

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    job.setOutputKeyClass(ImmutableHexWritable.class);
    // job.setOutputValueClass(KeyValue.class);
    job.setOutputValueClass(JsonWritable.class);
    // job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    // HFileOutputFormat.setOutputPath(job, new Path(outpath));
    TextOutputFormat.setOutputPath(job, new Path(outpath));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.PythonJob.java
License:Apache License
public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);
    job.setMapperClass(PythonMapper.class);

    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    // String input = otherArgs[0];
    // if (input.endsWith(".json") == true) {
    //     job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //     FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    // }
    // else {
    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java
License:Apache License
public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        // -f <extensions_file> <imageID> <friendly_name> <outpath>
        if (otherArgs.length != 5) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.linkedin.cubert.io.rubix.RubixStorage.java
License:Open Source License
@Override
public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) {
    Class<?> tupleClass = TupleFactory.getInstance().newTuple().getClass();
    job.setOutputKeyClass(tupleClass);
    job.setOutputValueClass(tupleClass);
    job.setOutputFormatClass(RubixOutputFormat.class);

    if (params.has("compact"))
        conf.setBoolean(CubertStrings.USE_COMPACT_SERIALIZATION,
                Boolean.parseBoolean(JsonUtils.getText(params, "compact")));
}
From source file:com.linkedin.hadoop.example.WordCountCounters.java
License:Apache License
/**
 * Azkaban will look for a method named `run` to start your job. Use this method to set up all the
 * Hadoop-related configuration for your job and submit it.
 *
 * @throws Exception If there is an exception during the configuration or submission of your job
 */
public void run() throws Exception {
    _logger.info(String.format("Configuring job for the class %s", getClass().getSimpleName()));

    Job job = Job.getInstance(getConf());
    job.setJarByClass(WordCountJob.class);
    job.setJobName(_name);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    String inputPath = _properties.getProperty("input.path");
    String outputPath = _properties.getProperty("output.path");
    boolean forceOverwrite = Boolean.parseBoolean(_properties.getProperty("force.output.overwrite", "false"));

    FileInputFormat.addInputPath(job, new Path(inputPath));
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    // Before we submit the job, remove the old output directory
    if (forceOverwrite) {
        FileSystem fs = FileSystem.get(job.getConfiguration());
        fs.delete(FileOutputFormat.getOutputPath(job), true);
    }

    // Since we have Kerberos enabled at LinkedIn, we must add the token to our configuration. If
    // you don't use Kerberos security for your Hadoop cluster, you don't need this code.
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        job.getConfiguration().set("mapreduce.job.credentials.binary",
                System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Submit the job for execution
    _logger.info(String.format("About to submit the job named %s", _name));
    boolean succeeded = job.waitForCompletion(true);

    // Before we return, display our custom counters for the job in the Azkaban logs
    long inputWords = job.getCounters().findCounter(WordCountCounters.INPUT_WORDS).getValue();
    _logger.info(String.format("Read a total of %d input words", inputWords));

    // Azkaban will not realize the Hadoop job failed unless you specifically throw an exception
    if (!succeeded) {
        throw new Exception(String.format("Azkaban job %s failed", _name));
    }
}
From source file:com.linkedin.oneclick.wordcount.WordCount.java
License:Apache License
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    Job job = new Job(conf, "Word Count");
    job.setJarByClass(WordCount.class);

    String workDirectory = args.length >= 1 ? args[0] : "wordcount";
    Path input = new Path(workDirectory, "input.txt");
    FileSystem fs = input.getFileSystem(conf);
    fs.mkdirs(input.getParent());
    copy(resourceInputStream(getClass().getResource("/onegin.txt")), createOutputStream(conf, input), conf);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(WordCountMapper.class);
    FileInputFormat.addInputPath(job, input);

    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setOutputFormatClass(TextOutputFormat.class);
    Path output = clean(conf, new Path(workDirectory, "wordcount"));
    FileOutputFormat.setOutputPath(job, output);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    return job.waitForCompletion(true) ? 0 : -1;
}
From source file:com.linkedin.thirdeye.hadoop.aggregation.AggregationPhaseJob.java
License:Apache License
public Job run() throws Exception {
    Job job = Job.getInstance(getConf());
    job.setJobName(name);
    job.setJarByClass(AggregationPhaseJob.class);

    FileSystem fs = FileSystem.get(getConf());
    Configuration configuration = job.getConfiguration();

    // Properties
    LOGGER.info("Properties {}", props);

    // Input Path
    String inputPathDir = getAndSetConfiguration(configuration, AGG_PHASE_INPUT_PATH);
    LOGGER.info("Input path dir: " + inputPathDir);
    for (String inputPath : inputPathDir.split(ThirdEyeConstants.FIELD_SEPARATOR)) {
        LOGGER.info("Adding input:" + inputPath);
        Path input = new Path(inputPath);
        FileInputFormat.addInputPath(job, input);
    }

    // Output path
    Path outputPath = new Path(getAndSetConfiguration(configuration, AGG_PHASE_OUTPUT_PATH));
    LOGGER.info("Output path dir: " + outputPath.toString());
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }
    FileOutputFormat.setOutputPath(job, outputPath);

    // Schema
    Schema avroSchema = ThirdeyeAvroUtils.getSchema(inputPathDir);
    LOGGER.info("Schema : {}", avroSchema.toString(true));
    job.getConfiguration().set(AGG_PHASE_AVRO_SCHEMA.toString(), avroSchema.toString());

    // ThirdEyeConfig
    String metricTypesProperty = ThirdeyeAvroUtils.getMetricTypesProperty(
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_NAMES.toString()),
            props.getProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString()), avroSchema);
    props.setProperty(ThirdEyeConfigProperties.THIRDEYE_METRIC_TYPES.toString(), metricTypesProperty);
    ThirdEyeConfig thirdeyeConfig = ThirdEyeConfig.fromProperties(props);
    LOGGER.info("Thirdeye Config {}", thirdeyeConfig.encode());
    job.getConfiguration().set(AGG_PHASE_THIRDEYE_CONFIG.toString(),
            OBJECT_MAPPER.writeValueAsString(thirdeyeConfig));

    // Map config
    job.setMapperClass(AggregationMapper.class);
    job.setInputFormatClass(AvroKeyInputFormat.class);
    job.setMapOutputKeyClass(BytesWritable.class);
    job.setMapOutputValueClass(BytesWritable.class);

    // Reduce config
    job.setReducerClass(AggregationReducer.class);
    job.setOutputKeyClass(AvroKey.class);
    job.setOutputValueClass(NullWritable.class);
    AvroJob.setOutputKeySchema(job, avroSchema);
    job.setOutputFormatClass(AvroKeyOutputFormat.class);

    String numReducers = props.getProperty(ThirdEyeJobProperties.THIRDEYE_NUM_REDUCERS.getName());
    LOGGER.info("Num Reducers : {}", numReducers);
    if (StringUtils.isNotBlank(numReducers)) {
        job.setNumReduceTasks(Integer.valueOf(numReducers));
        LOGGER.info("Setting num reducers {}", job.getNumReduceTasks());
    }

    job.waitForCompletion(true);

    Counter counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    if (counter.getValue() == 0) {
        throw new IllegalStateException("No input records in " + inputPathDir);
    }

    counter = job.getCounters().findCounter(AggregationCounter.NUMBER_OF_RECORDS_FLATTENED);
    LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());

    for (String metric : thirdeyeConfig.getMetricNames()) {
        counter = job.getCounters().findCounter(thirdeyeConfig.getCollection(), metric);
        LOGGER.info(counter.getDisplayName() + " : " + counter.getValue());
    }

    return job;
}