List of usage examples for the org.apache.hadoop.mapreduce.Job method setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:com.lightboxtechnologies.spectrum.ExtentsExtractor.java
License:Apache License
public static int run(String imageID, String friendlyName, String outDir) throws Exception { Job job = SKJobFactory.createJob(imageID, friendlyName, "ExtentsExtractor"); job.setJarByClass(ExtentsExtractor.class); job.setMapperClass(ExtentsExtractorMapper.class); job.setNumReduceTasks(1);//from w ww . j av a2s .c o m job.setReducerClass(Reducer.class); job.setOutputKeyClass(LongWritable.class); job.setOutputValueClass(JsonWritable.class); job.setInputFormatClass(FsEntryHBaseInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputPath(job, new Path(outDir)); FsEntryHBaseInputFormat.setupJob(job, imageID); System.out.println("Spinning off ExtentsExtractor Job..."); job.waitForCompletion(true); return 0; }
From source file:com.lightboxtechnologies.spectrum.ExtractData.java
License:Apache License
public int run(String[] args) throws Exception { if (args.length != 4) { System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>"); return 2; }//w w w. j ava 2 s.c o m final String imageID = args[0]; final String friendlyName = args[1]; final String extentsPath = args[2]; final String image = args[3]; Configuration conf = getConf(); final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf); job.setJarByClass(ExtractData.class); job.setMapperClass(ExtractDataMapper.class); job.setReducerClass(KeyValueSortReducer.class); job.setNumReduceTasks(1); // job ctor copies the Configuration we pass it, get the real one conf = job.getConfiguration(); conf.setLong("timestamp", System.currentTimeMillis()); job.setInputFormatClass(RawFileInputFormat.class); RawFileInputFormat.addInputPath(job, new Path(image)); job.setOutputFormatClass(HFileOutputFormat.class); job.setOutputKeyClass(ImmutableBytesWritable.class); job.setOutputValueClass(KeyValue.class); conf.setInt("mapreduce.job.jvm.numtasks", -1); final FileSystem fs = FileSystem.get(conf); Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString()); hfileDir = hfileDir.makeQualified(fs); LOG.info("Hashes will be written temporarily to " + hfileDir); HFileOutputFormat.setOutputPath(job, hfileDir); final Path extp = new Path(extentsPath); final URI extents = extp.toUri(); LOG.info("extents file is " + extents); DistributedCache.addCacheFile(extents, conf); conf.set("com.lbt.extentsname", extp.getName()); // job.getConfiguration().setBoolean("mapred.task.profile", true); // job.getConfiguration().setBoolean("mapreduce.task.profile", true); HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B); HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B); final boolean result = job.waitForCompletion(true); if (result) { LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf); 
HBaseConfiguration.addHbaseResources(conf); loader.setConf(conf); LOG.info("Loading hashes into hbase"); chmodR(fs, hfileDir); loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B)); // result = fs.delete(hfileDir, true); } return result ? 0 : 1; }
From source file:com.lightboxtechnologies.spectrum.FolderCount.java
License:Apache License
public static void main(String[] args) throws Exception { final Configuration conf = new Configuration(); final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); if (otherArgs.length != 2) { System.err.println("Usage: FolderCount <table> <outpath>"); System.exit(2);//from ww w . j a v a2s .c o m } final Job job = new Job(conf, "FolderCount"); job.setJarByClass(FolderCount.class); job.setMapperClass(FolderCountMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setNumReduceTasks(1); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(FsEntryHBaseInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); final Scan scan = new Scan(); scan.addFamily(HBaseTables.ENTRIES_COLFAM_B); job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]); job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan)); FileOutputFormat.setOutputPath(job, new Path(otherArgs[1])); System.exit(job.waitForCompletion(true) ? 0 : 1); }
From source file:com.lightboxtechnologies.spectrum.JsonImport.java
License:Apache License
public static int run(String jsonPath, String imageHash, String friendlyName, Configuration conf) throws Exception { if (conf == null) { conf = HBaseConfiguration.create(); }//w w w. ja va2 s. c o m conf.set(HBaseTables.ENTRIES_TBL_VAR, HBaseTables.ENTRIES_TBL); final Job job = SKJobFactory.createJobFromConf(imageHash, friendlyName, "JsonImport", conf); job.setJarByClass(JsonImport.class); job.setMapperClass(FsEntryMapLoader.class); job.setNumReduceTasks(0); job.setOutputFormatClass(FsEntryHBaseOutputFormat.class); FileInputFormat.addInputPath(job, new Path(jsonPath)); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java
License:Apache License
public static int run(String imageID, String outpath, String[] command, Configuration conf) throws ClassNotFoundException, DecoderException, IOException, InterruptedException { conf.setStrings("command", command); conf.setLong("timestamp", System.currentTimeMillis()); final Job job = new Job(conf, "MRCoffeeJob"); job.setJarByClass(MRCoffeeJob.class); job.setMapperClass(MRCoffeeMapper.class); // job.setReducerClass(KeyValueSortReducer.class); // job.setNumReduceTasks(1); job.setNumReduceTasks(0);/* w ww. jav a 2 s .c om*/ FsEntryHBaseInputFormat.setupJob(job, imageID); job.setInputFormatClass(FsEntryHBaseInputFormat.class); job.setOutputKeyClass(ImmutableHexWritable.class); // job.setOutputValueClass(KeyValue.class); job.setOutputValueClass(JsonWritable.class); // job.setOutputFormatClass(HFileOutputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); // HFileOutputFormat.setOutputPath(job, new Path(outpath)); TextOutputFormat.setOutputPath(job, new Path(outpath)); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.lightboxtechnologies.spectrum.PythonJob.java
License:Apache License
public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred, String format, Configuration conf) throws Exception { if (conf == null) { conf = HBaseConfiguration.create(); }//from w ww .ja v a 2s .c o m final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf); job.setJarByClass(PythonJob.class); job.setMapperClass(PythonMapper.class); PyEngine py = new PyEngine(); configPyTask(job, py, "map", pymap); job.setMapOutputKeyClass(py.getKeyClass()); job.setMapOutputValueClass(py.getValueClass()); int numReduces = 1; job.setOutputKeyClass(py.getKeyClass()); job.setOutputValueClass(py.getValueClass()); if (pyred.equals("none")) { numReduces = 0; } else if (pyred.equals("identity")) { job.setReducerClass(Reducer.class); job.setOutputKeyClass(py.getKeyClass()); job.setOutputValueClass(py.getValueClass()); } else if (pyred.equals("LongSumReducer")) { job.setReducerClass(LongSumReducer.class); job.setCombinerClass(LongSumReducer.class); } else { job.setReducerClass(PythonReducer.class); configPyTask(job, py, "reduce", pyred); job.setOutputKeyClass(py.getKeyClass()); job.setOutputValueClass(py.getValueClass()); } job.setNumReduceTasks(numReduces); // it is possible to run over a flat json file... // String input = otherArgs[0]; // if (input.endsWith(".json") == true) { // job.setInputFormatClass(FsEntryJsonInputFormat.class); // FsEntryJsonInputFormat.addInputPath(job, new Path(input)); // } // else { FsEntryHBaseInputFormat.setupJob(job, imageID); job.setInputFormatClass(FsEntryHBaseInputFormat.class); if (format != null && format.equals("SequenceFileOutputFormat")) { job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); } else { job.setOutputFormatClass(TextOutputFormat.class); } FileOutputFormat.setOutputPath(job, new Path(outpath)); return job.waitForCompletion(true) ? 0 : 1; }
From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java
License:Apache License
public static void main(String[] args) throws Exception { final Configuration conf = new Configuration(); final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); String imageID;/*from w ww .ja v a 2 s . c om*/ String outpath; String friendlyname; final Set<String> exts = new HashSet<String>(); if ("-f".equals(otherArgs[0])) { if (otherArgs.length != 4) { die(); } // load extensions from file final Path extpath = new Path(otherArgs[1]); InputStream in = null; try { in = extpath.getFileSystem(conf).open(extpath); Reader r = null; try { r = new InputStreamReader(in); BufferedReader br = null; try { br = new BufferedReader(r); String line; while ((line = br.readLine()) != null) { exts.add(line.trim().toLowerCase()); } br.close(); } finally { IOUtils.closeQuietly(br); } r.close(); } finally { IOUtils.closeQuietly(r); } in.close(); } finally { IOUtils.closeQuietly(in); } imageID = otherArgs[2]; friendlyname = otherArgs[3]; outpath = otherArgs[4]; } else { if (otherArgs.length < 3) { die(); } // read extensions from trailing args imageID = otherArgs[0]; friendlyname = otherArgs[1]; outpath = otherArgs[2]; // lowercase all file extensions for (int i = 2; i < otherArgs.length; ++i) { exts.add(otherArgs[i].toLowerCase()); } } conf.setStrings("extensions", exts.toArray(new String[exts.size()])); final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf); job.setJarByClass(SequenceFileExport.class); job.setMapperClass(SequenceFileExportMapper.class); job.setNumReduceTasks(0); job.setOutputKeyClass(BytesWritable.class); job.setOutputValueClass(MapWritable.class); job.setInputFormatClass(FsEntryHBaseInputFormat.class); FsEntryHBaseInputFormat.setupJob(job, imageID); job.setOutputFormatClass(SequenceFileOutputFormat.class); SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK); FileOutputFormat.setOutputPath(job, new Path(outpath)); System.exit(job.waitForCompletion(true) ? 
0 : 1); }
From source file:com.linkedin.cubert.io.avro.AvroStorage.java
License:Open Source License
@Override public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) { Schema avroSchema = null;//from w w w .j a v a 2s . co m // we can specify the avro schema of the output directly, instead of deriving // it from the BlockSchema // if (json.has("avroSchema")) // { // avroSchema = // new org.apache.avro.Schema.Parser().parse(JsonUtils.getText(json, // "avroSchema")); // } // else // { // schema = new BlockSchema(json.get("schema")); avroSchema = AvroUtils.convertFromBlockSchema("record", schema); conf.set("cubert.avro.output.schema", avroSchema.toString()); job.setOutputFormatClass(PigAvroOutputFormatAdaptor.class); // AvroJob.setOutputKeySchema(job, avroSchema); // AvroJob.setOutputValueSchema(job, Schema.create(Type.NULL)); // job.setOutputFormatClass(AvroKeyOutputFormat.class); }
From source file:com.linkedin.cubert.io.rubix.RubixStorage.java
License:Open Source License
/**
 * Prepares {@code job} for Rubix output.
 *
 * <p>Both the output key and value classes are set to the runtime class of a
 * tuple created by the default {@code TupleFactory}, and the output format is
 * set to {@code RubixOutputFormat}. When {@code params} has a
 * {@code "compact"} entry, its text is parsed as a boolean and stored in
 * {@code conf} under {@code CubertStrings.USE_COMPACT_SERIALIZATION}.
 * {@code schema} and {@code path} are not used by this implementation.
 */
@Override
public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) {
  final Class<?> tupleType = TupleFactory.getInstance().newTuple().getClass();
  job.setOutputKeyClass(tupleType);
  job.setOutputValueClass(tupleType);
  job.setOutputFormatClass(RubixOutputFormat.class);

  if (!params.has("compact")) {
    return;
  }

  final boolean useCompact = Boolean.parseBoolean(JsonUtils.getText(params, "compact"));
  conf.setBoolean(CubertStrings.USE_COMPACT_SERIALIZATION, useCompact);
}
From source file:com.linkedin.cubert.io.text.TextStorage.java
License:Open Source License
@Override public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) { if (params.has("separator")) { conf.set(CubertStrings.TEXT_OUTPUT_SEPARATOR, JsonUtils.getText(params, "separator")); }//from w w w . ja v a 2 s. c o m job.setOutputFormatClass(PigTextOutputFormatWrapper.class); }