List of usage examples for org.apache.hadoop.mapreduce Job setOutputFormatClass
public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException
From source file:com.mongodb.hadoop.examples.wordcount.split.WordCountSplitTest.java
License:Apache License
private final static void test(boolean useShards, boolean useChunks, Boolean slaveok, boolean useQuery) throws Exception { final Configuration conf = new Configuration(); MongoConfigUtil.setInputURI(conf, "mongodb://localhost:30000/test.lines"); conf.setBoolean(MongoConfigUtil.SPLITS_USE_SHARDS, useShards); conf.setBoolean(MongoConfigUtil.SPLITS_USE_CHUNKS, useChunks); if (useQuery) { //NOTE: must do this BEFORE Job is created final MongoConfig mongo_conf = new MongoConfig(conf); com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject(); query.put("num", new com.mongodb.BasicDBObject(Collections.singletonMap("$mod", new int[] { 2, 0 }))); System.out.println(" --- setting query on num"); mongo_conf.setQuery(query);//from w ww.j a v a 2s.c o m System.out.println(" --- query is: " + mongo_conf.getQuery()); } String output_table = null; if (useChunks) { if (useShards) output_table = "with_shards_and_chunks"; else output_table = "with_chunks"; } else { if (useShards) output_table = "with_shards"; else output_table = "no_splits"; } if (slaveok != null) { output_table += "_" + slaveok; } MongoConfigUtil.setOutputURI(conf, "mongodb://localhost:30000/test." 
+ output_table); System.out.println("Conf: " + conf); final Job job = new Job(conf, "word count " + output_table); job.setJarByClass(WordCountSplitTest.class); job.setMapperClass(TokenizerMapper.class); job.setCombinerClass(IntSumReducer.class); job.setReducerClass(IntSumReducer.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setInputFormatClass(MongoInputFormat.class); job.setOutputFormatClass(MongoOutputFormat.class); final long start = System.currentTimeMillis(); System.out.println(" ----------------------- running test " + output_table + " --------------------"); try { boolean result = job.waitForCompletion(true); System.out.println("job.waitForCompletion( true ) returned " + result); } catch (Exception e) { System.out.println("job.waitForCompletion( true ) threw Exception"); e.printStackTrace(); } final long end = System.currentTimeMillis(); final float seconds = ((float) (end - start)) / 1000; java.text.NumberFormat nf = java.text.NumberFormat.getInstance(); nf.setMaximumFractionDigits(3); System.out.println("finished run in " + nf.format(seconds) + " seconds"); com.mongodb.Mongo m = new com.mongodb.Mongo( new com.mongodb.MongoURI("mongodb://localhost:30000/?slaveok=true")); com.mongodb.DB db = m.getDB("test"); com.mongodb.DBCollection coll = db.getCollection(output_table); com.mongodb.BasicDBObject query = new com.mongodb.BasicDBObject(); query.put("_id", "the"); com.mongodb.DBCursor cur = coll.find(query); if (!cur.hasNext()) System.out.println("FAILURE: could not find count of \'the\'"); else System.out.println("'the' count: " + cur.next()); // if (! result) // System.exit( 1 ); }
From source file:com.mongodb.hadoop.examples.wordcount.WordCount.java
License:Apache License
/**
 * Entry point: configures a word-count job that reads from {@code test.in} and writes to
 * {@code test.out} on the local MongoDB, runs it, and exits with status 0 on success or
 * 1 on failure.
 *
 * @param args command-line arguments (not used)
 * @throws Exception if the job cannot be created or run
 */
public static void main(String[] args) throws Exception {
    final Configuration configuration = new Configuration();
    MongoConfigUtil.setInputURI(configuration, "mongodb://localhost/test.in");
    MongoConfigUtil.setOutputURI(configuration, "mongodb://localhost/test.out");
    System.out.println("Conf: " + configuration);

    final Job wordCountJob = new Job(configuration, "word count");
    wordCountJob.setJarByClass(WordCount.class);

    // Processing pipeline: tokenize, then sum counts (the reducer doubles as combiner).
    wordCountJob.setMapperClass(TokenizerMapper.class);
    wordCountJob.setCombinerClass(IntSumReducer.class);
    wordCountJob.setReducerClass(IntSumReducer.class);

    // Emitted record types.
    wordCountJob.setOutputKeyClass(Text.class);
    wordCountJob.setOutputValueClass(IntWritable.class);

    // MongoDB-backed input and output.
    wordCountJob.setInputFormatClass(MongoInputFormat.class);
    wordCountJob.setOutputFormatClass(MongoOutputFormat.class);

    final boolean succeeded = wordCountJob.waitForCompletion(true);
    final int exitCode;
    if (succeeded) {
        exitCode = 0;
    } else {
        exitCode = 1;
    }
    System.exit(exitCode);
}
From source file:com.mongodb.hadoop.util.MongoTool.java
License:Apache License
private int runMapReduceJob(final Configuration conf) throws IOException { final Job job = Job.getInstance(conf, getJobName()); /**//from w w w . j a va2s.c om * Any arguments specified with -D <property>=<value> * on the CLI will be picked up and set here * They override any XML level values * Note that -D<space> is important - no space will * not work as it gets picked up by Java itself */ // TODO - Do we need to set job name somehow more specifically? // This may or may not be correct/sane job.setJarByClass(getClass()); final Class<? extends Mapper> mapper = MongoConfigUtil.getMapper(conf); LOG.debug("Mapper Class: " + mapper); LOG.debug("Input URI: " + conf.get(MongoConfigUtil.INPUT_URI)); job.setMapperClass(mapper); Class<? extends Reducer> combiner = MongoConfigUtil.getCombiner(conf); if (combiner != null) { job.setCombinerClass(combiner); } job.setReducerClass(MongoConfigUtil.getReducer(conf)); job.setOutputFormatClass(MongoConfigUtil.getOutputFormat(conf)); job.setOutputKeyClass(MongoConfigUtil.getOutputKey(conf)); job.setOutputValueClass(MongoConfigUtil.getOutputValue(conf)); job.setInputFormatClass(MongoConfigUtil.getInputFormat(conf)); Class mapOutputKeyClass = MongoConfigUtil.getMapperOutputKey(conf); Class mapOutputValueClass = MongoConfigUtil.getMapperOutputValue(conf); if (mapOutputKeyClass != null) { job.setMapOutputKeyClass(mapOutputKeyClass); } if (mapOutputValueClass != null) { job.setMapOutputValueClass(mapOutputValueClass); } /** * Determines if the job will run verbosely e.g. print debug output * Only works with foreground jobs */ final boolean verbose = MongoConfigUtil.isJobVerbose(conf); /** * Run job in foreground aka wait for completion or background? */ final boolean background = MongoConfigUtil.isJobBackground(conf); try { if (background) { LOG.info("Setting up and running MapReduce job in background."); job.submit(); return 0; } else { LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? 
" + verbose + "}"); return job.waitForCompletion(true) ? 0 : 1; } } catch (final Exception e) { LOG.error("Exception while executing job... ", e); return 1; } }
From source file:com.moz.fiji.mapreduce.IntegrationTestFijiTableInputFormat.java
License:Apache License
public Job setupJob(String jobName, Path outputFile, Class<? extends Mapper> mapperClass, Class<? extends Reducer> reducerClass, EntityId startKey, EntityId limitKey, FijiRowFilter filter) throws Exception { final Job job = new Job(createConfiguration()); final Configuration conf = job.getConfiguration(); // Get settings for test. final FijiDataRequest request = FijiDataRequest.builder() .addColumns(ColumnsDef.create().add("info", "name").add("info", "email")).build(); job.setJarByClass(IntegrationTestFijiTableInputFormat.class); // Setup the InputFormat. FijiTableInputFormat.configureJob(job, getFooTable().getURI(), request, startKey, limitKey, filter); job.setInputFormatClass(HBaseFijiTableInputFormat.class); // Duplicate functionality from MapReduceJobBuilder, since we are not using it here: final List<Path> jarFiles = Lists.newArrayList(); final FileSystem fs = FileSystem.getLocal(conf); for (String cpEntry : System.getProperty("java.class.path").split(":")) { if (cpEntry.endsWith(".jar")) { jarFiles.add(fs.makeQualified(new Path(cpEntry))); }//from w ww . ja va 2 s .com } DistributedCacheJars.addJarsToDistributedCache(job, jarFiles); // Create a test job. job.setJobName(jobName); // Setup the OutputFormat. TextOutputFormat.setOutputPath(job, outputFile.getParent()); job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); job.setOutputFormatClass(TextOutputFormat.class); // Set the mapper class. if (null != mapperClass) { job.setMapperClass(mapperClass); } // Set the reducer class. if (null != reducerClass) { job.setReducerClass(reducerClass); } return job; }
From source file:com.moz.fiji.mapreduce.MapReduceJobOutput.java
License:Apache License
/** * Configures the output for a MapReduce job. * * @param job The job to configure./*from w ww . ja va 2s . c om*/ * @throws IOException If there is an error. */ public void configure(Job job) throws IOException { job.setOutputFormatClass(getOutputFormatClass()); }
From source file:com.mozilla.main.ReadHBaseWriteHdfs.java
License:LGPL
/**
 * Scans the "data" column family of {@code TABLE_NAME} and writes the reducer output as a
 * gzip-compressed sequence file under the path given in {@code args[0]}.
 *
 * @param args command-line arguments; {@code args[0]} is the output path
 * @return 0 if the job succeeded, 1 if it failed
 * @throws Exception if the job cannot be configured or run
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = new Configuration();
    conf.set("mapred.job.queue.name", "prod");

    Job job = new Job(conf, "ReadHBaseWriteHDFS");
    job.setJarByClass(ReadHBaseWriteHdfs.class);

    Scan scan = new Scan();
    scan.addFamily("data".getBytes());

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, ReadHBaseWriteHdfsMapper.class, Text.class,
            Text.class, job);

    job.setReducerClass(ReadHBaseWriteHdfsReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(1000);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setCompressOutput(job, true);
    FileOutputFormat.setOutputCompressorClass(job, GzipCodec.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(args[0]));

    final boolean succeeded = job.waitForCompletion(true);
    if (succeeded) {
        System.out.println("DONE");
    }
    // Report failure to the caller; the original returned 0 unconditionally.
    return succeeded ? 0 : 1;
}
From source file:com.msd.gin.halyard.tools.HalyardParallelExport.java
License:Apache License
/**
 * Parses the command line, validates the export parameters, stores them in the job
 * configuration and runs the map-only parallel export job.
 *
 * @param args command-line arguments (see the option descriptions below)
 * @return 0 when the version was printed or the job completed successfully; -1 when help
 *         was printed or the job did not complete successfully
 * @throws Exception on invalid arguments ({@code ExportException}) or job failure;
 *                   a {@code RuntimeException} is rethrown after printing its message and the help
 */
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(newOption("h", null, "Prints this help"));
    options.addOption(newOption("v", null, "Prints version"));
    options.addOption(newOption("s", "source_htable", "Source HBase table with Halyard RDF store"));
    options.addOption(newOption("q", "sparql_query",
            "SPARQL tuple or graph query with use of '" + PARALLEL_SPLIT_FUNCTION_URI + "' function"));
    options.addOption(newOption("t", "target_url",
            "file://<path>/<file_name>{0}.<ext> or hdfs://<path>/<file_name>{0}.<ext> or jdbc:<jdbc_connection>/<table_name>"));
    options.addOption(newOption("p", "property=value", "JDBC connection properties"));
    options.addOption(newOption("l", "driver_classpath", "JDBC driver classpath delimited by ':'"));
    options.addOption(newOption("c", "driver_class", "JDBC driver class name"));
    try {
        CommandLine commandLine = new PosixParser().parse(options, args);

        // Help requested, or nothing given at all.
        if (args.length == 0 || commandLine.hasOption('h')) {
            printHelp(options);
            return -1;
        }

        // Version requested: read it from the Maven metadata when it is on the classpath.
        if (commandLine.hasOption('v')) {
            Properties pomProperties = new Properties();
            try (InputStream pomStream = HalyardExport.class
                    .getResourceAsStream("/META-INF/maven/com.msd.gin.halyard/hbasesail/pom.properties")) {
                if (pomStream != null) {
                    pomProperties.load(pomStream);
                }
            }
            System.out.println(
                    "Halyard Parallel Export version " + pomProperties.getProperty("version", "unknown"));
            return 0;
        }

        if (!commandLine.getArgList().isEmpty()) {
            throw new ExportException("Unknown arguments: " + commandLine.getArgList().toString());
        }

        // Source, query and target must all be present.
        final String mandatoryOptions = "sqt";
        for (int i = 0; i < mandatoryOptions.length(); i++) {
            char option = mandatoryOptions.charAt(i);
            if (!commandLine.hasOption(option)) {
                throw new ExportException("Missing mandatory option: " + option);
            }
        }

        // These options accept at most one value each.
        final String singleValuedOptions = "sqtlc";
        for (int i = 0; i < singleValuedOptions.length(); i++) {
            char option = singleValuedOptions.charAt(i);
            String values[] = commandLine.getOptionValues(option);
            if (values != null && values.length > 1) {
                throw new ExportException("Multiple values for option: " + option);
            }
        }

        String source = commandLine.getOptionValue('s');
        String query = commandLine.getOptionValue('q');
        if (!query.contains(PARALLEL_SPLIT_FUNCTION_NAME)) {
            throw new ExportException(
                    "Parallel export SPARQL query must contain '" + PARALLEL_SPLIT_FUNCTION_URI + "' function.");
        }

        String target = commandLine.getOptionValue('t');
        boolean fileTarget = target.startsWith("file:") || target.startsWith("hdfs:");
        if (fileTarget && !target.contains("{0}")) {
            throw new ExportException(
                    "Parallel export file target must contain '{0}' counter in the file path or name.");
        }

        getConf().set(SOURCE, source);
        getConf().set(QUERY, query);
        getConf().set(TARGET, target);

        String driverClass = commandLine.getOptionValue('c');
        if (driverClass != null) {
            getConf().set(JDBC_DRIVER, driverClass);
        }

        // JDBC properties are Base64-encoded before being stored in the configuration.
        String jdbcProperties[] = commandLine.getOptionValues('p');
        if (jdbcProperties != null) {
            for (int i = 0; i < jdbcProperties.length; i++) {
                jdbcProperties[i] = Base64.encodeBase64String(jdbcProperties[i].getBytes(UTF8));
            }
            getConf().setStrings(JDBC_PROPERTIES, jdbcProperties);
        }

        TableMapReduceUtil.addDependencyJars(getConf(), HalyardExport.class, NTriplesUtil.class, Rio.class,
                AbstractRDFHandler.class, RDFFormat.class, RDFParser.class, HTable.class,
                HBaseConfiguration.class, AuthenticationProtos.class, Trace.class);
        HBaseConfiguration.addHbaseResources(getConf());

        Job job = Job.getInstance(getConf(), "HalyardParallelExport " + source + " -> " + target);

        // Each driver classpath element must be an existing file; only file names are recorded.
        String driverClasspath = commandLine.getOptionValue('l');
        if (driverClasspath != null) {
            String jarNames[] = driverClasspath.split(":");
            for (int i = 0; i < jarNames.length; i++) {
                File jarFile = new File(jarNames[i]);
                if (!jarFile.isFile()) {
                    throw new ExportException("Invalid JDBC driver classpath element: " + jarNames[i]);
                }
                job.addFileToClassPath(new Path(jarFile.toURI()));
                jarNames[i] = jarFile.getName();
            }
            job.getConfiguration().setStrings(JDBC_CLASSPATH, jarNames);
        }

        job.setJarByClass(HalyardParallelExport.class);
        job.setMaxMapAttempts(1);
        job.setMapperClass(ParallelExportMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Void.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(IndexedInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        TableMapReduceUtil.initCredentials(job);

        if (!job.waitForCompletion(true)) {
            return -1;
        }
        LOG.info("Parallel Export Completed..");
        return 0;
    } catch (RuntimeException exp) {
        System.out.println(exp.getMessage());
        printHelp(options);
        throw exp;
    }
}
From source file:com.mvdb.platform.action.VersionMerge.java
License:Apache License
public static void main(String[] args) throws Exception { logger.error("error1"); logger.warn("warning1"); logger.info("info1"); logger.debug("debug1"); logger.trace("trace1"); ActionUtils.setUpInitFileProperty(); // LoggerContext lc = (LoggerContext) LoggerFactory.getILoggerFactory(); // StatusPrinter.print(lc); Configuration conf = new Configuration(); String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs(); //Also add lastMergedTimeStamp and mergeUptoTimestamp and passive db name which would be mv1 or mv2 if (otherArgs.length != 3) { System.err.println("Usage: versionmerge <customer-directory>"); System.exit(2);//from ww w. ja v a2s . c om } //Example: file:/home/umesh/.mvdb/etl/data/alpha //Example: hdfs://localhost:9000/data/alpha String customerDirectory = otherArgs[0]; String lastMergedDirName = otherArgs[1]; String lastCopiedDirName = otherArgs[2]; org.apache.hadoop.conf.Configuration conf1 = new org.apache.hadoop.conf.Configuration(); //conf1.addResource(new Path("/home/umesh/ops/hadoop-1.2.0/conf/core-site.xml")); FileSystem hdfsFileSystem = FileSystem.get(conf1); Path topPath = new Path(customerDirectory); //Clean scratch db Path passiveDbPath = new Path(topPath, "db/mv1"); Path tempDbPath = new Path(topPath, "db/tmp-" + (int) (Math.random() * 100000)); if (hdfsFileSystem.exists(tempDbPath)) { boolean success = hdfsFileSystem.delete(tempDbPath, true); if (success == false) { System.err.println(String.format("Unable to delete temp directory %s", tempDbPath.toString())); System.exit(1); } } //last three parameters are hardcoded and the nulls must be replaced later after changing inout parameters. 
Path[] inputPaths = getInputPaths(hdfsFileSystem, topPath, lastMergedDirName, lastCopiedDirName, null); Set<String> tableNameSet = new HashSet<String>(); for (Path path : inputPaths) { tableNameSet.add(path.getName()); } Job job = new Job(conf, "versionmerge"); job.setJarByClass(VersionMerge.class); job.setMapperClass(VersionMergeMapper.class); job.setReducerClass(VersionMergeReducer.class); job.setMapOutputKeyClass(MergeKey.class); job.setMapOutputValueClass(BytesWritable.class); job.setOutputKeyClass(Text.class); job.setOutputValueClass(BytesWritable.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); String lastDirName = null; if (inputPaths != null && inputPaths.length > 1) { lastDirName = inputPaths[(inputPaths.length) - 2].getParent().getName(); } for (Path inputPath : inputPaths) { FileInputFormat.addInputPath(job, inputPath); } FileOutputFormat.setOutputPath(job, tempDbPath); for (String table : tableNameSet) { if (table.endsWith(".dat") == false) { continue; } table = table.replaceAll("-", ""); table = table.replaceAll(".dat", ""); MultipleOutputs.addNamedOutput(job, table, SequenceFileOutputFormat.class, Text.class, BytesWritable.class); } boolean success = job.waitForCompletion(true); System.out.println("Success:" + success); System.out.println(ManagementFactory.getRuntimeMXBean().getName()); if (success && lastDirName != null) { ActionUtils.setConfigurationValue(new Path(customerDirectory).getName(), ConfigurationKeys.LAST_MERGE_TO_MVDB_DIRNAME, lastDirName); } //hdfsFileSystem.delete(passiveDbPath, true); //hdfsFileSystem.rename(tempDbPath, passiveDbPath); System.exit(success ? 0 : 1); }
From source file:com.mycompany.hadooptrain.WordCount.java
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException { Path inputPath = new Path(args[0]); Path outputDir = new Path(args[1]); // Create configuration Configuration conf = new Configuration(true); // Create job Job job = new Job(conf, "WordCount"); job.setJarByClass(WordCountMapper.class); // Setup MapReduce job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); job.setNumReduceTasks(1);/* w w w . j a v a 2 s . c o m*/ // Specify key / value job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); // Input FileInputFormat.addInputPath(job, inputPath); job.setInputFormatClass(TextInputFormat.class); // Output FileOutputFormat.setOutputPath(job, outputDir); job.setOutputFormatClass(TextOutputFormat.class); // Delete output if exists FileSystem hdfs = FileSystem.get(conf); if (hdfs.exists(outputDir)) hdfs.delete(outputDir, true); // Execute job int code = job.waitForCompletion(true) ? 0 : 1; System.exit(code); }
From source file:com.mycompany.maprpractice.runnerClass.WordCount.java
public int run(String[] args) throws Exception { if (args.length != 2) { System.err.printf("Usage: %s [generic options] <input> <output>\n", getClass().getSimpleName()); ToolRunner.printGenericCommandUsage(System.err); return -1; }/* w w w .j ava 2s.co m*/ Job job = new org.apache.hadoop.mapreduce.Job(); job.setJarByClass(WordCount.class); job.setJobName("WordCounter"); String inputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData\\wordCount.txt"; String outputPath = "C:\\Users\\priyamdixit\\Desktop\\TestData"; FileInputFormat.addInputPath(job, new Path(inputPath)); FileOutputFormat.setOutputPath(job, new Path(outputPath)); // FileInputFormat.addInputPath(job, new Path(args[0])); // FileOutputFormat.setOutputPath(job, new Path(args[1])); // job.setOutputKeyClass(Text.class); job.setOutputValueClass(IntWritable.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapperClass(WordCountMapper.class); job.setReducerClass(WordCountReducer.class); int returnValue = job.waitForCompletion(true) ? 0 : 1; System.out.println("job.isSuccessful " + job.isSuccessful()); return returnValue; }