List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#setOutputPath
public static void setOutputPath(Job job, Path outputDir)
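Before the full examples, a minimal sketch of the common pattern, assuming a plain MapReduce job; the class name SetOutputPathSketch and the job name are placeholders, not from any of the sources below. Note that the output directory must not already exist, or job submission fails with a FileAlreadyExistsException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputPathSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setOutputPath-sketch");
        job.setJarByClass(SetOutputPathSketch.class);
        // Input may be an existing file or directory.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output must be a directory that does not exist yet; the job creates it.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}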
From source file:com.cloudera.sa.giraph.examples.kmeans.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("KMeans Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation> <k>");
        System.out.println("Example: 1 inputFolder outputFolder 3");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];
    int k = Integer.parseInt(args[3]);

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setInt(Constants.K, k);
    bspJob.getConfiguration().setVertexClass(KMeansVertex.class);
    bspJob.getConfiguration().setMasterComputeClass(MasterCompute.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    // setOutputPath expects a mapreduce Job, so it is applied to the Hadoop Job
    // that the GiraphJob wraps.
    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.giraph.examples.ktrusses.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("KTrusses Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation> <k>");
        System.out.println("Example: 1 inputFolder outputFolder 4");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];
    int k = Integer.parseInt(args[3]);

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setVertexClass(KTrussVertex.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setMasterComputeClass(MasterCompute.class);
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);
    bspJob.getConfiguration().setInt(Constants.K, k);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.giraph.examples.triangles.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Triangles Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
        System.out.println("Example: 1 inputFolder outputFolder");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setVertexClass(TrianglesVertex.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.giraph.examples.wordcount.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("WordCount Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
        System.out.println("Example: 1 inputFolder outputFolder");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setVertexClass(WordCountVertex.class);
    bspJob.getConfiguration().setEdgeInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addEdgeInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 6) {
        // Usage: hadoop jar HBASEBulkLoad.jar com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver
        //   <inputpath> <outputpath> <hbaseTable> <hbaseColumnFamily>
        //   "<hbaseColumns (delimiter separated)>" <column delimiter>
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];

    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);

    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);
    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);

    Connection connection = ConnectionFactory.createConnection(config);
    Table hTab = connection.getTable(TableName.valueOf(hbaseTab));

    // Clear any previous output, then wire up input and output paths.
    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.setOutputFormatClass(HFileOutputFormat2.class);
    TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
    //job.setNumReduceTasks(0);
    TableMapReduceUtil.addDependencyJars(job);
    HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);

    int exitCode = job.waitForCompletion(true) ? HBASEBulkLoadConstants.SUCCESS
            : HBASEBulkLoadConstants.FAILURE;
    if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        connection.close();
    }
    return exitCode;
}
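For contrast, a minimal sketch of the more typical HFile bulk-load wiring under HBase 1.x-era APIs, where HFileOutputFormat2.configureIncrementalLoad (rather than configureIncrementalLoadMap plus initTableReducerJob as above) sets the output format, reducer, and total-order partitioner from the table's region boundaries. The table name "myTable" and the class name BulkLoadSketch are placeholders, and the mapper is assumed to be the (ImmutableBytesWritable, Put)-emitting mapper from the driver above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        TableName tableName = TableName.valueOf("myTable"); // placeholder
        Job job = Job.getInstance(conf, "bulkload-sketch");
        job.setJarByClass(BulkLoadSketch.class);
        // Mapper from the driver above is assumed to be on the classpath.
        job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator locator = conn.getRegionLocator(tableName);
                Admin admin = conn.getAdmin()) {
            // Configures output format, reducer, and partitioner for HFile output.
            HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            Path hfileDir = new Path(args[1]); // must not exist; HFiles are written here
            FileOutputFormat.setOutputPath(job, hfileDir);
            if (job.waitForCompletion(true)) {
                // Move the generated HFiles into the live table's regions.
                new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
            }
        }
    }
}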
From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 2) {
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Job job = Job.getInstance(config, this.getClass().getName() + "-wordcount");
    job.setJarByClass(SecureWordCountDriver.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.science.matching.graph.BipartiteMatchingRunner.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: <input> <output> <numworkers>");
        System.err.println("The input should be the output of the InputPreparer Crunch pipeline.");
        System.err.println("The output is the directory where the output of the matching will be");
        System.err.println("written, and the numworkers should be <= the number of map slots available");
        System.err.println("on your Hadoop cluster.");
        return 1;
    }

    GiraphJob job = new GiraphJob(getConf(), getClass().getName());
    GiraphConfiguration conf = job.getConfiguration();
    conf.setVertexClass(BipartiteMatchingVertex.class);
    conf.setVertexInputFormatClass(BipartiteMatchingVertexInputFormat.class);
    conf.setVertexOutputFormatClass(BipartiteMatchingVertexOutputFormat.class);
    conf.setComputationClass(BipartiteMatching.class);
    conf.setOutEdgesClass(ArrayListEdges.class);
    GiraphFileInputFormat.addVertexInputPath(job.getConfiguration(), new Path(args[0]));
    FileOutputFormat.setOutputPath(job.getInternalJob(), new Path(args[1]));

    int numWorkers = Integer.parseInt(args[2]);
    job.getConfiguration().setWorkerConfiguration(numWorkers, numWorkers, 100.0f);
    return job.run(true) ? 0 : -1;
}
From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java
License:Apache License
public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // Delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }

        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

        // Commit this tx.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);

        // Now do a data-driven import.
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL,
                (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);

        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4,
                job.getCounters()
                        .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS")
                        .getValue());
    } finally {
        s.close();
    }
}
From source file:com.cloudera.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with --class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);
        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);

        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}
From source file:com.cloudera.traffic.AveragerRunner.java
License:Apache License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);
    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
}