List of usage examples for org.apache.hadoop.mapreduce.lib.output FileOutputFormat setOutputPath
public static void setOutputPath(Job job, Path outputDir)
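setOutputPath assigns the directory that the job's OutputFormat writes to. FileOutputFormat refuses to run against an existing directory (its checkOutputSpecs throws FileAlreadyExistsException at submission), which is why several of the examples below delete the path first. A minimal driver sketch, assuming a hypothetical SetOutputPathSketch class and an <input> <output> argument layout:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputPathSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "set output path sketch");
        job.setJarByClass(SetOutputPathSketch.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        Path outputDir = new Path(args[1]); // hypothetical argument layout: <input> <output>

        // The output directory must not exist; remove any stale copy before submitting.
        FileSystem fs = outputDir.getFileSystem(conf);
        if (fs.exists(outputDir)) {
            fs.delete(outputDir, true);
        }
        FileOutputFormat.setOutputPath(job, outputDir);

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

With no mapper or reducer set, the job runs the identity defaults; the point of the sketch is only the output-path handling.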
From source file:com.littlehotspot.hadoop.mr.nginx.module.hdfs2hbase.api.user.UserScheduler.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    try {
        // Initialize the MapReduce common variables from the arguments
        CommonVariables.initMapReduce(this.getConf(), args);
        CommonVariables.hBaseHelper = new HBaseHelper(this.getConf());

        // Read job parameters
        String matcherRegex = CommonVariables.getParameterValue(Argument.MapperInputFormatRegex);
        String hdfsInputPath = CommonVariables.getParameterValue(Argument.InputPath);
        String hdfsOutputPath = CommonVariables.getParameterValue(Argument.OutputPath);

        // Compile the mapper input-format regex if one was provided
        if (StringUtils.isNotBlank(matcherRegex)) {
            CommonVariables.MAPPER_INPUT_FORMAT_REGEX = Pattern.compile(matcherRegex);
        }

        Path inputPath = new Path(hdfsInputPath);
        Path outputPath = new Path(hdfsOutputPath);

        Job job = Job.getInstance(this.getConf(), this.getClass().getName());
        job.setJarByClass(this.getClass());

        job.setMapperClass(UserMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);

        job.setReducerClass(UserReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        // Remove the output directory if it already exists
        FileSystem fileSystem = FileSystem.get(new URI(outputPath.toString()), this.getConf());
        if (fileSystem.exists(outputPath)) {
            fileSystem.delete(outputPath, true);
        }

        boolean state = job.waitForCompletion(true);
        if (!state) {
            throw new Exception("MapReduce task execution failed.");
        }
        return 0;
    } catch (Exception e) {
        e.printStackTrace();
        return 1;
    }
}
From source file:com.main.MRSearchMain.java
public void searchHBase(int numOfDays) throws IOException, InterruptedException, ClassNotFoundException {
    long startTime;
    long endTime;
    String path = "/home/hadoop/app/hadoop-2.0.0-cdh4.3.0/etc/hadoop/";
    Configuration conf = HBaseConfiguration.create();
    // conf.set("hbase.zookeeper.quorum", "streamslab.localdomain");
    // conf.set("fs.default.name", "hdfs://streamslab.localdomain:8020");
    // conf.set("mapred.job.tracker", "hdfs://streamslab.localdomain:50300");
    // conf.set("fs.hdfs.impl", org.apache.hadoop.hdfs.DistributedFileSystem.class.getName());
    // Make sure the local FileSystem implementation resolves
    conf.set("fs.file.impl", org.apache.hadoop.fs.LocalFileSystem.class.getName());
    conf.addResource(new Path(path + "core-site.xml"));
    conf.addResource(new Path(path + "hdfs-site.xml"));
    conf.addResource(new Path(path + "mapred-site.xml"));

    // Search criteria passed to the mappers through the configuration
    conf.set("search.license", "C87310");
    conf.set("search.color", "10");
    conf.set("search.direction", "2");

    Job job = new Job(conf, "MRSearchHBase");
    System.out.println("search.license: " + conf.get("search.license"));
    job.setNumReduceTasks(0);
    job.setJarByClass(MRSearchMain.class);

    Scan scan = new Scan();
    scan.addFamily(FAMILY_NAME);
    byte[] startRow = Bytes.toBytes("2011010100000");
    byte[] stopRow;
    switch (numOfDays) {
    case 1:
        stopRow = Bytes.toBytes("2011010200000");
        break;
    case 10:
        stopRow = Bytes.toBytes("2011011100000");
        break;
    case 30:
        stopRow = Bytes.toBytes("2011020100000");
        break;
    case 365:
        stopRow = Bytes.toBytes("2012010100000");
        break;
    default:
        stopRow = Bytes.toBytes("2011010101000");
    }
    // Restrict the scan to the requested row-key range
    scan.setStartRow(startRow);
    scan.setStopRow(stopRow);

    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, SearchMapper.class,
            ImmutableBytesWritable.class, Text.class, job);

    Path outPath = new Path("searchresult");
    LOG.info("outPath:" + outPath.toString());

    // Delete the output directory on HDFS if it already exists
    FileSystem file = null;
    try {
        file = FileSystem.get(conf);
    } catch (IOException e) {
        e.printStackTrace();
    }
    // HDFS_File file = new HDFS_File();
    // file.DelFile(conf, outPath.getName(), true);
    if (file.exists(outPath)) {
        file.delete(outPath, true);
        LOG.info("=====delPath " + outPath.toString() + "=====");
    }
    FileOutputFormat.setOutputPath(job, outPath);

    startTime = System.currentTimeMillis();
    job.waitForCompletion(true);
    endTime = System.currentTimeMillis();
    LOG.info("Time used: " + (endTime - startTime));
    LOG.info("startRow:" + Text.decode(startRow));
    LOG.info("stopRow: " + Text.decode(stopRow));
}
From source file:com.marklogic.mapreduce.examples.BinaryReader.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: BinaryReader configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "binary reader");
    job.setJarByClass(BinaryReader.class);
    job.setInputFormatClass(DocumentInputFormat.class);
    job.setMapperClass(DocMapper.class);
    job.setMapOutputKeyClass(DocumentURI.class);
    job.setMapOutputValueClass(BytesWritable.class);
    job.setOutputFormatClass(BinaryOutputFormat.class);
    job.setOutputKeyClass(DocumentURI.class);
    job.setOutputValueClass(BytesWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.marklogic.mapreduce.examples.LinkCount.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCount configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count");
    job.setJarByClass(LinkCount.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.marklogic.mapreduce.examples.LinkCountCooccurrences.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountCooccurrences configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count cooccurrences");
    job.setJarByClass(LinkCountCooccurrences.class);
    job.setInputFormatClass(KeyValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_KEY_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);
    conf.setClass(MarkLogicConstants.INPUT_LEXICON_FUNCTION_CLASS, HrefTitleMap.class,
            ElemAttrValueCooccurrences.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.marklogic.mapreduce.examples.LinkCountHDFS.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountHDFS inputDir outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count hdfs");
    job.setJarByClass(LinkCountHDFS.class);
    job.setInputFormatClass(HDFSInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    HDFSInputFormat.setInputPaths(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.marklogic.mapreduce.examples.LinkCountValue.java
License:Apache License
public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    if (args.length < 2) {
        System.err.println("Usage: LinkCountValue configFile outputDir");
        System.exit(2);
    }
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = Job.getInstance(conf, "link count value");
    job.setJarByClass(LinkCountValue.class);
    job.setInputFormatClass(ValueInputFormat.class);
    job.setMapperClass(RefMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputFormatClass(TextOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    conf = job.getConfiguration();
    conf.addResource(otherArgs[0]);
    conf.setClass(MarkLogicConstants.INPUT_VALUE_CLASS, Text.class, Writable.class);

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.mb.saas.bi.job.WordCountJob.java
License:Apache License
public static boolean runHadoopMapReduceJob() throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hadoop");

    File jarFile = UploadResource.createTempJar("bin");
    ClassLoader classLoader = UploadResource.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);

    // Client-side configuration for an HA HDFS cluster
    Configuration conf = new Configuration();
    conf.set("fs.defaultFS", "hdfs://mbcluster/");
    conf.set("dfs.nameservices", "mbcluster");
    conf.set("dfs.ha.namenodes.mbcluster", "ns1,ns2");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns1", "master:4001");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns2", "backup:4001");
    conf.set("dfs.client.failover.proxy.provider.mbcluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    // Prefer a pre-built job jar if one exists on the local disk
    boolean isMapReduceJarSet = false;
    String hadoopMapReduceJar = "F:/henry_projects/mbHiveAnalyzer/t.jar";
    File file = new File(hadoopMapReduceJar);
    if (file.exists()) {
        ((JobConf) job.getConfiguration()).setJar(hadoopMapReduceJar);
        isMapReduceJarSet = true;
    }
    if (!isMapReduceJarSet && jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path("/input/wordcount.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/output/001"));

    return job.waitForCompletion(true);
}
From source file:com.mb.saas.bi.job.WordCountJob.java
License:Apache License
public static void main(String[] args) throws Exception {
    System.setProperty("HADOOP_USER_NAME", "hadoop");
    File jarFile = UploadResource.createTempJar("bin");
    System.setProperty("hadoop.home.dir", "F:/hadoop");

    ClassLoader classLoader = UploadResource.getClassLoader();
    Thread.currentThread().setContextClassLoader(classLoader);

    Configuration conf = new Configuration();
    // conf.set("fs.defaultFS", "hdfs://slave1:4001");
    // conf.set("mapreduce.framework.name", "yarn");
    // conf.set("yarn.resourcemanager.address", "master:8032");
    // conf.set("yarn.resourcemanager.scheduler.address", "master:8030");
    conf.set("fs.defaultFS", "hdfs://mbcluster/");
    conf.set("dfs.nameservices", "mbcluster");
    conf.set("dfs.ha.namenodes.mbcluster", "ns1,ns2");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns1", "master:4001");
    conf.set("dfs.namenode.rpc-address.mbcluster.ns2", "backup:4001");
    conf.set("dfs.client.failover.proxy.provider.mbcluster",
            "org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

    conf.set("mapred.remote.os", "Linux");
    System.out.println(conf.get("mapred.remote.os"));
    // conf.set("mapreduce.job.reduces", "2");
    // conf.set("mapreduce.tasktracker.map.tasks.maximum", "8");
    // conf.set("mapreduce.input.fileinputformat.split.maxsize", "123");

    Job job = new Job(conf, "word count");
    job.setJarByClass(WordCountJob.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    if (jarFile != null)
        ((JobConf) job.getConfiguration()).setJar(jarFile.getAbsolutePath());

    // job.setMaxMapAttempts(2);
    job.setNumReduceTasks(1);
    FileInputFormat.addInputPath(job, new Path("/input/wordcount2.txt"));
    FileOutputFormat.setOutputPath(job, new Path("/output/001002"));

    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
From source file:com.metamx.druid.indexer.DeterminePartitionsJob.java
License:Open Source License
public boolean run() {
    try {
        /*
         * Group by (timestamp, dimensions) so we can correctly count dimension values as they would appear
         * in the final segment.
         */
        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            final Job groupByJob = new Job(new Configuration(), String.format(
                    "%s-determine_partitions_groupby-%s", config.getDataSource(), config.getIntervals()));

            injectSystemProperties(groupByJob);
            groupByJob.setInputFormatClass(TextInputFormat.class);
            groupByJob.setMapperClass(DeterminePartitionsGroupByMapper.class);
            groupByJob.setMapOutputKeyClass(BytesWritable.class);
            groupByJob.setMapOutputValueClass(NullWritable.class);
            groupByJob.setCombinerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setReducerClass(DeterminePartitionsGroupByReducer.class);
            groupByJob.setOutputKeyClass(BytesWritable.class);
            groupByJob.setOutputValueClass(NullWritable.class);
            groupByJob.setOutputFormatClass(SequenceFileOutputFormat.class);
            groupByJob.setJarByClass(DeterminePartitionsJob.class);

            config.addInputPaths(groupByJob);
            config.intoConfiguration(groupByJob);
            FileOutputFormat.setOutputPath(groupByJob, config.makeGroupedDataDir());

            groupByJob.submit();
            log.info("Job %s submitted, status available at: %s", groupByJob.getJobName(),
                    groupByJob.getTrackingURL());

            if (!groupByJob.waitForCompletion(true)) {
                log.error("Job failed: %s", groupByJob.getJobID());
                return false;
            }
        } else {
            log.info("Skipping group-by job.");
        }

        /*
         * Read grouped data and determine appropriate partitions.
         */
        final Job dimSelectionJob = new Job(new Configuration(), String.format(
                "%s-determine_partitions_dimselection-%s", config.getDataSource(), config.getIntervals()));

        dimSelectionJob.getConfiguration().set("io.sort.record.percent", "0.19");

        injectSystemProperties(dimSelectionJob);

        if (!config.getPartitionsSpec().isAssumeGrouped()) {
            // Read grouped data from the groupByJob.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionPostGroupByMapper.class);
            dimSelectionJob.setInputFormatClass(SequenceFileInputFormat.class);
            FileInputFormat.addInputPath(dimSelectionJob, config.makeGroupedDataDir());
        } else {
            // Directly read the source data, since we assume it's already grouped.
            dimSelectionJob.setMapperClass(DeterminePartitionsDimSelectionAssumeGroupedMapper.class);
            dimSelectionJob.setInputFormatClass(TextInputFormat.class);
            config.addInputPaths(dimSelectionJob);
        }

        SortableBytes.useSortableBytesAsMapOutputKey(dimSelectionJob);
        dimSelectionJob.setMapOutputValueClass(Text.class);
        dimSelectionJob.setCombinerClass(DeterminePartitionsDimSelectionCombiner.class);
        dimSelectionJob.setReducerClass(DeterminePartitionsDimSelectionReducer.class);
        dimSelectionJob.setOutputKeyClass(BytesWritable.class);
        dimSelectionJob.setOutputValueClass(Text.class);
        dimSelectionJob.setOutputFormatClass(DeterminePartitionsDimSelectionOutputFormat.class);
        dimSelectionJob.setJarByClass(DeterminePartitionsJob.class);

        config.intoConfiguration(dimSelectionJob);
        FileOutputFormat.setOutputPath(dimSelectionJob, config.makeIntermediatePath());

        dimSelectionJob.submit();
        log.info("Job %s submitted, status available at: %s", dimSelectionJob.getJobName(),
                dimSelectionJob.getTrackingURL());

        if (!dimSelectionJob.waitForCompletion(true)) {
            log.error("Job failed: %s", dimSelectionJob.getJobID().toString());
            return false;
        }

        /*
         * Load partitions determined by the previous job.
         */
        log.info("Job completed, loading up partitions for intervals[%s].",
                config.getSegmentGranularIntervals());
        FileSystem fileSystem = null;
        Map<DateTime, List<HadoopyShardSpec>> shardSpecs = Maps.newTreeMap(DateTimeComparator.getInstance());
        int shardCount = 0;
        for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
            DateTime bucket = segmentGranularity.getStart();
            final Path partitionInfoPath = config.makeSegmentPartitionInfoPath(new Bucket(0, bucket, 0));
            if (fileSystem == null) {
                fileSystem = partitionInfoPath.getFileSystem(dimSelectionJob.getConfiguration());
            }
            if (fileSystem.exists(partitionInfoPath)) {
                List<ShardSpec> specs = config.jsonMapper.readValue(
                        Utils.openInputStream(dimSelectionJob, partitionInfoPath),
                        new TypeReference<List<ShardSpec>>() {
                        });

                List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
                for (int i = 0; i < specs.size(); ++i) {
                    actualSpecs.add(new HadoopyShardSpec(specs.get(i), shardCount++));
                    log.info("DateTime[%s], partition[%d], spec[%s]", bucket, i, actualSpecs.get(i));
                }

                shardSpecs.put(bucket, actualSpecs);
            } else {
                log.info("Path[%s] didn't exist!?", partitionInfoPath);
            }
        }
        config.setShardSpecs(shardSpecs);

        return true;
    } catch (Exception e) {
        throw Throwables.propagate(e);
    }
}
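The Druid example above also shows setOutputPath serving as the handoff point between chained jobs: the group-by job's output directory (makeGroupedDataDir) becomes the dim-selection job's input. A minimal sketch of that two-stage pattern, assuming a hypothetical TwoStageDriver class and intermediate path:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class TwoStageDriver {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path intermediate = new Path("/tmp/stage1-output"); // hypothetical handoff directory

        Job first = Job.getInstance(conf, "stage 1");
        first.setJarByClass(TwoStageDriver.class);
        FileInputFormat.addInputPath(first, new Path(args[0]));
        FileOutputFormat.setOutputPath(first, intermediate); // stage 1 writes here
        if (!first.waitForCompletion(true)) {
            System.exit(1); // abort the chain if stage 1 fails
        }

        Job second = Job.getInstance(conf, "stage 2");
        second.setJarByClass(TwoStageDriver.class);
        FileInputFormat.addInputPath(second, intermediate); // stage 2 reads stage 1's output
        FileOutputFormat.setOutputPath(second, new Path(args[1]));
        System.exit(second.waitForCompletion(true) ? 0 : 1);
    }
}

Waiting on the first job before submitting the second is what makes the handoff safe; the intermediate directory must be fully committed before stage 2 reads it.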