Usage examples for org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat#setOutputFormatClass
@SuppressWarnings("unchecked") public static void setOutputFormatClass(Job job, Class<? extends OutputFormat> theClass)
From source file:org.apache.sqoop.mapreduce.mainframe.MainframeImportJob.java
License:Apache License
@Override
protected void configureOutputFormat(Job job, String tableName, String tableClassName)
        throws ClassNotFoundException, IOException {
    super.configureOutputFormat(job, tableName, tableClassName);
    LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
}
From source file:org.jc.mrsqoophelper.main.SqoopHelperMain.java
public static void main(String[] args) {
    if (args == null || args.length < 7) {
        System.out.println("Incorrect usage of tool:");
        System.out.println("hadoop jar program.jar <package_name> <json_schema_file> <jar_classpath> "
                + "<absolute_src_path> <field constraint output_file cast_to;field constraint output_path> "
                + "<input_file_1;input_file_2;input_file_3> <output_file1;output_file2;...>");
        return;
    }
    String packageName = args[0];
    String jsonPath = args[1];
    String classPath = args[2];
    String absolutePathOfSrc = args[3];
    // Each condition is a triplet: field, constraint, output_file. Do not join
    // triplets with logical operators.
    // Correct:
    //   date gt 'yyyy/MM/dd' output_file
    //   date lt 'yyyy/MM/dd' output_file
    // Wrong:
    //   date gt 'yyyy/MM/dd' and date lt 'yyyy/MM/dd'
    // Separate conditions with semicolons; conditions that share an output file
    // are combined with the AND operator.
    String fieldToFilter = args[4];
    // Input files are separated by semicolons.
    String inputAvroFiles = args[5];
    String outputAvroFiles = args[6];
    try {
        Path pathOfSchema = Paths.get(jsonPath);
        fieldToFilter = fieldToFilter.replace(CMD_LINE_TRIPLET_ORIGINAL_DELIM, TRIPLET_ELEMENTS_DELIMITER_SYMBOL);
        String schemaAsJson = Files.readAllLines(pathOfSchema, Charset.forName("UTF-8")).get(0);
        Class recordClass = Utils.ClassBuilder(schemaAsJson, packageName, classPath, absolutePathOfSrc);
        Configuration conf = new Configuration();
        conf.setStrings(FIELD_COND_OUTPUT_TRIPLET, fieldToFilter.split(";"));
        conf.set(TRIPLET_ELEMENTS_DELIMITER, TRIPLET_ELEMENTS_DELIMITER_SYMBOL);
        conf.set(FQCN_RECORD_CLASS, recordClass.getName());
        conf.set(AVRO_SCHEMA_AS_JSON, schemaAsJson);
        conf.set(SRC_ABSOLUTE_PATH, absolutePathOfSrc);
        conf.set(CLASS_ABSOLUTE_PATH, classPath);
        conf.set(PACKAGE_NAME, packageName);
        Job job = new Job(conf);
        // Set context for mapper and reducer.
        job.setJobName("Filter Records for Sqoop Export");
        job.setInputFormatClass(AvroKeyInputFormat.class);
        job.setMapperClass(Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(Reducer.class);
        String[] argPaths = inputAvroFiles.split(";");
        org.apache.hadoop.fs.Path[] paths = new org.apache.hadoop.fs.Path[argPaths.length];
        for (int i = 0; i < argPaths.length; ++i) {
            paths[i] = new org.apache.hadoop.fs.Path(argPaths[i]);
        }
        org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, paths);
        // NullOutputFormat writes nothing; the lazy wrapper keeps the default
        // output channel silent while the Avro files below are created directly.
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);
        // Create the (initially empty) output Avro container files, one writer per file.
        Schema avsc = new Schema.Parser().parse(schemaAsJson);
        for (String out : outputAvroFiles.split(";")) {
            DataFileWriter<Object> dfw = new DataFileWriter<>(new GenericDatumWriter<>(avsc));
            dfw.create(avsc, new File(out));
            dfw.close();
        }
        /* YarnLocalCluster yarnLocalCluster = new YarnLocalCluster.Builder()
                .setNumNodeManagers(1)
                .setNumLocalDirs(Integer.parseInt("1"))
                .setNumLogDirs(Integer.parseInt("1"))
                .setResourceManagerAddress("localhost")
                .setResourceManagerHostname("localhost:37001")
                .setResourceManagerSchedulerAddress("localhost:37002")
                .setResourceManagerResourceTrackerAddress("localhost:37003")
                .setResourceManagerWebappAddress("localhost:37004")
                .setUseInJvmContainerExecutor(true)
                .setConfig(conf)
                .build();
        yarnLocalCluster.start(); */
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (Exception ex) {
        Logger.getLogger(SqoopHelperMain.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("Could not generate class for avro schema.");
    }
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runDictionaryJobSampling() throws IOException, ClassNotFoundException, InterruptedException {
    boolean jobOK;
    Job job = null;

    // If the input path does not exist, fail.
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // If the samples path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionarySamplesPath())) {
        if (this.conf.getDeleteDictionarySamplesPath()) {
            // ... and the option was provided, delete it recursively.
            this.dictionaryFS.delete(this.conf.getDictionarySamplesPath(), true);
        } else {
            // ... and the option was not provided, fail.
            System.out.println("Dictionary samples path already exists: " + this.conf.getDictionarySamplesPath());
            System.out.println("Select another path or use option -ds to overwrite");
            System.exit(-1);
        }
    }

    // Job to create a SequenceInputFormat with roles.
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 1");
    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("input = " + this.conf.getInputPath());
    System.out.println("samples = " + this.conf.getDictionarySamplesPath());

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionarySamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionarySamplerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setCombinerClass(DictionarySamplerReducer.class);
    job.setReducerClass(DictionarySamplerReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setNumReduceTasks(this.conf.getDictionarySampleReducers());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);
    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runDictionaryJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // If the output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) {
            // ... and the option was provided, delete it recursively.
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else {
            // ... and the option was not provided, fail.
            System.out.println("Dictionary output path already exists: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select another path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Sample the SequenceInputFormat to do a total sort and create the final output.
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");
    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("samples = " + this.conf.getDictionarySamplesPath());
    System.out.println("output = " + this.conf.getDictionaryOutputPath());

    FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Identity mapper:
    // job.setMapperClass(Mapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setReducerClass(DictionaryReducer.class);
    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    bufferedWriter.close();

    return jobOK;
}
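The reducers for this driver are not part of the excerpt, but the combination above is the classic reason to use LazyOutputFormat: every record is routed through MultipleOutputs named outputs (SHARED, SUBJECTS, PREDICATES, OBJECTS), so the default output channel stays empty, and without the lazy wrapper each reducer would still create an empty default part file. A minimal sketch of such a reducer follows; the named-output string and the routing logic are placeholder assumptions, not the project's actual code:

import java.io.IOException;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.output.MultipleOutputs;

// Sketch only; the real DictionaryReducer is not shown on this page.
public class DictionaryReducerSketch extends Reducer<Text, Text, Text, NullWritable> {
    private MultipleOutputs<Text, NullWritable> mos;

    @Override
    protected void setup(Context context) {
        mos = new MultipleOutputs<Text, NullWritable>(context);
    }

    @Override
    protected void reduce(Text term, Iterable<Text> roles, Context context)
            throws IOException, InterruptedException {
        // Hypothetical routing: write each term to the named output for its
        // role, never to the default collector.
        mos.write("shared", term, NullWritable.get());
    }

    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        mos.close();
    }
}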
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runDictionaryJobWithOneJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // If the input path does not exist, fail.
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // If the output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) {
            // ... and the option was provided, delete it recursively.
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else {
            // ... and the option was not provided, fail.
            System.out.println("Dictionary output path already exists: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select another path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Launch the job.
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionaryMapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setReducerClass(DictionaryReducer.class);
    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));
    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // If the input path does not exist, fail.
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // If the dictionary output path does not exist, fail.
    if (!this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getDictionaryOutputPath());
        System.exit(-1);
    }

    // If the triples samples path exists...
    if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
        if (this.conf.getDeleteTriplesSamplesPath()) {
            // ... and the option was provided, delete it recursively.
            this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
        } else {
            // ... and the option was not provided, fail.
            System.out.println("Triples samples path already exists: " + this.conf.getTriplesSamplesPath());
            System.out.println("Select another path or use option -dst to overwrite");
            System.exit(-1);
        }
    }

    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    // Job to create a SequenceInputFormat.
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(this.conf.getTriplesReducers());

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runTriplesJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;

    // If the triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) {
            // ... and the option was provided, delete it recursively.
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else {
            // ... and the option was not provided, fail.
            System.out.println("Triples output path already exists: " + this.conf.getTriplesOutputPath());
            System.out.println("Select another path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(this.conf.getTriplesReducers());

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);
    return jobOK;
}
From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java
License:Open Source License
protected boolean runTriplesJobWithOneJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // If the input path does not exist, fail.
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // If the dictionary output path does not exist, fail.
    if (!this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getDictionaryOutputPath());
        System.exit(-1);
    }

    // If the triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) {
            // ... and the option was provided, delete it recursively.
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else {
            // ... and the option was not provided, fail.
            System.out.println("Triples output path already exists: " + this.conf.getTriplesOutputPath());
            System.out.println("Select another path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    // Launch the job.
    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setNumReduceTasks(this.conf.getTriplesReducers());
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryMapFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryReduceFile().toUri(), job.getConfiguration());

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}
From source file:tv.icntv.log.stb.filter.FilterJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration configuration = getConf();
    Path input = new Path(args[0]);
    Path output = new Path(args[1]);
    Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file");
    // Set up the job configuration.
    stbFilterJob.setMapperClass(FilterMapper.class);
    stbFilterJob.setOutputKeyClass(NullWritable.class);
    stbFilterJob.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stbFilterJob, input);
    stbFilterJob.setJarByClass(getClass());
    FileOutputFormat.setOutputPath(stbFilterJob, output);
    LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class);
    stbFilterJob.setNumReduceTasks(0);
    return stbFilterJob.waitForCompletion(true) ? 0 : 1;
}
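FilterMapper itself is not included in the excerpt; a minimal sketch with the same key/value signature (the filtering rule is a placeholder assumption) shows why the lazy wrapper fits a map-only filter: an input split with no matching lines produces no output file at all.

import java.io.IOException;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

// Sketch only; the real FilterMapper applies rules loaded from a rule file.
public class FilterMapperSketch extends Mapper<LongWritable, Text, NullWritable, Text> {
    @Override
    protected void map(LongWritable offset, Text line, Context context)
            throws IOException, InterruptedException {
        // Placeholder rule: keep non-empty records only.
        if (line.getLength() > 0) {
            context.write(NullWritable.get(), line);
        }
        // If no line in the split matches, nothing is written and
        // LazyOutputFormat ensures no empty part file is created.
    }
}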
From source file:tv.icntv.log.stb.filter.FilterJob.java
License:Apache License
@Override public boolean run(Map<String, String> maps) throws Exception { Configuration configuration = getConf(); // ????// w ww.j a v a2s . c om configuration.setBoolean("mapreduce.reduce.speculative", false); configuration.setBoolean("mapreduce.map.speculative", false); //setting conf Path input = new Path(maps.get(INPUT)); Path back = new Path(maps.get(BACK)); Path output = new Path(maps.get(OUTPUT_PREFIX)); configuration.set(OUTPUT_SUFFIX, maps.get(OUTPUT_SUFFIX)); configuration.set(OUTPUT_PREFIX, output.toString()); configuration.set(OTHER_PATH, maps.get(OTHER_PATH)); // configuration // Path input=new Path("/icntv/log/stb/2014-05-19/stb-2014-05-18-23.lzo_deflate"); // Path back=new Path("/icntv/parser/stb/filter/status/2014-05-18/"); // Path output=new Path("/icntv/parser/stb/filter/result/2014-05-18/"); Path[] in = HadoopUtils.createFile(input, back, new PathFilter() { @Override public boolean accept(Path path) { return path.getName().endsWith(file_success_suffix); //To change body of implemented methods use File | Settings | File Templates. } }, file_success_suffix, parseing_suffix, parsed_suffix); if (null == in || in.length == 0) { logger.info("input not exist;"); return false; } List<Path> inTemp = Lists.newArrayList(in); String ye = DateUtils.addDay(input.getName(), "yyyy-MM-dd", -1); Path prefix = new Path(input.getParent() + File.separator + ye, "stb-" + ye + "-23.lzo"); logger.info("prefix path ={}", prefix.toString()); if (HadoopUtils.isExist(prefix)) { logger.info("add today path= {}", prefix.toString()); inTemp.add(prefix); } String day = DateUtils.addDay(input.getName(), "yyyy-MM-dd", 1); Path nextPath = new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo"); logger.info("next path ={},writed path={}", nextPath.toString(), new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed")); if (HadoopUtils .isExist(new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"))) { logger.info("add today path= {}", nextPath.toString()); inTemp.add(nextPath); } logger.info("input size = {}", inTemp.size()); // inTemp.add(new Path(input.getParent()+ File.separator+ DateTime.now().toString("yyyy-MM-dd"),"") Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file"); //setting job configuration ..... stbFilterJob.setMapperClass(FilterMapper.class); stbFilterJob.setOutputKeyClass(NullWritable.class); stbFilterJob.setOutputValueClass(Text.class); FileInputFormat.setInputPaths(stbFilterJob, inTemp.toArray(new Path[inTemp.size()])); stbFilterJob.setJarByClass(getClass()); FileOutputFormat.setOutputPath(stbFilterJob, output); LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class); stbFilterJob.setNumReduceTasks(0); if (stbFilterJob.waitForCompletion(true)) { ; for (Path path : in) { HadoopUtils.rename(new Path(path + parseing_suffix), new Path(path + parsed_suffix)); } return true; } return false; }