Example usage for org.apache.hadoop.mapreduce.lib.output LazyOutputFormat setOutputFormatClass

Introduction

This page shows example usages of org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat#setOutputFormatClass.

Prototype

@SuppressWarnings("unchecked")
public static void setOutputFormatClass(Job job, Class<? extends OutputFormat> theClass) 

Document

Set the underlying output format for LazyOutputFormat.
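
Because LazyOutputFormat only creates the underlying RecordWriter (and therefore the output file) when the first record is written, tasks that emit nothing leave no empty part files behind. As a quick orientation before the examples, here is a minimal, hypothetical map-only driver; the class name LazyOutputDemo and the argument paths are illustrative, not taken from the sources below:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.LazyOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class LazyOutputDemo {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "lazy output demo");
        job.setJarByClass(LazyOutputDemo.class);
        job.setMapperClass(Mapper.class);   // identity mapper
        job.setNumReduceTasks(0);           // map-only job
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        // Instead of job.setOutputFormatClass(TextOutputFormat.class):
        // LazyOutputFormat defers creating each part file until the
        // first record is written to it.
        LazyOutputFormat.setOutputFormatClass(job, TextOutputFormat.class);
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}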

Usage

From source file:org.apache.sqoop.mapreduce.mainframe.MainframeImportJob.java

License:Apache License

@Override
protected void configureOutputFormat(Job job, String tableName, String tableClassName)
        throws ClassNotFoundException, IOException {
    super.configureOutputFormat(job, tableName, tableClassName);
    LazyOutputFormat.setOutputFormatClass(job, getOutputFormatClass());
}

From source file:org.jc.mrsqoophelper.main.SqoopHelperMain.java

public static void main(String[] args) {
    if (args == null || args.length < 7) { // args[6] is read below, so seven arguments are required
        System.out.println("Incorrect usage of tool:");
        System.out.println(
                "hadoop jar program.jar <package_name> <json_schema_file> <jar_classpath> <absolute_src_path> <field constraint output_file cast_to;field constraint output_path> <input_file_1;input_file_2;input_file_3> <output_file1;output_file2;...>");
        return;
    }

    String packageName = args[0];
    String jsonPath = args[1];
    String classPath = args[2];
    String absolutePathOfSrc = args[3];
    //Pair each field constraint with an output file; do not join constraints with logical operators.
    //Correct:
    //  date gt 'yyyy/MM/dd' output_file
    //  date lt 'yyyy/MM/dd' output_file
    //Wrong:
    //  date gt 'yyyy/MM/dd' and date lt 'yyyy/MM/dd'
    //Separate constraints with semicolons; constraints that share
    //an output file are combined with the AND operator.
    String fieldToFilter = args[4];
    //separate input files with semicolons (the string is split on ";")
    String inputAvroFiles = args[5];
    String outputAvroFiles = args[6];

    try {
        Path pathOfSchema = Paths.get(jsonPath);
        fieldToFilter = fieldToFilter.replace(CMD_LINE_TRIPLET_ORIGINAL_DELIM,
                TRIPLET_ELEMENTS_DELIMITER_SYMBOL);
        String schemaAsJson = Files.readAllLines(pathOfSchema, Charset.forName("UTF-8")).get(0);
        Class recordClass = Utils.ClassBuilder(schemaAsJson, packageName, classPath, absolutePathOfSrc);
        Configuration conf = new Configuration();

        conf.setStrings(FIELD_COND_OUTPUT_TRIPLET, fieldToFilter.split(";"));
        conf.set(TRIPLET_ELEMENTS_DELIMITER, TRIPLET_ELEMENTS_DELIMITER_SYMBOL);
        conf.set(FQCN_RECORD_CLASS, recordClass.getName());
        conf.set(AVRO_SCHEMA_AS_JSON, schemaAsJson);
        conf.set(SRC_ABSOLUTE_PATH, absolutePathOfSrc);
        conf.set(CLASS_ABSOLUTE_PATH, classPath);
        conf.set(PACKAGE_NAME, packageName);

        Job job = new Job(conf);
        //Set context for mapper and reducer.
        job.setJobName("Filter Records for Sqoop Export");
        job.setInputFormatClass(AvroKeyInputFormat.class);
        job.setMapperClass(Mapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        job.setReducerClass(Reducer.class);
        String[] argPaths = inputAvroFiles.split(";");
        org.apache.hadoop.fs.Path[] paths = new org.apache.hadoop.fs.Path[argPaths.length];
        for (int i = 0; i < argPaths.length; ++i) {
            paths[i] = new org.apache.hadoop.fs.Path(argPaths[i]);
        }

        org.apache.hadoop.mapreduce.lib.input.FileInputFormat.setInputPaths(job, paths);
        LazyOutputFormat.setOutputFormatClass(job, NullOutputFormat.class);

        //create the (initially empty) output Avro container files
        Schema avsc = new Schema.Parser().parse(schemaAsJson);
        for (String out : outputAvroFiles.split(";")) {
            // a fresh writer per file; try-with-resources closes it once the header is written
            try (DataFileWriter<Object> dfw = new DataFileWriter<>(new GenericDatumWriter<>(avsc))) {
                dfw.create(avsc, new File(out));
            }
        }
        /*YarnLocalCluster yarnLocalCluster = new YarnLocalCluster.Builder()
        .setNumNodeManagers(1)
        .setNumLocalDirs(Integer.parseInt("1"))
        .setNumLogDirs(Integer.parseInt("1"))
        .setResourceManagerAddress("localhost")
        .setResourceManagerHostname("localhost:37001")
        .setResourceManagerSchedulerAddress("localhost:37002")
        .setResourceManagerResourceTrackerAddress("localhost:37003")
        .setResourceManagerWebappAddress("localhost:37004")
        .setUseInJvmContainerExecutor(true)
        .setConfig(conf)
        .build();

        yarnLocalCluster.start();*/

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    } catch (Exception ex) {
        Logger.getLogger(SqoopHelperMain.class.getName()).log(Level.SEVERE, null, ex);
        System.out.println("Could not generate class for avro schema.");
    }

}
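
A note on the pattern above: the output Avro container files are created directly with DataFileWriter in the driver, and NullOutputFormat by itself already writes no files, so wrapping it in LazyOutputFormat is a belt-and-braces way of guaranteeing that the job itself creates no output files.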

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runDictionaryJobSampling() throws IOException, ClassNotFoundException, InterruptedException {
    boolean jobOK;
    Job job = null;

    // if input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if samples path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionarySamplesPath())) {
        if (this.conf.getDeleteDictionarySamplesPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionarySamplesPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Dictionary samples path does exist: " + this.conf.getDictionarySamplesPath());
            System.out.println("Select other path or use option -ds to overwrite");
            System.exit(-1);
        }
    }

    // Job to create a SequenceInputFormat with Roles
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 1");
    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("input = " + this.conf.getInputPath());
    System.out.println("samples = " + this.conf.getDictionarySamplesPath());

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionarySamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionarySamplerMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);
    job.setCombinerClass(DictionarySamplerReducer.class);
    job.setReducerClass(DictionarySamplerReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    job.setNumReduceTasks(this.conf.getDictionarySampleReducers());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runDictionaryJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // if output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select other path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Sample the SequenceInputFormat to do TotalSort and create final output
    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName() + " phase 2");

    job.setJarByClass(HDTBuilderDriver.class);

    System.out.println("samples = " + this.conf.getDictionarySamplesPath());
    System.out.println("output = " + this.conf.getDictionaryOutputPath());

    FileInputFormat.addInputPath(job, this.conf.getDictionarySamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    // Identity Mapper
    // job.setMapperClass(Mapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);
    job.setReducerClass(DictionaryReducer.class);

    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));

    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");

    bufferedWriter.close();

    return jobOK;
}
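
This job shows the canonical pairing of LazyOutputFormat with MultipleOutputs: all real records are written through the named outputs (shared, subjects, predicates, objects), so registering SequenceFileOutputFormat lazily keeps the reducers from also creating empty default part-r-* files next to them.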

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runDictionaryJobWithOneJob()
        throws ClassNotFoundException, IOException, InterruptedException, URISyntaxException {
    boolean jobOK;
    Job job = null;
    BufferedWriter bufferedWriter;

    // if input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if output path exists...
    if (this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        if (this.conf.getDeleteDictionaryOutputPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getDictionaryOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Dictionary output path does exist: " + this.conf.getDictionaryOutputPath());
            System.out.println("Select other path or use option -dd to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getDictionaryOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(DictionaryMapper.class);
    job.setCombinerClass(DictionaryCombiner.class);
    job.setReducerClass(DictionaryReducer.class);

    job.setNumReduceTasks(this.conf.getDictionaryReducers());

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(Text.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(NullWritable.class);

    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SHARED, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.SUBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.PREDICATES, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);
    MultipleOutputs.addNamedOutput(job, HDTBuilderConfiguration.OBJECTS, SequenceFileOutputFormat.class,
            Text.class, NullWritable.class);

    jobOK = job.waitForCompletion(true);

    this.numShared = job.getCounters().findCounter(Counters.Shared).getValue();
    this.numSubjects = job.getCounters().findCounter(Counters.Subjects).getValue();
    this.numPredicates = job.getCounters().findCounter(Counters.Predicates).getValue();
    this.numObjects = job.getCounters().findCounter(Counters.Objects).getValue();

    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.dictionaryFS.create(this.conf.getDictionaryCountersFile())));

    bufferedWriter.write(HDTBuilderConfiguration.SHARED + "=" + this.numShared + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.SUBJECTS + "=" + this.numSubjects + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.PREDICATES + "=" + this.numPredicates + "\n");
    bufferedWriter.write(HDTBuilderConfiguration.OBJECTS + "=" + this.numObjects + "\n");

    bufferedWriter.close();

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJobSampling() throws ClassNotFoundException, IOException, InterruptedException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exist, fail
    if (!this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getDictionaryOutputPath());
        System.exit(-1);
    }

    // if samples path exists...
    if (this.dictionaryFS.exists(this.conf.getTriplesSamplesPath())) {
        if (this.conf.getDeleteTriplesSamplesPath()) { // ... and option provided, delete recursively
            this.dictionaryFS.delete(this.conf.getTriplesSamplesPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples samples path already exists: " + this.conf.getTriplesSamplesPath());
            System.out.println("Select another path or use option -dst to overwrite");
            System.exit(-1);
        }
    }

    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    // Job to create a SequenceInputFormat
    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 1");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesSamplesPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);
    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    job = new Job(this.conf.getConfigurationObject(), this.conf.getTriplesJobName() + " phase 2");

    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getTriplesSamplesPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(SequenceFileInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setGroupingComparatorClass(TripleSPOComparator.class);

    job.setPartitionerClass(TotalOrderPartitioner.class);

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    System.out.println("Sampling started");
    InputSampler.writePartitionFile(job,
            new InputSampler.IntervalSampler<Text, Text>(this.conf.getSampleProbability()));
    String partitionFile = TotalOrderPartitioner.getPartitionFile(job.getConfiguration());
    URI partitionUri = new URI(partitionFile + "#" + TotalOrderPartitioner.DEFAULT_PATH);
    DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
    DistributedCache.createSymlink(job.getConfiguration());
    System.out.println("Sampling finished");

    SequenceFileOutputFormat.setCompressOutput(job, true);
    SequenceFileOutputFormat.setOutputCompressorClass(job, com.hadoop.compression.lzo.LzoCodec.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    jobOK = job.waitForCompletion(true);

    return jobOK;
}

From source file:org.rdfhdt.mrbuilder.HDTBuilderDriver.java

License:Open Source License

protected boolean runTriplesJobWithOneJob()
        throws IOException, ClassNotFoundException, InterruptedException, URISyntaxException {
    Job job = null;
    boolean jobOK;
    BufferedWriter bufferedWriter;

    // if input path does not exist, fail
    if (!this.inputFS.exists(this.conf.getInputPath())) {
        System.out.println("Dictionary input path does not exist: " + this.conf.getInputPath());
        System.exit(-1);
    }

    // if dictionary output path does not exist, fail
    if (!this.dictionaryFS.exists(this.conf.getDictionaryOutputPath())) {
        System.out.println("Dictionary output path does not exist: " + this.conf.getDictionaryOutputPath());
        System.exit(-1);
    }

    // if triples output path exists...
    if (this.triplesFS.exists(this.conf.getTriplesOutputPath())) {
        if (this.conf.getDeleteTriplesOutputPath()) { // ... and option provided, delete recursively
            this.triplesFS.delete(this.conf.getTriplesOutputPath(), true);
        } else { // ... and option not provided, fail
            System.out.println("Triples output path does exist: " + this.conf.getTriplesOutputPath());
            System.out.println("Select other path or use option -dt to overwrite");
            System.exit(-1);
        }
    }

    // Launch job
    this.conf.setProperty("mapred.child.java.opts",
            "-XX:ErrorFile=/home/hadoop/tmp/hs_err_pid%p.log -Xmx2500m");

    job = new Job(this.conf.getConfigurationObject(), this.conf.getDictionaryJobName());
    job.setJarByClass(HDTBuilderDriver.class);

    FileInputFormat.addInputPath(job, this.conf.getInputPath());
    FileOutputFormat.setOutputPath(job, this.conf.getTriplesOutputPath());

    job.setInputFormatClass(LzoTextInputFormat.class);
    LazyOutputFormat.setOutputFormatClass(job, SequenceFileOutputFormat.class);

    job.setMapperClass(TriplesSPOMapper.class);
    job.setSortComparatorClass(TripleSPOComparator.class);
    job.setMapOutputKeyClass(TripleSPOWritable.class);
    job.setMapOutputValueClass(NullWritable.class);

    job.setNumReduceTasks(this.conf.getTriplesReducers());

    job.setOutputKeyClass(TripleSPOWritable.class);
    job.setOutputValueClass(NullWritable.class);

    DistributedCache.addCacheFile(this.conf.getDictionaryFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryMapFile().toUri(), job.getConfiguration());
    // DistributedCache.addCacheFile(this.conf.getDictionaryReduceFile().toUri(), job.getConfiguration());

    jobOK = job.waitForCompletion(true);

    this.numTriples = job.getCounters().findCounter(Counters.Triples).getValue();
    bufferedWriter = new BufferedWriter(
            new OutputStreamWriter(this.triplesFS.create(this.conf.getTriplesCountersFile())));
    bufferedWriter.write(this.numTriples.toString() + "\n");
    bufferedWriter.close();

    return jobOK;
}

From source file:tv.icntv.log.stb.filter.FilterJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Configuration configuration = getConf();
    Path input = new Path(args[0]);

    Path output = new Path(args[1]);
    Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file");
    //setting job configuration .....
    stbFilterJob.setMapperClass(FilterMapper.class);
    stbFilterJob.setOutputKeyClass(NullWritable.class);
    stbFilterJob.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stbFilterJob, input);
    stbFilterJob.setJarByClass(getClass());

    FileOutputFormat.setOutputPath(stbFilterJob, output);
    LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class);

    stbFilterJob.setNumReduceTasks(0);
    return stbFilterJob.waitForCompletion(true) ? 0 : 1;
}
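
In a map-only filter such as this one (setNumReduceTasks(0)), a mapper whose input yields no surviving records would normally still create an empty part-m-* file; with LazyOutputFormat wrapping TextOutputFormat, a part file appears only once a mapper actually writes a record.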

From source file:tv.icntv.log.stb.filter.FilterJob.java

License:Apache License

@Override
public boolean run(Map<String, String> maps) throws Exception {
    Configuration configuration = getConf();
    // disable speculative execution
    configuration.setBoolean("mapreduce.reduce.speculative", false);
    configuration.setBoolean("mapreduce.map.speculative", false);
    //setting conf
    Path input = new Path(maps.get(INPUT));
    Path back = new Path(maps.get(BACK));
    Path output = new Path(maps.get(OUTPUT_PREFIX));
    configuration.set(OUTPUT_SUFFIX, maps.get(OUTPUT_SUFFIX));
    configuration.set(OUTPUT_PREFIX, output.toString());
    configuration.set(OTHER_PATH, maps.get(OTHER_PATH));

    //        configuration
    //        Path input=new Path("/icntv/log/stb/2014-05-19/stb-2014-05-18-23.lzo_deflate");
    //        Path back=new Path("/icntv/parser/stb/filter/status/2014-05-18/");
    //        Path output=new Path("/icntv/parser/stb/filter/result/2014-05-18/");
    Path[] in = HadoopUtils.createFile(input, back, new PathFilter() {
        @Override
        public boolean accept(Path path) {
            return path.getName().endsWith(file_success_suffix);
        }
    }, file_success_suffix, parseing_suffix, parsed_suffix);
    if (null == in || in.length == 0) {
        logger.info("input not exist;");
        return false;
    }
    List<Path> inTemp = Lists.newArrayList(in);
    String ye = DateUtils.addDay(input.getName(), "yyyy-MM-dd", -1);
    Path prefix = new Path(input.getParent() + File.separator + ye, "stb-" + ye + "-23.lzo");
    logger.info("prefix path ={}", prefix.toString());
    if (HadoopUtils.isExist(prefix)) {
        logger.info("add today path= {}", prefix.toString());
        inTemp.add(prefix);
    }
    String day = DateUtils.addDay(input.getName(), "yyyy-MM-dd", 1);
    Path nextPath = new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo");
    logger.info("next path ={},writed path={}", nextPath.toString(),
            new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"));
    if (HadoopUtils
            .isExist(new Path(input.getParent() + File.separator + day, "stb-" + day + "-00.lzo.writed"))) {
        logger.info("add today path= {}", nextPath.toString());
        inTemp.add(nextPath);
    }

    logger.info("input size = {}", inTemp.size());
    //        inTemp.add(new Path(input.getParent()+ File.separator+ DateTime.now().toString("yyyy-MM-dd"),"")
    Job stbFilterJob = Job.getInstance(configuration, "stb parser first:filter by rule file");
    //setting job configuration .....
    stbFilterJob.setMapperClass(FilterMapper.class);
    stbFilterJob.setOutputKeyClass(NullWritable.class);
    stbFilterJob.setOutputValueClass(Text.class);
    FileInputFormat.setInputPaths(stbFilterJob, inTemp.toArray(new Path[inTemp.size()]));
    stbFilterJob.setJarByClass(getClass());

    FileOutputFormat.setOutputPath(stbFilterJob, output);
    LazyOutputFormat.setOutputFormatClass(stbFilterJob, TextOutputFormat.class);

    stbFilterJob.setNumReduceTasks(0);

    if (stbFilterJob.waitForCompletion(true)) {
        for (Path path : in) {
            HadoopUtils.rename(new Path(path + parseing_suffix), new Path(path + parsed_suffix));
        }
        return true;
    }
    return false;

}