Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

This page collects example usages of org.apache.hadoop.mapreduce.Job#setOutputFormatClass from open source projects.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
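
The method must be called while the job is still being configured; Job throws IllegalStateException once the job has been submitted. Below is a minimal, self-contained sketch of the typical call sequence. It is not taken from any of the projects listed on this page: it uses only stock Hadoop classes (the identity Mapper and TextOutputFormat) and assumes the input and output paths arrive as command-line arguments.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class OutputFormatExample {
    public static void main(String[] args) throws Exception {
        final Job job = Job.getInstance(new Configuration(), "OutputFormatExample");
        job.setJarByClass(OutputFormatExample.class);

        // identity Mapper: passes each (offset, line) pair straight through
        job.setMapperClass(Mapper.class);
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        job.setInputFormatClass(TextInputFormat.class);
        // set the OutputFormat before submission; calling this on a
        // running job throws IllegalStateException
        job.setOutputFormatClass(TextOutputFormat.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

Most examples below pair the call with the chosen OutputFormat's static setOutputPath helper; formats that write to an external store, such as FsEntryHBaseOutputFormat, need no output path.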

Usage

From source file:com.lightboxtechnologies.spectrum.ExtentsExtractor.java

License:Apache License

public static int run(String imageID, String friendlyName, String outDir) throws Exception {

    Job job = SKJobFactory.createJob(imageID, friendlyName, "ExtentsExtractor");

    job.setJarByClass(ExtentsExtractor.class);
    job.setMapperClass(ExtentsExtractorMapper.class);

    job.setNumReduceTasks(1);
    job.setReducerClass(Reducer.class);
    job.setOutputKeyClass(LongWritable.class);
    job.setOutputValueClass(JsonWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputPath(job, new Path(outDir));

    FsEntryHBaseInputFormat.setupJob(job, imageID);

    System.out.println("Spinning off ExtentsExtractor Job...");
    job.waitForCompletion(true);
    return 0;
}

From source file:com.lightboxtechnologies.spectrum.ExtractData.java

License:Apache License

public int run(String[] args) throws Exception {
    if (args.length != 4) {
        System.err.println("Usage: ExtractData <imageID> <friendly_name> <extents_file> <evidence file>");
        return 2;
    }

    final String imageID = args[0];
    final String friendlyName = args[1];
    final String extentsPath = args[2];
    final String image = args[3];

    Configuration conf = getConf();

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "ExtractData", conf);
    job.setJarByClass(ExtractData.class);
    job.setMapperClass(ExtractDataMapper.class);
    job.setReducerClass(KeyValueSortReducer.class);
    job.setNumReduceTasks(1);

    // job ctor copies the Configuration we pass it, get the real one
    conf = job.getConfiguration();

    conf.setLong("timestamp", System.currentTimeMillis());

    job.setInputFormatClass(RawFileInputFormat.class);
    RawFileInputFormat.addInputPath(job, new Path(image));

    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(KeyValue.class);

    // -1 removes the cap on how many tasks may reuse a single JVM
    conf.setInt("mapreduce.job.jvm.numtasks", -1);

    final FileSystem fs = FileSystem.get(conf);
    Path hfileDir = new Path("/texaspete/ev/tmp", UUID.randomUUID().toString());
    hfileDir = hfileDir.makeQualified(fs);
    LOG.info("Hashes will be written temporarily to " + hfileDir);

    HFileOutputFormat.setOutputPath(job, hfileDir);

    final Path extp = new Path(extentsPath);
    final URI extents = extp.toUri();
    LOG.info("extents file is " + extents);

    DistributedCache.addCacheFile(extents, conf);
    conf.set("com.lbt.extentsname", extp.getName());
    // job.getConfiguration().setBoolean("mapred.task.profile", true);
    // job.getConfiguration().setBoolean("mapreduce.task.profile", true);

    HBaseTables.summon(conf, HBaseTables.HASH_TBL_B, HBaseTables.HASH_COLFAM_B);

    HBaseTables.summon(conf, HBaseTables.ENTRIES_TBL_B, HBaseTables.ENTRIES_COLFAM_B);

    final boolean result = job.waitForCompletion(true);
    if (result) {
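        // bulk-load the HFiles written by HFileOutputFormat into the hash table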
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(conf);
        HBaseConfiguration.addHbaseResources(conf);
        loader.setConf(conf);
        LOG.info("Loading hashes into hbase");
        chmodR(fs, hfileDir);
        loader.doBulkLoad(hfileDir, new HTable(conf, HBaseTables.HASH_TBL_B));
        //      result = fs.delete(hfileDir, true);
    }
    return result ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.FolderCount.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();
    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    if (otherArgs.length != 2) {
        System.err.println("Usage: FolderCount <table> <outpath>");
        System.exit(2);
    }

    final Job job = new Job(conf, "FolderCount");
    job.setJarByClass(FolderCount.class);
    job.setMapperClass(FolderCountMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setNumReduceTasks(1);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    final Scan scan = new Scan();
    scan.addFamily(HBaseTables.ENTRIES_COLFAM_B);
    job.getConfiguration().set(TableInputFormat.INPUT_TABLE, otherArgs[0]);
    job.getConfiguration().set(TableInputFormat.SCAN, convertScanToString(scan));

    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.lightboxtechnologies.spectrum.JsonImport.java

License:Apache License

public static int run(String jsonPath, String imageHash, String friendlyName, Configuration conf)
        throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }
    conf.set(HBaseTables.ENTRIES_TBL_VAR, HBaseTables.ENTRIES_TBL);

    final Job job = SKJobFactory.createJobFromConf(imageHash, friendlyName, "JsonImport", conf);
    job.setJarByClass(JsonImport.class);
    job.setMapperClass(FsEntryMapLoader.class);
    job.setNumReduceTasks(0);
    job.setOutputFormatClass(FsEntryHBaseOutputFormat.class);
    FileInputFormat.addInputPath(job, new Path(jsonPath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.MRCoffeeJob.java

License:Apache License

public static int run(String imageID, String outpath, String[] command, Configuration conf)
        throws ClassNotFoundException, DecoderException, IOException, InterruptedException {
    conf.setStrings("command", command);
    conf.setLong("timestamp", System.currentTimeMillis());

    final Job job = new Job(conf, "MRCoffeeJob");
    job.setJarByClass(MRCoffeeJob.class);

    job.setMapperClass(MRCoffeeMapper.class);

    //    job.setReducerClass(KeyValueSortReducer.class);
    //    job.setNumReduceTasks(1);
    job.setNumReduceTasks(0);

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    job.setOutputKeyClass(ImmutableHexWritable.class);
    //    job.setOutputValueClass(KeyValue.class);
    job.setOutputValueClass(JsonWritable.class);
    //    job.setOutputFormatClass(HFileOutputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    //    HFileOutputFormat.setOutputPath(job, new Path(outpath));
    TextOutputFormat.setOutputPath(job, new Path(outpath));

    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.PythonJob.java

License:Apache License

public static int run(String imageID, String friendlyName, String outpath, String pymap, String pyred,
        String format, Configuration conf) throws Exception {
    if (conf == null) {
        conf = HBaseConfiguration.create();
    }
    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyName, "PythonJob", conf);
    job.setJarByClass(PythonJob.class);

    job.setMapperClass(PythonMapper.class);
    PyEngine py = new PyEngine();
    configPyTask(job, py, "map", pymap);
    job.setMapOutputKeyClass(py.getKeyClass());
    job.setMapOutputValueClass(py.getValueClass());

    int numReduces = 1;
    job.setOutputKeyClass(py.getKeyClass());
    job.setOutputValueClass(py.getValueClass());
    if (pyred.equals("none")) {
        numReduces = 0;
    } else if (pyred.equals("identity")) {
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    } else if (pyred.equals("LongSumReducer")) {
        job.setReducerClass(LongSumReducer.class);
        job.setCombinerClass(LongSumReducer.class);
    } else {
        job.setReducerClass(PythonReducer.class);
        configPyTask(job, py, "reduce", pyred);
        job.setOutputKeyClass(py.getKeyClass());
        job.setOutputValueClass(py.getValueClass());
    }
    job.setNumReduceTasks(numReduces);

    // it is possible to run over a flat json file...
    // String input = otherArgs[0];
    // if (input.endsWith(".json") == true) {
    //   job.setInputFormatClass(FsEntryJsonInputFormat.class);
    //   FsEntryJsonInputFormat.addInputPath(job, new Path(input));
    // }
    // else {

    FsEntryHBaseInputFormat.setupJob(job, imageID);
    job.setInputFormatClass(FsEntryHBaseInputFormat.class);

    if (format != null && format.equals("SequenceFileOutputFormat")) {
        job.setOutputFormatClass(SequenceFileOutputFormat.class);
        SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);
    } else {
        job.setOutputFormatClass(TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(job, new Path(outpath));
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.lightboxtechnologies.spectrum.SequenceFileExport.java

License:Apache License

public static void main(String[] args) throws Exception {
    final Configuration conf = new Configuration();

    final String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    String imageID;
    String outpath;
    String friendlyname;
    final Set<String> exts = new HashSet<String>();

    if ("-f".equals(otherArgs[0])) {
        // five args are consumed below: -f, extensions file, imageID,
        // friendly name, outpath
        if (otherArgs.length != 5) {
            die();
        }

        // load extensions from file
        final Path extpath = new Path(otherArgs[1]);

        InputStream in = null;
        try {
            in = extpath.getFileSystem(conf).open(extpath);

            Reader r = null;
            try {
                r = new InputStreamReader(in);

                BufferedReader br = null;
                try {
                    br = new BufferedReader(r);

                    String line;
                    while ((line = br.readLine()) != null) {
                        exts.add(line.trim().toLowerCase());
                    }

                    br.close();
                } finally {
                    IOUtils.closeQuietly(br);
                }

                r.close();
            } finally {
                IOUtils.closeQuietly(r);
            }

            in.close();
        } finally {
            IOUtils.closeQuietly(in);
        }

        imageID = otherArgs[2];
        friendlyname = otherArgs[3];
        outpath = otherArgs[4];
    } else {
        if (otherArgs.length < 3) {
            die();
        }

        // read extensions from trailing args
        imageID = otherArgs[0];
        friendlyname = otherArgs[1];
        outpath = otherArgs[2];

        // lowercase all file extensions (they follow outpath, at index 3)
        for (int i = 3; i < otherArgs.length; ++i) {
            exts.add(otherArgs[i].toLowerCase());
        }
    }

    conf.setStrings("extensions", exts.toArray(new String[exts.size()]));

    final Job job = SKJobFactory.createJobFromConf(imageID, friendlyname, "SequenceFileExport", conf);
    job.setJarByClass(SequenceFileExport.class);
    job.setMapperClass(SequenceFileExportMapper.class);
    job.setNumReduceTasks(0);

    job.setOutputKeyClass(BytesWritable.class);
    job.setOutputValueClass(MapWritable.class);

    job.setInputFormatClass(FsEntryHBaseInputFormat.class);
    FsEntryHBaseInputFormat.setupJob(job, imageID);

    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    SequenceFileOutputFormat.setOutputCompressionType(job, SequenceFile.CompressionType.BLOCK);

    FileOutputFormat.setOutputPath(job, new Path(outpath));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}

From source file:com.linkedin.cubert.io.avro.AvroStorage.java

License:Open Source License

@Override
public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) {
    Schema avroSchema = null;

    // we can specify the avro schema of the output directly, instead of deriving
    // it from the BlockSchema
    // if (json.has("avroSchema"))
    // {
    // avroSchema =
    // new org.apache.avro.Schema.Parser().parse(JsonUtils.getText(json,
    // "avroSchema"));
    // }
    // else
    // {
    // schema = new BlockSchema(json.get("schema"));
    avroSchema = AvroUtils.convertFromBlockSchema("record", schema);

    conf.set("cubert.avro.output.schema", avroSchema.toString());

    job.setOutputFormatClass(PigAvroOutputFormatAdaptor.class);
    // AvroJob.setOutputKeySchema(job, avroSchema);
    // AvroJob.setOutputValueSchema(job, Schema.create(Type.NULL));
    // job.setOutputFormatClass(AvroKeyOutputFormat.class);
}

From source file:com.linkedin.cubert.io.rubix.RubixStorage.java

License:Open Source License

@Override
public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) {
    Class<?> tupleClass = TupleFactory.getInstance().newTuple().getClass();
    job.setOutputKeyClass(tupleClass);
    job.setOutputValueClass(tupleClass);

    job.setOutputFormatClass(RubixOutputFormat.class);

    if (params.has("compact"))
        conf.setBoolean(CubertStrings.USE_COMPACT_SERIALIZATION,
                Boolean.parseBoolean(JsonUtils.getText(params, "compact")));
}

From source file:com.linkedin.cubert.io.text.TextStorage.java

License:Open Source License

@Override
public void prepareOutput(Job job, Configuration conf, JsonNode params, BlockSchema schema, Path path) {
    if (params.has("separator")) {
        conf.set(CubertStrings.TEXT_OUTPUT_SEPARATOR, JsonUtils.getText(params, "separator"));
    }

    job.setOutputFormatClass(PigTextOutputFormatWrapper.class);
}