Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find usage examples for org.apache.hadoop.mapred.JobConf.setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Document

Set the OutputFormat implementation for the map-reduce job.
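
For orientation, here is a minimal, self-contained sketch (not taken from the examples below; the class name and the argv paths are placeholders) showing where setOutputFormat fits among the other JobConf setters in a classic mapred-API identity job:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetOutputFormatSketch {
    public static void main(String[] args) throws IOException {
        // Placeholder job: identity mapper/reducer (the mapred-API defaults).
        JobConf conf = new JobConf(SetOutputFormatSketch.class);
        conf.setJobName("setOutputFormat-sketch");

        conf.setInputFormat(TextInputFormat.class);
        // The call documented on this page: it decides which RecordWriter
        // serializes the job's final (key, value) pairs.
        conf.setOutputFormat(TextOutputFormat.class);

        // TextInputFormat emits LongWritable/Text, and the identity
        // mapper/reducer pass them through unchanged.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

As the examples below show, the chosen OutputFormat only controls how the final records are written; output paths are still set through the FileOutputFormat helpers, which is why a job may set its output path via one FileOutputFormat subclass while setting the format to another class entirely.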

Usage

From source file:org.acacia.partitioner.java.EdgelistPartitioner.java

License:Apache License

@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);

    if (conf == null) {
        return;
    }
    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    //only delete dir2 because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    //NullOutputFormat overrides the TextOutputFormat set earlier; the real
    //records are emitted through the named MultipleOutputs below.
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    JobClient.runJob(conf);
}

From source file:org.ahanna.DoubleConversionMapper.java

License:Apache License

public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record, 
    // and splits the line by the tab character to separate it into key and value 
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace(); // surface the failure rather than swallowing it
    }
}

From source file:org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java

License:Apache License

public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();

    CommandLine commandLine = getCommandLine();
    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    //tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        //delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters     
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    //jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());
    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}

From source file:org.apache.avro.mapred.TestAvroTextSort.java

License:Apache License

@Test
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
public void testSort() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");

    outputPath.getFileSystem(job).delete(outputPath);
    WordCountUtil.writeLinesBytesFile();

    job.setInputFormat(AvroAsTextInputFormat.class);
    job.setOutputFormat(AvroTextOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
    FileOutputFormat.setOutputPath(job, outputPath);

    JobClient.runJob(job);

    WordCountUtil.validateSortedFile();
}

From source file:org.apache.avro.mapred.TestGenericJob.java

License:Apache License

@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(dir + "/out");
    outputPath.getFileSystem(job).delete(outputPath);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dir + "/in");

    job.setMapperClass(AvroTestConverter.class);
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, outputPath);
    System.out.println(createSchema());
    AvroJob.setOutputSchema(job, Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
    job.setOutputFormat(AvroOutputFormat.class);

    JobClient.runJob(job);
}

From source file:org.apache.avro.mapred.TestSequenceFileReader.java

License:Apache License

@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");

    output.getFileSystem(job).delete(output);

    // configure input for Avro from sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // mapper is default, identity

    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output for non-Avro SequenceFile
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // output key/value classes are default, LongWritable/Text

    JobClient.runJob(job);

    checkFile(new SequenceFileReader<Long, CharSequence>(new File(output.toString() + "/part-00000")));
}

From source file:org.apache.avro.mapred.tether.TetherJob.java

License:Apache License

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}

From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License:Apache License

@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    //create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();
    Path interPath = new Path(methodTestDir, "inter");
    //create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    //test if load count is the same
    assertEquals(data.length, index);
    //test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}