Example usage for org.apache.hadoop.mapred JobConf setOutputFormat

List of usage examples for org.apache.hadoop.mapred JobConf setOutputFormat

Introduction

On this page you can find usage examples for org.apache.hadoop.mapred.JobConf.setOutputFormat.

Prototype

public void setOutputFormat(Class<? extends OutputFormat> theClass) 

Document

Set the OutputFormat implementation for the map-reduce job.
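
For orientation, here is a minimal, self-contained sketch (not taken from the examples below; the class name and the argv paths are placeholders) showing where setOutputFormat fits among the other JobConf setters in a classic mapred-API identity job:

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class SetOutputFormatSketch {
    public static void main(String[] args) throws IOException {
        // Placeholder job: identity mapper/reducer (the mapred-API defaults).
        JobConf conf = new JobConf(SetOutputFormatSketch.class);
        conf.setJobName("setOutputFormat-sketch");

        conf.setInputFormat(TextInputFormat.class);
        // The call documented on this page: it decides which RecordWriter
        // serializes the job's final (key, value) pairs.
        conf.setOutputFormat(TextOutputFormat.class);

        // TextInputFormat emits LongWritable/Text, and the identity
        // mapper/reducer pass them through unchanged.
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        JobClient.runJob(conf);
    }
}

As the examples below show, the chosen OutputFormat only controls how the final records are written; output paths are still set through the FileOutputFormat helpers, which is why a job may set its output path via one FileOutputFormat subclass while setting the format to another class entirely.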

Usage

From source file:org.acacia.partitioner.java.EdgelistPartitioner.java

License:Apache License

@SuppressWarnings("unused")
public static void main(String[] args) throws IOException {
    JobConf conf = new JobConf(EdgelistPartitioner.class);

    if (conf == null) {
        return;
    }
    String dir1 = "/user/miyuru/merged";
    String dir2 = "/user/miyuru/merged-out";

    //We first delete the temporary directories if they exist on the HDFS
    FileSystem fs1 = FileSystem.get(new JobConf());
    //only delete dir2 because dir1 is uploaded externally.
    if (fs1.exists(new Path(dir2))) {
        fs1.delete(new Path(dir2), true);
    }

    conf.setInputFormat(WholeFileInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    WholeFileInputFormat.setInputPaths(conf, new Path(dir1));
    SequenceFileOutputFormat.setOutputPath(conf, new Path(dir2));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(SequenceFileMapper.class);
    conf.setReducerClass(MultipleOutputsInvertedReducer.class);
    //NullOutputFormat overrides the TextOutputFormat set earlier; the real
    //records are emitted through the named MultipleOutputs below.
    conf.setOutputFormat(NullOutputFormat.class);

    conf.setJobName("EdgelistPartitioner");

    MultipleOutputs.addMultiNamedOutput(conf, "partition", TextOutputFormat.class, NullWritable.class,
            Text.class);

    JobClient.runJob(conf);
}

From source file:org.ahanna.DoubleConversionMapper.java

License:Apache License

public static void main(String[] args) {
    JobConf conf = new JobConf(DoubleConversion.class);
    conf.setJobName("DoubleConversation");

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(DoubleConversionMapper.class);
    conf.setReducerClass(org.apache.hadoop.mapred.lib.IdentityReducer.class);

    // KeyValueTextInputFormat treats each line as an input record, 
    // and splits the line by the tab character to separate it into key and value 
    conf.setInputFormat(KeyValueTextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    try {
        JobClient.runJob(conf);
    } catch (IOException e) {
        e.printStackTrace(); // surface the failure rather than swallowing it
    }
}

From source file:org.apache.ambari.servicemonitor.jobs.FileUsingJobRunner.java

License:Apache License

public int run(String[] args) throws Exception {
    // Configuration processed by ToolRunner
    Configuration conf = getConf();

    CommandLine commandLine = getCommandLine();
    // Create a JobConf using the processed conf
    JobConf jobConf = new JobConf(conf, FileUsingJobRunner.class);

    //tune the config
    if (jobConf.get(JobKeys.RANGEINPUTFORMAT_ROWS) == null) {
        jobConf.setInt(JobKeys.RANGEINPUTFORMAT_ROWS, 1);
    }

    // Process custom command-line options
    String name = OptionHelper.getStringOption(commandLine, "n", "File Using Job");
    if (commandLine.hasOption('x')) {
        //delete the output directory
        String destDir = jobConf.get(JobKeys.MAPRED_OUTPUT_DIR);
        FileSystem fs = FileSystem.get(jobConf);
        fs.delete(new Path(destDir), true);
    }

    // Specify various job-specific parameters     
    jobConf.setMapperClass(FileUsingMapper.class);
    jobConf.setReducerClass(FileUsingReducer.class);
    jobConf.setMapOutputKeyClass(IntWritable.class);
    jobConf.setMapOutputValueClass(IntWritable.class);
    jobConf.setOutputFormat(TextOutputFormat.class);
    jobConf.setInputFormat(RangeInputFormat.class);
    //jobConf.setPartitionerClass(SleepJob.class);
    jobConf.setSpeculativeExecution(false);
    jobConf.setJobName(name);
    jobConf.setJarByClass(this.getClass());
    FileInputFormat.addInputPath(jobConf, new Path("ignored"));

    // Submit the job, then poll for progress until the job is complete
    RunningJob runningJob = JobClient.runJob(jobConf);
    runningJob.waitForCompletion();
    return runningJob.isSuccessful() ? 0 : 1;
}

From source file:org.apache.avro.mapred.AvroJob.java

License:Apache License

private static void configureAvroOutput(JobConf job) {
    if (job.get("mapred.output.format.class") == null)
        job.setOutputFormat(AvroOutputFormat.class);

    if (job.getReducerClass() == IdentityReducer.class)
        job.setReducerClass(HadoopReducer.class);

    job.setOutputKeyClass(AvroWrapper.class);
    configureAvroShuffle(job);
}

From source file:org.apache.avro.mapred.TestAvroTextSort.java

License:Apache License

@Test
/**
 * Run the identity job on a "bytes" Avro file using AvroAsTextInputFormat
 * and AvroTextOutputFormat to produce a sorted "bytes" Avro file.
 */
public void testSort() throws Exception {
    JobConf job = new JobConf();
    String dir = System.getProperty("test.dir", ".") + "/mapred";
    Path outputPath = new Path(dir + "/out");

    outputPath.getFileSystem(job).delete(outputPath);
    WordCountUtil.writeLinesBytesFile();

    job.setInputFormat(AvroAsTextInputFormat.class);
    job.setOutputFormat(AvroTextOutputFormat.class);
    job.setOutputKeyClass(Text.class);

    FileInputFormat.setInputPaths(job, new Path(dir + "/in"));
    FileOutputFormat.setOutputPath(job, outputPath);

    JobClient.runJob(job);

    WordCountUtil.validateSortedFile();
}

From source file:org.apache.avro.mapred.TestGenericJob.java

License:Apache License

@Test
public void testJob() throws Exception {
    JobConf job = new JobConf();
    Path outputPath = new Path(dir + "/out");
    outputPath.getFileSystem(job).delete(outputPath);

    job.setInputFormat(TextInputFormat.class);
    FileInputFormat.setInputPaths(job, dir + "/in");

    job.setMapperClass(AvroTestConverter.class);
    job.setNumReduceTasks(0);

    FileOutputFormat.setOutputPath(job, outputPath);
    System.out.println(createSchema());
    AvroJob.setOutputSchema(job, Pair.getPairSchema(Schema.create(Schema.Type.LONG), createSchema()));
    job.setOutputFormat(AvroOutputFormat.class);

    JobClient.runJob(job);
}

From source file:org.apache.avro.mapred.TestSequenceFileReader.java

License:Apache License

@Test
public void testNonAvroReducer() throws Exception {
    JobConf job = new JobConf();
    Path output = new Path(System.getProperty("test.dir", ".") + "/seq-out");

    output.getFileSystem(job).delete(output);

    // configure input for Avro from sequence file
    AvroJob.setInputSequenceFile(job);
    AvroJob.setInputSchema(job, SCHEMA);
    FileInputFormat.setInputPaths(job, FILE.toURI().toString());

    // mapper is default, identity

    // use a hadoop reducer that consumes Avro input
    AvroJob.setMapOutputSchema(job, SCHEMA);
    job.setReducerClass(NonAvroReducer.class);

    // configure output for non-Avro SequenceFile
    job.setOutputFormat(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(job, output);

    // output key/value classes are default, LongWritable/Text

    JobClient.runJob(job);

    checkFile(new SequenceFileReader<Long, CharSequence>(new File(output.toString() + "/part-00000")));
}

From source file:org.apache.avro.mapred.tether.TetherJob.java

License:Apache License

private static void setupTetherJob(JobConf job) throws IOException {
    job.setMapRunnerClass(TetherMapRunner.class);
    job.setPartitionerClass(TetherPartitioner.class);
    job.setReducerClass(TetherReducer.class);

    job.setInputFormat(TetherInputFormat.class);
    job.setOutputFormat(TetherOutputFormat.class);

    job.setOutputKeyClass(TetherData.class);
    job.setOutputKeyComparatorClass(TetherKeyComparator.class);
    job.setMapOutputValueClass(NullWritable.class);

    // set the map output key class to TetherData
    job.setMapOutputKeyClass(TetherData.class);

    // add TetherKeySerialization to io.serializations
    Collection<String> serializations = job.getStringCollection("io.serializations");
    if (!serializations.contains(TetherKeySerialization.class.getName())) {
        serializations.add(TetherKeySerialization.class.getName());
        job.setStrings("io.serializations", serializations.toArray(new String[0]));
    }

    // determine whether the executable should be added to the cache.
    if (job.getBoolean(TETHER_EXEC_CACHED, false)) {
        DistributedCache.addCacheFile(getExecutable(job), job);
    }
}

From source file:org.apache.hcatalog.hbase.TestHBaseBulkOutputFormat.java

License:Apache License

@Test
public void hbaseBulkOutputFormatTest() throws IOException, ClassNotFoundException, InterruptedException {
    String testName = "hbaseBulkOutputFormatTest";
    Path methodTestDir = new Path(getTestDir(), testName);
    LOG.info("starting: " + testName);

    String tableName = newTableName(testName).toLowerCase();
    String familyName = "my_family";
    byte[] familyNameBytes = Bytes.toBytes(familyName);

    //include hbase config in conf file
    Configuration conf = new Configuration(allConf);

    //create table
    conf.set(HBaseConstants.PROPERTY_OUTPUT_TABLE_NAME_KEY, tableName);
    conf.set("yarn.scheduler.capacity.root.queues", "default");
    conf.set("yarn.scheduler.capacity.root.default.capacity", "100");
    createTable(tableName, new String[] { familyName });

    String data[] = { "1,english:one,spanish:uno", "2,english:two,spanish:dos",
            "3,english:three,spanish:tres" };

    // input/output settings
    Path inputPath = new Path(methodTestDir, "mr_input");
    FSDataOutputStream os = getFileSystem().create(new Path(inputPath, "inputFile.txt"));
    for (String line : data)
        os.write(Bytes.toBytes(line + "\n"));
    os.close();
    Path interPath = new Path(methodTestDir, "inter");
    //create job
    JobConf job = new JobConf(conf);
    job.setWorkingDirectory(new Path(methodTestDir, "mr_work"));
    job.setJarByClass(this.getClass());
    job.setMapperClass(MapWriteOldMapper.class);

    job.setInputFormat(org.apache.hadoop.mapred.TextInputFormat.class);
    org.apache.hadoop.mapred.TextInputFormat.setInputPaths(job, inputPath);

    job.setOutputFormat(HBaseBulkOutputFormat.class);
    org.apache.hadoop.mapred.SequenceFileOutputFormat.setOutputPath(job, interPath);
    job.setOutputCommitter(HBaseBulkOutputCommitter.class);

    //manually create transaction
    RevisionManager rm = HBaseRevisionManagerUtil.getOpenedRevisionManager(conf);
    try {
        OutputJobInfo outputJobInfo = OutputJobInfo.create("default", tableName, null);
        Transaction txn = rm.beginWriteTransaction(tableName, Arrays.asList(familyName));
        outputJobInfo.getProperties().setProperty(HBaseConstants.PROPERTY_WRITE_TXN_KEY,
                HCatUtil.serialize(txn));
        job.set(HCatConstants.HCAT_KEY_OUTPUT_INFO, HCatUtil.serialize(outputJobInfo));
    } finally {
        rm.close();
    }

    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(HCatRecord.class);

    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(HCatRecord.class);

    job.setNumReduceTasks(0);

    RunningJob runJob = JobClient.runJob(job);
    runJob.waitForCompletion();
    assertTrue(runJob.isSuccessful());

    //verify
    HTable table = new HTable(conf, tableName);
    Scan scan = new Scan();
    scan.addFamily(familyNameBytes);
    ResultScanner scanner = table.getScanner(scan);
    int index = 0;
    for (Result result : scanner) {
        String vals[] = data[index].split(",");
        for (int i = 1; i < vals.length; i++) {
            String pair[] = vals[i].split(":");
            assertTrue(result.containsColumn(familyNameBytes, Bytes.toBytes(pair[0])));
            assertEquals(pair[1], Bytes.toString(result.getValue(familyNameBytes, Bytes.toBytes(pair[0]))));
        }
        index++;
    }
    //test if load count is the same
    assertEquals(data.length, index);
    //test if scratch directory was erased
    assertFalse(FileSystem.get(job).exists(interPath));
}