Example usage for org.apache.hadoop.mapred.lib LazyOutputFormat setOutputFormatClass

List of usage examples for org.apache.hadoop.mapred.lib LazyOutputFormat setOutputFormatClass

Introduction

On this page you can find example usages of org.apache.hadoop.mapred.lib.LazyOutputFormat.setOutputFormatClass.

Prototype

@SuppressWarnings("unchecked")
public static void setOutputFormatClass(JobConf job, Class<? extends OutputFormat> theClass) 

Source Link

Document

Set the underlying output format for LazyOutputFormat.

Usage

From source file: com.hazelcast.jet.hadoop.impl.WriteHdfsPTest.java

License:Open Source License

/**
 * Round-trip test: drains a populated map to HDFS through a pipeline configured with
 * {@code outputFormatClass}, reads the written files back using {@code inputFormatClass},
 * and checks that the number of entries read equals the number written.
 *
 * @throws Exception if any step of the setup or either job fails
 */
@Test
public void testWriteFile() throws Exception {
    final int expectedEntries = 320;
    final String sourceMapName = randomMapName();

    JetInstance jet = createJetMember();
    // Start a second member; only the first member's handle is used below.
    createJetMember();

    // Populate the source map with i -> i for every i in [0, expectedEntries).
    Map<IntWritable, IntWritable> entries = IntStream.range(0, expectedEntries).boxed()
            .collect(toMap(IntWritable::new, IntWritable::new));
    jet.getMap(sourceMapName).putAll(entries);

    Path outputPath = getPath();

    // ---- write phase -------------------------------------------------------
    JobConf writeConf = new JobConf();
    writeConf.setOutputFormat(outputFormatClass);
    writeConf.setOutputCommitter(FileOutputCommitter.class);
    writeConf.setOutputKeyClass(IntWritable.class);
    writeConf.setOutputValueClass(IntWritable.class);
    if (outputFormatClass.equals(LazyOutputFormat.class)) {
        // LazyOutputFormat is only a wrapper; tell it which format to delegate to.
        LazyOutputFormat.setOutputFormatClass(writeConf, TextOutputFormat.class);
    }
    FileOutputFormat.setOutputPath(writeConf, outputPath);

    Pipeline writePipeline = Pipeline.create();
    writePipeline.drawFrom(Sources.map(sourceMapName)).drainTo(HdfsSinks.hdfs(writeConf))
            // a higher local parallelism increases the race chance for LazyOutputFormat
            .setLocalParallelism(8);

    Future<Void> writeDone = jet.newJob(writePipeline).getFuture();
    assertCompletesEventually(writeDone);

    // ---- read-back phase ---------------------------------------------------
    JobConf readConf = new JobConf();
    readConf.setInputFormat(inputFormatClass);
    FileInputFormat.addInputPath(readConf, outputPath);

    Pipeline readPipeline = Pipeline.create();
    readPipeline.drawFrom(HdfsSources.hdfs(readConf)).drainTo(Sinks.list("results"));

    Future<Void> readDone = jet.newJob(readPipeline).getFuture();
    assertCompletesEventually(readDone);

    IList<Object> readBack = jet.getList("results");
    assertEquals(expectedEntries, readBack.size());
}

From source file: it.crs4.pydoop.pipes.Submitter.java

License:Apache License

/**
 * Parses the pipes submitter command line, builds a {@link JobConf} from it and runs the job.
 *
 * <p>Recognised options: {@code input}, {@code output}, {@code jar}, {@code inputformat},
 * {@code map}, {@code partitioner}, {@code reduce}, {@code writer}, {@code program},
 * {@code reduces}, {@code jobconf} (deprecated) and {@code lazyOutput}. Generic options are
 * consumed first by {@link GenericOptionsParser}; only the remaining arguments are parsed here.
 *
 * @param args the raw command-line arguments
 * @return {@code 0} after the job has been run; {@code 1} if no arguments were given or the
 *         command line could not be parsed (usage is printed in both cases)
 * @throws IllegalArgumentException if a {@code -jobconf} entry is not of the form {@code key=val}
 * @throws Exception if configuring or running the job fails
 */
@Override
public int run(String[] args) throws Exception {
    CommandLineParser cli = new CommandLineParser();
    if (args.length == 0) {
        cli.printUsage();
        return 1;
    }
    cli.addOption("input", false, "input path to the maps", "path");
    cli.addOption("output", false, "output path from the reduces", "path");

    cli.addOption("jar", false, "job jar file", "path");
    cli.addOption("inputformat", false, "java classname of InputFormat", "class");
    //cli.addArgument("javareader", false, "is the RecordReader in Java");
    cli.addOption("map", false, "java classname of Mapper", "class");
    cli.addOption("partitioner", false, "java classname of Partitioner", "class");
    cli.addOption("reduce", false, "java classname of Reducer", "class");
    cli.addOption("writer", false, "java classname of OutputFormat", "class");
    cli.addOption("program", false, "URI to application executable", "class");
    cli.addOption("reduces", false, "number of reduces", "num");
    cli.addOption("jobconf", false,
            "\"n1=v1,n2=v2,..\" (Deprecated) Optional. Add or override a JobConf property.", "key=val");
    cli.addOption("lazyOutput", false, "Optional. Create output lazily", "boolean");
    Parser parser = cli.createParser();
    try {

        GenericOptionsParser genericParser = new GenericOptionsParser(getConf(), args);
        CommandLine results = parser.parse(cli.options, genericParser.getRemainingArgs());

        JobConf job = new JobConf(getConf());

        if (results.hasOption("input")) {
            FileInputFormat.setInputPaths(job, results.getOptionValue("input"));
        }
        if (results.hasOption("output")) {
            FileOutputFormat.setOutputPath(job, new Path(results.getOptionValue("output")));
        }
        if (results.hasOption("jar")) {
            job.setJar(results.getOptionValue("jar"));
        }
        if (results.hasOption("inputformat")) {
            setIsJavaRecordReader(job, true);
            job.setInputFormat(getClass(results, "inputformat", job, InputFormat.class));
        }
        // NOTE(review): no "javareader" option is registered above (its registration is
        // commented out), so this branch presumably never fires — confirm before relying on it.
        if (results.hasOption("javareader")) {
            setIsJavaRecordReader(job, true);
        }
        if (results.hasOption("map")) {
            setIsJavaMapper(job, true);
            job.setMapperClass(getClass(results, "map", job, Mapper.class));
        }
        if (results.hasOption("partitioner")) {
            job.setPartitionerClass(getClass(results, "partitioner", job, Partitioner.class));
        }
        if (results.hasOption("reduce")) {
            setIsJavaReducer(job, true);
            job.setReducerClass(getClass(results, "reduce", job, Reducer.class));
        }
        if (results.hasOption("reduces")) {
            job.setNumReduceTasks(Integer.parseInt(results.getOptionValue("reduces")));
        }
        if (results.hasOption("writer")) {
            setIsJavaRecordWriter(job, true);
            job.setOutputFormat(getClass(results, "writer", job, OutputFormat.class));
        }

        // Must come after the "writer" handling: the output format configured on the job at
        // this point becomes the delegate that LazyOutputFormat wraps.
        if (results.hasOption("lazyOutput")) {
            if (Boolean.parseBoolean(results.getOptionValue("lazyOutput"))) {
                LazyOutputFormat.setOutputFormatClass(job, job.getOutputFormat().getClass());
            }
        }

        if (results.hasOption("program")) {
            setExecutable(job, results.getOptionValue("program"));
        }
        if (results.hasOption("jobconf")) {
            LOG.warn("-jobconf option is deprecated, please use -D instead.");
            String options = results.getOptionValue("jobconf");
            StringTokenizer tokenizer = new StringTokenizer(options, ",");
            while (tokenizer.hasMoreTokens()) {
                String keyVal = tokenizer.nextToken().trim();
                // Split on the first '=' only, so values may themselves contain '=' and an
                // empty value ("k=") is kept as an empty string instead of being dropped.
                String[] keyValSplit = keyVal.split("=", 2);
                if (keyValSplit.length != 2) {
                    throw new IllegalArgumentException(
                            "Malformed -jobconf entry '" + keyVal + "', expected key=val");
                }
                job.set(keyValSplit[0], keyValSplit[1]);
            }
        }
        // if they gave us a jar file, include it into the class path
        String jarFile = job.getJar();
        if (jarFile != null) {
            // Go through toURI() first: File.toURL() is deprecated because it does not escape
            // characters that are illegal in URLs (e.g. spaces in the jar path).
            final URL[] urls = new URL[] {
                    FileSystem.getLocal(job).pathToFile(new Path(jarFile)).toURI().toURL() };
            //FindBugs complains that creating a URLClassLoader should be
            //in a doPrivileged() block.
            ClassLoader loader = AccessController.doPrivileged(new PrivilegedAction<ClassLoader>() {
                public ClassLoader run() {
                    return new URLClassLoader(urls);
                }
            });
            job.setClassLoader(loader);
        }

        runJob(job);
        return 0;
    } catch (ParseException pe) {
        LOG.info("Error : " + pe);
        cli.printUsage();
        return 1;
    }

}