Example usage for org.apache.hadoop.mapreduce Job submit

List of usage examples for org.apache.hadoop.mapreduce Job submit

Introduction

On this page you can find example usages of the org.apache.hadoop.mapreduce Job submit method.

Prototype

public void submit() throws IOException, InterruptedException, ClassNotFoundException 

Source Link

Document

Submit the job to the cluster and return immediately.

Usage

From source file:TestBAM.java

License:Open Source License

/**
 * Configures the BAM test job, submits it and blocks until it has finished.
 *
 * @param args {@code args[0]} is the input path (also stored in the configuration under
 *             {@code MyOutputFormat.HEADER_FROM_FILE}); {@code args[1]} is the output path
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public int run(String[] args) throws Exception {
    final Configuration configuration = getConf();

    // The output format reads this key to find the file the header is taken from.
    configuration.set(MyOutputFormat.HEADER_FROM_FILE, args[0]);

    final Path hadoopBamJar = new Path("hdfs:///libjars/hadoop-bam-7.0.0-jar-with-dependencies.jar");
    DistributedCache.addFileToClassPath(hadoopBamJar, configuration);

    final Job bamJob = new Job(configuration);
    bamJob.setJarByClass(TestBAM.class);

    // Map and reduce stages.
    bamJob.setMapperClass(TestBAMMapper.class);
    bamJob.setReducerClass(TestBAMReducer.class);

    // Intermediate and final records share the same key/value types.
    bamJob.setMapOutputKeyClass(Text.class);
    bamJob.setMapOutputValueClass(SAMRecordWritable.class);
    bamJob.setOutputKeyClass(Text.class);
    bamJob.setOutputValueClass(SAMRecordWritable.class);

    // Input and output formats.
    bamJob.setInputFormatClass(AnySAMInputFormat.class);
    bamJob.setOutputFormatClass(TestBAM.MyOutputFormat.class);

    final Path inputPath = new Path(args[0]);
    org.apache.hadoop.mapreduce.lib.input.FileInputFormat.addInputPath(bamJob, inputPath);

    final Path outputPath = new Path(args[1]);
    org.apache.hadoop.mapreduce.lib.output.FileOutputFormat.setOutputPath(bamJob, outputPath);

    // Submit first, then wait for the already-submitted job to finish.
    bamJob.submit();

    final boolean succeeded = bamJob.waitForCompletion(true);
    if (succeeded) {
        return 0;
    }

    System.err.println("sort :: Job failed.");
    return 1;
}

From source file:BU.MET.CS755.SpeciesIterDriver2.java

/**
 * Configures and runs the "Species Graph Builder" job for one iteration.
 *
 * <p>The input is read as XML records delimited by {@code <page>} ... {@code </page>}.
 * The output directory is {@code args[1]} with {@code iterCnt} appended.
 *
 * @param args    {@code args[0]} is the input path, {@code args[1]} the output path prefix
 * @param iterCnt iteration counter appended to the output path
 * @return {@code true} if the job was submitted and completed successfully; {@code false}
 *         if creation, submission or execution failed (previously this always returned
 *         {@code true}, hiding failures from the caller)
 */
static boolean MRGraphBuilder(String args[], int iterCnt) {
    conf = new JobConf(SpeciesIterDriver2.class);
    conf.setJobName("Species Graph Builder");
    conf.setNumReduceTasks(5);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesGraphBuilderMapper.class);
    conf.setReducerClass(SpeciesGraphBuilderReducer.class);

    // Reading in XML.
    conf.setInputFormat(StreamInputFormat.class);
    conf.set("stream.recordreader.class", "org.apache.hadoop.streaming.StreamXmlRecordReader");

    // Look for the <page> record in the XML.
    conf.set("stream.recordreader.begin", "<page>");
    conf.set("stream.recordreader.end", "</page>");

    inputpath = args[0];
    outputpath = args[1] + iterCnt;

    FileInputFormat.setInputPaths(conf, new Path(inputpath));
    FileOutputFormat.setOutputPath(conf, new Path(outputpath));

    try {
        Job theJob = new Job(conf, "SpeciesIter");
        theJob.submit();
        // Only wait when submission succeeded, and report the job's real outcome.
        return theJob.waitForCompletion(true);
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers further up can react to it.
        Thread.currentThread().interrupt();
        e.printStackTrace();
        return false;
    } catch (Exception e) {
        e.printStackTrace();
        return false;
    }
}

From source file:cienciaCelularMR.Main.java

/**
 * Prints the received arguments, builds the chained MCell/Fernet job and submits it
 * without waiting for completion.
 *
 * @param args command-line arguments; {@code args[5]} is the input path and
 *             {@code args[6]} the output path
 * @return always 0 once the job has been submitted
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    int position = 0;
    for (String argument : args) {
        System.out.println("Hadoop - arg[" + position + "] es: " + argument);
        position++;
    }

    // YARN memory configuration
    final String[][] yarnSettings = {
            { "mapreduce.map.memory.mb", "1400" },
            { "mapreduce.reduce.memory.mb", "2800" },
            { "mapreduce.map.java.opts", "-Xmx1120m" },
            { "mapreduce.reduce.java.opts", "-Xmx2240m" },
            { "yarn.app.mapreduce.am.resource.mb", "2800" },
            { "yarn.app.mapreduce.am.command-opts", "-Xmx2240m" },
            { "yarn.nodemanager.resource.memory-mb", "5040" },
            { "yarn.scheduler.minimum-allocation-mb", "1400" },
            { "yarn.scheduler.maximum-allocation-mb", "5040" },
            { "mapreduce.task.timeout", "18000000" }, // 5 hours
    };
    Configuration yarnConf = new Configuration();
    for (String[] setting : yarnSettings) {
        yarnConf.set(setting[0], setting[1]);
    }

    // Job creation
    Job mcellJob = Job.getInstance(yarnConf);
    mcellJob.setInputFormatClass(WholeFileInputFormat.class);
    FileInputFormat.setInputPaths(mcellJob, new Path(args[5]));
    FileOutputFormat.setOutputPath(mcellJob, new Path(args[6]));

    // Additional named mapper outputs used to report information
    for (String namedOutput : new String[] { "controloutput", "errormcell" }) {
        MultipleOutputs.addNamedOutput(mcellJob, namedOutput, TextOutputFormat.class, KeyMcell.class,
                Text.class);
    }

    // Files copied to the cache of the nodes
    final String[] cachedFiles = {
            "wasb:///mcell.exe",
            "wasb:///fernet.exe",
            "wasb:///fernet.cfg",
            "wasb:///libconfig_d.dll",
            "wasb:///libtiff3.dll",
            "wasb:///jpeg62.dll",
            "wasb:///zlib1.dll",
            "wasb:///msvcr100d.dll",
    };
    for (String cachedFile : cachedFiles) {
        mcellJob.addCacheFile(new Path(cachedFile).toUri());
    }

    mcellJob.setJarByClass(Main.class);

    // First link of the mapper chain: McellMapper.
    ChainMapper.addMapper(mcellJob, McellMapper.class, KeyMcell.class, BytesWritable.class, KeyMcell.class,
            Text.class, new Configuration(false));

    // Second link of the mapper chain: FernetMapper.
    ChainMapper.addMapper(mcellJob, FernetMapper.class, KeyMcell.class, Text.class, KeyMcell.class,
            FernetOutput.class, new Configuration(false));

    mcellJob.setReducerClass(ResultReducer.class);
    mcellJob.setOutputKeyClass(Text.class);
    mcellJob.setOutputValueClass(BytesWritable.class);

    // Fire and forget: the job is submitted but not waited for.
    mcellJob.submit();
    return 0;
}

From source file:co.cask.cdap.internal.app.runtime.batch.MapReduceRuntimeService.java

License:Apache License

/**
 * Prepares the MapReduce job and submits it.
 *
 * <p>In order, this method creates a local temporary directory and a cleanup task for it,
 * creates the job, installs a {@code MapReduceClassLoader} on the job configuration and as
 * the context class loader, calls {@code beforeSubmit}, localizes user resources, overrides
 * the job name, sets task resources, wraps the mapper and reducer, and builds the job jar.
 * When not running in local mode it additionally localizes the plugin archive, program jar,
 * logback jar, launcher jar and (if configured) the MapReduce framework archive, and points
 * the container classpath at the launcher jar only. Finally it starts a long transaction,
 * records the context in the job configuration and calls {@code job.submit()}.
 *
 * <p>If submission fails after the transaction was started, the transaction is invalidated.
 * On any failure the error is logged, the accumulated cleanup task is run and the original
 * throwable is rethrown.
 *
 * @throws Exception if any preparation step or the submission itself fails
 */
@Override
protected void startUp() throws Exception {
    // Creates a temporary directory locally for storing all generated files.
    File tempDir = createTempDirectory();
    cleanupTask = createCleanupTask(tempDir);

    try {
        Job job = createJob(new File(tempDir, "mapreduce"));
        Configuration mapredConf = job.getConfiguration();

        classLoader = new MapReduceClassLoader(injector, cConf, mapredConf,
                context.getProgram().getClassLoader(), context.getPlugins(), context.getPluginInstantiator());
        // Chain the class loader into the cleanup task created so far.
        cleanupTask = createCleanupTask(cleanupTask, classLoader);

        mapredConf.setClassLoader(new WeakReferenceDelegatorClassLoader(classLoader));
        ClassLoaders.setContextClassLoader(mapredConf.getClassLoader());

        context.setJob(job);

        beforeSubmit(job);

        // Localize additional resources that users have requested via BasicMapReduceContext.localize methods
        Map<String, String> localizedUserResources = localizeUserResources(job, tempDir);

        // Override user-defined job name, since we set it and depend on the name.
        // https://issues.cask.co/browse/CDAP-2441
        String jobName = job.getJobName();
        if (!jobName.isEmpty()) {
            LOG.warn("Job name {} is being overridden.", jobName);
        }
        job.setJobName(getJobName(context));

        // Create a temporary location for storing all generated files through the LocationFactory.
        Location tempLocation = createTempLocationDirectory();
        // Chain the temporary location into the cleanup task as well.
        cleanupTask = createCleanupTask(cleanupTask, tempLocation);

        // For local mode, everything is in the configuration classloader already, hence no need to create new jar
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // After calling beforeSubmit, we know what plugins are needed for the program, hence construct the proper
            // ClassLoader from here and use it for setting up the job
            Location pluginArchive = createPluginArchive(tempLocation);
            if (pluginArchive != null) {
                job.addCacheArchive(pluginArchive.toURI());
                mapredConf.set(Constants.Plugin.ARCHIVE, pluginArchive.getName());
            }
        }

        // set resources for the job
        TaskType.MAP.setResources(mapredConf, context.getMapperResources());
        TaskType.REDUCE.setResources(mapredConf, context.getReducerResources());

        // replace user's Mapper & Reducer's with our wrappers in job config
        MapperWrapper.wrap(job);
        ReducerWrapper.wrap(job);

        // packaging job jar which includes cdap classes with dependencies
        File jobJar = buildJobJar(job, tempDir);
        job.setJar(jobJar.toURI().toString());

        // In local mode the original program jar location is used as is.
        Location programJar = programJarLocation;
        if (!MapReduceTaskContextProvider.isLocal(mapredConf)) {
            // Copy and localize the program jar in distributed mode
            programJar = copyProgramJar(tempLocation);
            job.addCacheFile(programJar.toURI());

            List<String> classpath = new ArrayList<>();

            // Localize logback.xml
            Location logbackLocation = createLogbackJar(tempLocation);
            if (logbackLocation != null) {
                job.addCacheFile(logbackLocation.toURI());
                classpath.add(logbackLocation.getName());
            }

            // Generate and localize the launcher jar to control the classloader of MapReduce containers processes
            classpath.add("job.jar/lib/*");
            classpath.add("job.jar/classes");
            Location launcherJar = createLauncherJar(
                    Joiner.on(",").join(MapReduceContainerHelper.getMapReduceClassPath(mapredConf, classpath)),
                    tempLocation);
            job.addCacheFile(launcherJar.toURI());

            // The only thing in the container classpath is the launcher.jar
            // The MapReduceContainerLauncher inside the launcher.jar creates a MapReduceClassLoader and launches
            // the actual MapReduce AM/Task from that
            // We explicitly localize the mr-framework, but do not use it in the classpath
            URI frameworkURI = MapReduceContainerHelper.getFrameworkURI(mapredConf);
            if (frameworkURI != null) {
                job.addCacheArchive(frameworkURI);
            }

            mapredConf.unset(MRJobConfig.MAPREDUCE_APPLICATION_FRAMEWORK_PATH);
            mapredConf.set(MRJobConfig.MAPREDUCE_APPLICATION_CLASSPATH, launcherJar.getName());
            mapredConf.set(YarnConfiguration.YARN_APPLICATION_CLASSPATH, launcherJar.getName());
        }

        MapReduceContextConfig contextConfig = new MapReduceContextConfig(mapredConf);
        // We start long-running tx to be used by mapreduce job tasks.
        Transaction tx = txClient.startLong();
        try {
            // We remember tx, so that we can re-use it in mapreduce tasks
            // NOTE(review): despite its name, cConfCopy is the same instance as cConf, not a copy.
            CConfiguration cConfCopy = cConf;
            contextConfig.set(context, cConfCopy, tx, programJar.toURI(), localizedUserResources);

            LOG.info("Submitting MapReduce Job: {}", context);
            // submits job and returns immediately. Shouldn't need to set context ClassLoader.
            job.submit();

            // Only publish the job and transaction to the fields once submission succeeded.
            this.job = job;
            this.transaction = tx;
        } catch (Throwable t) {
            // Submission failed after the transaction was started: invalidate it before rethrowing.
            Transactions.invalidateQuietly(txClient, tx);
            throw t;
        }
    } catch (Throwable t) {
        LOG.error("Exception when submitting MapReduce Job: {}", context, t);
        // Run everything accumulated in the cleanup task, then propagate the failure.
        cleanupTask.run();
        throw t;
    }
}

From source file:com.accumulobook.advanced.mapreduce.WordCount.java

License:Apache License

/**
 * Configures a word-count job that reads from and writes to Accumulo tables, then submits
 * it without waiting for completion.
 *
 * @param args {@code args[0]} Accumulo instance name, {@code args[1]} ZooKeeper hosts,
 *             {@code args[2]} principal used for the connector, {@code args[3]} password
 * @return always 0 once the job has been submitted
 * @throws Exception if the job cannot be configured or submitted
 */
@Override
public int run(String[] args) throws Exception {

    Job job = Job.getInstance(new Configuration());
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountCombiner.class);
    job.setReducerClass(WordCountReducer.class);

    // input
    job.setInputFormatClass(AccumuloInputFormat.class);

    ClientConfiguration zkiConfig = new ClientConfiguration().withInstance(args[0]).withZkHosts(args[1]);

    AccumuloInputFormat.setInputTableName(job, WikipediaConstants.ARTICLES_TABLE);

    // Only fetch the contents column family; the empty Text is passed as the qualifier.
    // The Pair is parameterized explicitly to avoid a raw type and unchecked conversion.
    List<Pair<Text, Text>> columns = new ArrayList<>();
    columns.add(new Pair<Text, Text>(WikipediaConstants.CONTENTS_FAMILY_TEXT, new Text("")));

    AccumuloInputFormat.fetchColumns(job, columns);
    AccumuloInputFormat.setZooKeeperInstance(job, zkiConfig);
    AccumuloInputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3]));

    // output
    job.setOutputFormatClass(AccumuloOutputFormat.class);

    BatchWriterConfig config = new BatchWriterConfig();

    AccumuloOutputFormat.setBatchWriterOptions(job, config);
    AccumuloOutputFormat.setZooKeeperInstance(job, zkiConfig);
    AccumuloOutputFormat.setConnectorInfo(job, args[2], new PasswordToken(args[3]));
    AccumuloOutputFormat.setDefaultTableName(job, WikipediaConstants.WORD_COUNT_TABLE);
    AccumuloOutputFormat.setCreateTables(job, true);

    job.setJarByClass(WordCount.class);

    // Fire and forget: the job is submitted but not waited for.
    job.submit();
    return 0;
}

From source file:com.basho.riak.hadoop.RiakWordCount.java

License:Apache License

/**
 * Configures the Riak word-count job, submits it and blocks until it has finished.
 *
 * <p>Keys are listed from the {@code wordcount} bucket, five Riak locations on
 * {@code 127.0.0.1} are registered, and results are written to {@code wordcount_out}.
 *
 * @param args command-line arguments (not used by this method)
 * @return 0 when the job completes successfully, 1 otherwise
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
public int run(String[] args) throws Exception {
    // The original built a 10000-element key array here that was never read; it is removed.
    Configuration conf = getConf();
    conf = RiakConfig.setKeyLister(conf, new BucketKeyLister("wordcount"));

    // Register the Riak locations, in the same order as before.
    for (int port : new int[] { 11087, 12087, 13087, 14087, 15087 }) {
        conf = RiakConfig.addLocation(conf, new RiakLocation("127.0.0.1", port));
    }
    conf = RiakConfig.setOutputBucket(conf, "wordcount_out");
    conf = RiakConfig.setHadoopClusterSize(conf, 4);

    Job job = new Job(conf, "Riak-WordCount");

    job.setJarByClass(RiakWordCount.class);

    job.setInputFormatClass(RiakInputFormat.class);
    job.setMapperClass(TokenCounterMapper.class);

    job.setReducerClass(TokenCounterReducer.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);

    job.setOutputFormatClass(RiakOutputFormat.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(WordCountResult.class);

    job.setNumReduceTasks(4);

    // Submit first, then wait for the already-submitted job to finish.
    job.submit();
    return job.waitForCompletion(true) ? 0 : 1;
}

From source file:com.blackberry.logdriver.util.Cat.java

License:Apache License

/**
 * Runs a map-only job that reads the given input files with {@code BoomInputFormat}
 * and writes them as text to the output directory.
 *
 * <p>Usage: {@code [genericOptions] input [input ...] output}. Every input argument is
 * expanded as a glob. If the {@code job.wait} configuration flag is true the method blocks
 * until the job finishes; otherwise the job is only submitted.
 *
 * @param args input paths/globs followed by the output directory as the last element
 * @return 0 on success (or after a non-blocking submit), 1 if the output separator is
 *         invalid or the awaited job fails
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // Load input files from the command line
    if (args.length < 2) {
        System.out.println("usage: [genericOptions] input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    List<Path> paths = new ArrayList<Path>();
    for (int i = 0; i < args.length - 1; i++) {
        // globStatus returns null for a non-glob path that does not exist; guard against
        // the NullPointerException the for-each loop would otherwise throw.
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    Path outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Cat.class);
    jobConf.setIfUnset("mapred.job.name", "Cat Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }
        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(CatMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);

    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.FastSearch.java

License:Apache License

/**
 * Runs a map-only job that searches the given input files for a string, reading them with
 * {@code AvroBlockInputFormat} and writing matches as text to the output directory.
 *
 * <p>Usage: {@code [genericOptions] searchString input [input ...] output}. Every input
 * argument is expanded as a glob. The search string is passed to the job Base64-encoded.
 * If the {@code job.wait} configuration flag is true the method blocks until the job
 * finishes; otherwise the job is only submitted.
 *
 * @param args the search string, then input paths/globs, then the output directory
 * @return 0 on success (or after a non-blocking submit), 1 if the output separator is
 *         invalid or the awaited job fails
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchString input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    String searchString = args[0];
    List<Path> paths = new ArrayList<Path>();
    for (int i = 1; i < args.length - 1; i++) {
        // globStatus returns null for a non-glob path that does not exist; guard against
        // the NullPointerException the for-each loop would otherwise throw.
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    Path outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(FastSearch.class);
    jobConf.setIfUnset("mapred.job.name", "Search Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // Base64 keeps arbitrary characters of the search string intact inside the configuration.
    jobConf.set("logdriver.search.string", Base64.encodeBase64String(searchString.getBytes("UTF-8")));

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}

From source file:com.blackberry.logdriver.util.Grep.java

License:Apache License

/**
 * Runs a map-only job that greps the given input files for a regular expression, reading
 * them with {@code BoomInputFormat} and writing matches as text to the output directory.
 *
 * <p>Usage: {@code [genericOptions] regex input [input ...] output}. Every input argument
 * is expanded as a glob. The regex is passed to the job Base64-encoded. If the
 * {@code job.wait} configuration flag is true the method blocks until the job finishes;
 * otherwise the job is only submitted.
 *
 * @param args the regex, then input paths/globs, then the output directory
 * @return 0 on success (or after a non-blocking submit), 1 if the output separator is
 *         invalid or the awaited job fails
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] regex input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    String regex = args[0];
    List<Path> paths = new ArrayList<Path>();
    for (int i = 1; i < args.length - 1; i++) {
        // globStatus returns null for a non-glob path that does not exist; guard against
        // the NullPointerException the for-each loop would otherwise throw.
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }
    Path outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(Grep.class);
    jobConf.setIfUnset("mapred.job.name", "Grep Files");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    // Base64 keeps arbitrary characters of the regex intact inside the configuration.
    jobConf.set("logdriver.grep.regex", Base64.encodeBase64String(regex.getBytes("UTF-8")));

    job.setInputFormatClass(BoomInputFormat.class);
    job.setMapperClass(GrepMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job.
    job.setNumReduceTasks(0);

    // And set the output as usual
    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        BoomInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }

}

From source file:com.blackberry.logdriver.util.MultiSearch.java

License:Apache License

/**
 * Runs a map-only job that searches the given input files for the strings stored in a
 * directory, reading inputs with {@code AvroBlockInputFormat} and writing matches as text.
 *
 * <p>Usage: {@code [genericOptions] searchStringDirectory input [input ...] output}.
 * The replication factor of every file in the search-string directory is raised to 16
 * before the job is configured. Every input argument is expanded as a glob. If the
 * {@code job.wait} configuration flag is true the method blocks until the job finishes;
 * otherwise the job is only submitted.
 *
 * @param args the search-string directory, then input paths/globs, then the output directory
 * @return 0 on success (or after a non-blocking submit), 1 if the output separator is
 *         invalid or the awaited job fails
 * @throws Exception if the job cannot be configured, submitted or monitored
 */
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf(); // Configuration processed by ToolRunner
    // If run by Oozie, then load the Oozie conf too
    if (System.getProperty("oozie.action.conf.xml") != null) {
        conf.addResource(new URL("file://" + System.getProperty("oozie.action.conf.xml")));
    }

    FileSystem fs = FileSystem.get(conf);

    // Load input files from the command line
    if (args.length < 3) {
        System.out.println("usage: [genericOptions] searchStringDirectory input [input ...] output");
        System.exit(1);
    }

    // Get the files we need from the command line.
    String searchStringDir = args[0];
    // We are going to be reading all the files in this directory a lot. So
    // let's up the replication factor by a lot so that they're easy to read.
    for (FileStatus f : fs.listStatus(new Path(searchStringDir))) {
        fs.setReplication(f.getPath(), (short) 16);
    }

    List<Path> paths = new ArrayList<Path>();
    for (int i = 1; i < args.length - 1; i++) {
        // globStatus returns null for a non-glob path that does not exist; guard against
        // the NullPointerException the for-each loop would otherwise throw.
        FileStatus[] matches = fs.globStatus(new Path(args[i]));
        if (matches == null) {
            LOG.warn("Input path does not exist, skipping: " + args[i]);
            continue;
        }
        for (FileStatus f : matches) {
            paths.add(f.getPath());
        }
    }

    Path outputDir = new Path(args[args.length - 1]);

    @SuppressWarnings("deprecation")
    Job job = new Job(conf);
    Configuration jobConf = job.getConfiguration();

    job.setJarByClass(MultiSearch.class);
    jobConf.setIfUnset("mapred.job.name", "MultiSearch");

    // To propagate credentials within Oozie
    if (System.getenv("HADOOP_TOKEN_FILE_LOCATION") != null) {
        jobConf.set("mapreduce.job.credentials.binary", System.getenv("HADOOP_TOKEN_FILE_LOCATION"));
    }

    // Good output separators include things that are unsupported by XML. So we
    // just send the byte value of the character through. The restriction here
    // is that it can't be more than 1 byte when UTF-8 encoded, since it will be
    // read by Pig which only deals with single byte separators.
    {
        String outputSeparator = jobConf.get("logdriver.output.field.separator", DEFAULT_OUTPUT_SEPARATOR);
        byte[] bytes = outputSeparator.getBytes(UTF_8);
        if (bytes.length != 1) {
            LOG.error("The output separator must be a single byte in UTF-8.");
            return 1;
        }

        jobConf.set("logdriver.output.field.separator", Byte.toString(bytes[0]));
    }

    jobConf.set("logdriver.search.string.dir", searchStringDir);

    // This search is generally too fast to make good use of 128MB blocks, so
    // let's set the value to 256MB (if it's not set already)
    if (jobConf.get("mapred.max.split.size") == null) {
        jobConf.setLong("mapred.max.split.size", 256 * 1024 * 1024);
    }

    job.setInputFormatClass(AvroBlockInputFormat.class);
    job.setMapperClass(SearchMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(NullWritable.class);

    // Map-only job.
    job.setNumReduceTasks(0);

    job.setOutputFormatClass(TextOutputFormat.class);
    TextOutputFormat.setOutputPath(job, outputDir);
    for (Path path : paths) {
        AvroBlockInputFormat.addInputPath(job, path);
    }

    // Run the job.
    if (conf.getBoolean("job.wait", DEFAULT_WAIT_JOB)) {
        return job.waitForCompletion(true) ? 0 : 1;
    } else {
        job.submit();
        return 0;
    }
}