org.apache.avro.tool.TetherTool.java Source code

Java tutorial

Introduction

Here is the source code for the class org.apache.avro.tool.TetherTool (file TetherTool.java).

Source

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro.tool;

import java.io.File;
import java.io.InputStream;
import java.io.PrintStream;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.List;

import org.apache.avro.Schema;
import org.apache.avro.mapred.AvroJob;
import org.apache.avro.mapred.tether.TetherJob;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.GnuParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.cli.Options;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobConf;

/**
 * Command-line tool that configures a {@link JobConf} from its arguments and
 * submits it through {@link TetherJob#runJob(JobConf)}.
 *
 * <p>Recognised options: {@code -in}, {@code -out}, {@code -program} and
 * {@code -outschema} (all required), plus the optional {@code -exec_args},
 * {@code -outschemamap}, {@code -reduces}, {@code -exec_cached},
 * {@code -protocol} and {@code -help}.
 */
@SuppressWarnings("deprecation")
public class TetherTool implements Tool {
    /**
     * Not read or assigned anywhere in this class; retained so that existing
     * references to the public field keep compiling.
     */
    public TetherJob job;

    @Override
    public String getName() {
        return "tether";
    }

    @Override
    public String getShortDescription() {
        return "Run a tethered mapreduce job.";
    }

    /**
     * Parses {@code args}, configures a job from them and runs it.
     *
     * @param ins  unused by this tool
     * @param outs receives the usage text when {@code -help} is given
     * @param err  receives the error message and usage text when the
     *             arguments cannot be parsed or applied
     * @param args the command-line arguments
     * @return {@code 0} after printing help or after {@code runJob} returns,
     *         {@code -1} if parsing or job configuration failed
     * @throws Exception whatever {@link TetherJob#runJob(JobConf)} throws
     */
    @Override
    public int run(InputStream ins, PrintStream outs, PrintStream err, List<String> args) throws Exception {
        Options opts = buildOptions();
        CommandLineParser parser = new GnuParser();
        JobConf conf = new JobConf();

        try {
            // NOTE(review): several options are declared required, so the parser
            // presumably rejects a bare "-help" before we get to look at it;
            // confirm against the commons-cli version in use.
            CommandLine line = parser.parse(opts, args.toArray(new String[0]));

            if (line.hasOption("help")) {
                printHelp(outs, opts);
                return 0;
            }

            configureJob(conf, line);
        } catch (Exception exp) {
            // Report on the caller-supplied error stream rather than System.out so
            // that embedding callers (and tests) can capture the diagnostics.
            err.println("Unexpected exception: " + exp.getMessage());
            printHelp(err, opts);
            return -1;
        }

        TetherJob.runJob(conf);
        return 0;
    }

    /** Declares every command-line option understood by this tool. */
    private static Options buildOptions() {
        Option helpopt = OptionBuilder.hasArg(false).withDescription("print this message").create("help");

        Option inopt = OptionBuilder.hasArg().isRequired().withDescription("comma-separated input paths")
                .create("in");

        Option outopt = OptionBuilder.hasArg().isRequired().withDescription("The output path.").create("out");

        Option pargs = OptionBuilder.hasArg().withDescription(
                "A string containing the command line arguments to pass to the tethered process. String should be enclosed in quotes")
                .create("exec_args");

        Option popt = OptionBuilder.hasArg().isRequired().withDescription("executable program, usually in HDFS")
                .create("program");

        Option outscopt = OptionBuilder.withType(File.class).hasArg().isRequired()
                .withDescription("schema file for output of reducer").create("outschema");

        Option outscmapopt = OptionBuilder.withType(File.class).hasArg()
                .withDescription("(optional) map output schema file,  if different from outschema")
                .create("outschemamap");

        Option redopt = OptionBuilder.withType(Integer.class).hasArg()
                .withDescription("(optional) number of reducers").create("reduces");

        Option cacheopt = OptionBuilder.withType(Boolean.class).hasArg().withDescription(
                "(optional) boolean indicating whether or not the exectuable should be distributed via distributed cache")
                .create("exec_cached");

        Option protoopt = OptionBuilder.hasArg()
                .withDescription("(optional) specifies the transport protocol 'http' or 'sasl'").create("protocol");

        Options opts = new Options();
        opts.addOption(redopt);
        opts.addOption(outscopt);
        opts.addOption(popt);
        opts.addOption(pargs);
        opts.addOption(inopt);
        opts.addOption(outopt);
        opts.addOption(helpopt);
        opts.addOption(outscmapopt);
        opts.addOption(cacheopt);
        opts.addOption(protoopt);
        return opts;
    }

    /** Copies the parsed command-line values into {@code conf}. */
    private static void configureJob(JobConf conf, CommandLine line) throws Exception {
        FileInputFormat.addInputPaths(conf, line.getOptionValue("in"));
        FileOutputFormat.setOutputPath(conf, new Path(line.getOptionValue("out")));

        // A null argument list is passed through when -exec_args is absent.
        List<String> execArgs = null;
        if (line.hasOption("exec_args")) {
            execArgs = splitExecArgs(line.getOptionValue("exec_args"));
        }
        boolean cached = line.hasOption("exec_cached")
                && Boolean.parseBoolean(line.getOptionValue("exec_cached"));
        TetherJob.setExecutable(conf, new File(line.getOptionValue("program")), execArgs, cached);

        conf.set(AvroJob.OUTPUT_SCHEMA, readSchema(line.getOptionValue("outschema")));
        if (line.hasOption("outschemamap")) {
            conf.set(AvroJob.MAP_OUTPUT_SCHEMA, readSchema(line.getOptionValue("outschemamap")));
        }
        if (line.hasOption("reduces")) {
            // Parse the raw string ourselves: this does not depend on the CLI
            // library knowing how to convert Integer.class (older releases'
            // type handler appears not to — TODO confirm), and a malformed
            // number surfaces as a NumberFormatException with a useful message.
            conf.setNumReduceTasks(Integer.parseInt(line.getOptionValue("reduces").trim()));
        }
        if (line.hasOption("protocol")) {
            TetherJob.setProtocol(conf, line.getOptionValue("protocol"));
        }
    }

    /**
     * Splits the {@code -exec_args} value on runs of whitespace, so repeated,
     * leading or trailing blanks never produce empty arguments.
     */
    private static List<String> splitExecArgs(String raw) {
        List<String> parts = new ArrayList<String>();
        String trimmed = raw.trim();
        if (trimmed.length() > 0) {
            for (String part : trimmed.split("\\s+")) {
                parts.add(part);
            }
        }
        return parts;
    }

    /** Parses the Avro schema stored in the file at {@code path} and returns its JSON text. */
    private static String readSchema(String path) throws Exception {
        return new Schema.Parser().parse(new File(path)).toString();
    }

    /** Writes the usage text for {@code opts} to {@code target} using the formatter defaults. */
    private static void printHelp(PrintStream target, Options opts) {
        PrintWriter writer = new PrintWriter(target);
        new HelpFormatter().printHelp(writer, HelpFormatter.DEFAULT_WIDTH, "tether", null, opts,
                HelpFormatter.DEFAULT_LEFT_PAD, HelpFormatter.DEFAULT_DESC_PAD, null);
        // Flush but do not close: the stream belongs to the caller.
        writer.flush();
    }

}