Example usage for org.apache.hadoop.util GenericOptionsParser printGenericCommandUsage

List of usage examples for org.apache.hadoop.util GenericOptionsParser printGenericCommandUsage


On this page you can find example usages of org.apache.hadoop.util.GenericOptionsParser.printGenericCommandUsage.


public static void printGenericCommandUsage(PrintStream out) 

Source Link


Prints the usage message for the supported generic command-line options.


From source file:$.WordCount.java

License:Open Source License

/**
 * Runs a word count over a text file using a Crunch {@code MRPipeline}.
 *
 * <p>Steps, in the order they appear below: read {@code args[0]} as a collection of lines,
 * apply {@code Tokenizer}, filter with {@code StopWordFilter}, count the remaining words,
 * and write the counts as text to {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt has lost the closing braces of the {@code if} block and of
 * the method itself; restore them from the original source before compiling.
 *
 * @param args expected to hold exactly two entries: the input path and the output path
 * @return {@code 1} when {@code args.length != 2} or the pipeline result did not succeed,
 *         {@code 0} otherwise
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {

        // Anything other than exactly two arguments prints the usage line and exits with 1.
        if (args.length != 2) {
            System.err.println(
                    "Usage: hadoop jar ${artifactId}-${version}-job.jar" + " [generic options] input output");
            return 1;

        String inputPath = args[0];
        String outputPath = args[1];

        // Create an object to coordinate pipeline creation and execution.
        Pipeline pipeline = new MRPipeline(WordCount.class, getConf());

        // Reference a given text file as a collection of Strings.
        PCollection<String> lines = pipeline.readTextFile(inputPath);

        // Define a function that splits each line in a PCollection of Strings into
        // a PCollection made up of the individual words in the file.
        // The second argument sets the serialization format.
        PCollection<String> words = lines.parallelDo(new Tokenizer(), Writables.strings());

        // Take the collection of words and remove known stop words.
        PCollection<String> noStopWords = words.filter(new StopWordFilter());

        // The count method applies a series of Crunch primitives and returns
        // a map of the unique words in the input PCollection to their counts.
        PTable<String, Long> counts = noStopWords.count();

        // Instruct the pipeline to write the resulting counts to a text file.
        pipeline.writeTextFile(counts, outputPath);

        // Execute the pipeline as a MapReduce.
        PipelineResult result = pipeline.done();

        // Map the pipeline outcome onto a process-style exit code.
        return result.succeeded() ? 0 : 1;

From source file:cn.edu.bjtu.cit.recommender.Recommender.java

License:Apache License

/**
 * Assembles and runs the recommender pipeline on a Crunch {@code MRPipeline}.
 *
 * <p>The body is organised into stages marked S0 through S9 (see the {@code * Sx} marker
 * lines). After most stages the intermediate table is passed through
 * {@code profiler.profile(...)}. Every anonymous function also reports
 * {@code scaleFactor()} / {@code scaleFactorByRecord()} values read from {@code est}.
 *
 * <p>NOTE(review): this excerpt is damaged. Closing braces, the {@code /*} and
 * {@code *}{@code /} delimiters around the stage markers, and several statements (for
 * example the bodies of some {@code if} blocks) are missing. Treat it as a reading aid and
 * restore the original source before compiling.
 *
 * @param args at least two entries; {@code args[0]} is read as the input text file and
 *             {@code args[1]} receives the text output; further entries are handed to
 *             {@code OptionParser}
 * @return {@code 1} when fewer than two arguments are given or the pipeline result did not
 *         succeed; {@code 0} on success (and, presumably, when profiling is active --
 *         confirm once the braces are restored)
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println(); // blank line ahead of the usage message
        System.err.println("Usage: " + this.getClass().getName()
                + " [generic options] input output [profiling] [estimation] [clustersize]");

        return 1;
    OptionParser parser = new OptionParser(args);

    Pipeline pipeline = new MRPipeline(Recommender.class, getConf());

    // Optional switches follow; each recognised option sets a field or a configuration flag.
    // NOTE(review): the body of the CLUSTER_SIZE branch is not present in this excerpt.
    if (parser.hasOption(CLUSTER_SIZE)) {

    if (parser.hasOption(PROFILING)) {
        pipeline.getConfiguration().setBoolean(Profiler.IS_PROFILE, true);
        this.profileFilePath = parser.getOption(PROFILING).getValue();


    if (parser.hasOption(ESTIMATION)) {
        estFile = parser.getOption(ESTIMATION).getValue();
        est = new Estimator(estFile, clusterSize);

    if (parser.hasOption(OPT_REDUCE)) {
        pipeline.getConfiguration().setBoolean(OPT_REDUCE, true);

    if (parser.hasOption(OPT_MSCR)) {
        pipeline.getConfiguration().setBoolean(OPT_MSCR, true);

    // NOTE(review): the option is tested via ACTIVE_THRESHOLD but fetched via the literal
    // "at" -- confirm the constant holds the same key.
    if (parser.hasOption(ACTIVE_THRESHOLD)) {
        threshold = Integer.parseInt(parser.getOption("at").getValue());

    // NOTE(review): same pattern as above -- TOP versus the literal "top".
    if (parser.hasOption(TOP)) {
        top = Integer.parseInt(parser.getOption("top").getValue());

    profiler = new Profiler(pipeline);
     * input node
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // While profiling, inputs whose getSize() exceeds 10 * 1024 * 1024 are replaced by a
    // sample taken with argument 0.1.
    if (profiler.isProfiling() && lines.getSize() > 10 * 1024 * 1024) {
        lines = lines.sample(0.1);

     * S0 + GBK
    // Parse each line into a (userID, itemID) pair, then group by key.
    PGroupedTable<Long, Long> userWithPrefs = lines.parallelDo(new MapFn<String, Pair<Long, Long>>() {

        public Pair<Long, Long> map(String input) {
            // Fields are separated by Estimator.DELM; field 0 is the user, field 1 the item.
            String[] split = input.split(Estimator.DELM);
            long userID = Long.parseLong(split[0]);
            long itemID = Long.parseLong(split[1]);
            return Pair.of(userID, itemID);

        public float scaleFactor() {
            return est.getScaleFactor("S0").sizeFactor;

        public float scaleFactorByRecord() {
            return est.getScaleFactor("S0").recsFactor;
    }, Writables.tableOf(Writables.longs(), Writables.longs())).groupByKey(est.getClusterSize());

     * S1
    // Turn each user's grouped item IDs into a sparse vector with 1.0 at every item index.
    PTable<Long, Vector> userVector = userWithPrefs
            .parallelDo(new MapFn<Pair<Long, Iterable<Long>>, Pair<Long, Vector>>() {
                public Pair<Long, Vector> map(Pair<Long, Iterable<Long>> input) {
                    Vector userVector = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (long itemPref : input.second()) {
                        // The long item ID is narrowed to int to serve as the vector index.
                        userVector.set((int) itemPref, 1.0f);
                    return Pair.of(input.first(), userVector);

                public float scaleFactor() {
                    return est.getScaleFactor("S1").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S1").recsFactor;
            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    userVector = profiler.profile("S0-S1", pipeline, userVector, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));

     * S2
    // Filter user vectors by their number of non-default elements compared to threshold.
    // NOTE(review): the body of the comparison (presumably an emit) is missing here.
    PTable<Long, Vector> filteredUserVector = userVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, Vector>>() {

                public void process(Pair<Long, Vector> input, Emitter<Pair<Long, Vector>> emitter) {
                    if (input.second().getNumNondefaultElements() > threshold) {

                public float scaleFactor() {
                    return est.getScaleFactor("S2").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S2").recsFactor;

            }, Writables.tableOf(Writables.longs(), Writables.vectors()));

    filteredUserVector = profiler.profile("S2", pipeline, filteredUserVector, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));

     * S3 + GBK
    // Emit every (index1, index2) combination of non-zero entries within one user vector,
    // including a pairing of each index with itself, then group by the first index.
    PGroupedTable<Integer, Integer> coOccurencePairs = filteredUserVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, Integer>>() {
                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, Integer>> emitter) {
                    Iterator<Vector.Element> it = input.second().iterateNonZero();
                    while (it.hasNext()) {
                        int index1 = it.next().index();
                        Iterator<Vector.Element> it2 = input.second().iterateNonZero();
                        while (it2.hasNext()) {
                            int index2 = it2.next().index();
                            emitter.emit(Pair.of(index1, index2));

                public float scaleFactor() {
                    float size = est.getScaleFactor("S3").sizeFactor;
                    return size;

                public float scaleFactorByRecord() {
                    float recs = est.getScaleFactor("S3").recsFactor;
                    return recs;
            }, Writables.tableOf(Writables.ints(), Writables.ints())).groupByKey(est.getClusterSize());

     * S4
    // Build one co-occurrence row per item: each grouped index increments its cell by 1.0.
    PTable<Integer, Vector> coOccurenceVector = coOccurencePairs
            .parallelDo(new MapFn<Pair<Integer, Iterable<Integer>>, Pair<Integer, Vector>>() {
                public Pair<Integer, Vector> map(Pair<Integer, Iterable<Integer>> input) {
                    Vector cooccurrenceRow = new RandomAccessSparseVector(Integer.MAX_VALUE, 100);
                    for (int itemIndex2 : input.second()) {
                        cooccurrenceRow.set(itemIndex2, cooccurrenceRow.get(itemIndex2) + 1.0);
                    return Pair.of(input.first(), cooccurrenceRow);

                public float scaleFactor() {
                    return est.getScaleFactor("S4").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S4").recsFactor;
            }, Writables.tableOf(Writables.ints(), Writables.vectors()));

    coOccurenceVector = profiler.profile("S3-S4", pipeline, coOccurenceVector, ProfileConverter.int_vector(),
            Writables.tableOf(Writables.ints(), Writables.vectors()));

     * S5 Wrapping co-occurrence columns
    // Wrap each co-occurrence vector in a VectorOrPref so it can be unioned with stage S6.
    PTable<Integer, VectorOrPref> wrappedCooccurrence = coOccurenceVector
            .parallelDo(new MapFn<Pair<Integer, Vector>, Pair<Integer, VectorOrPref>>() {

                public Pair<Integer, VectorOrPref> map(Pair<Integer, Vector> input) {
                    return Pair.of(input.first(), new VectorOrPref(input.second()));

                public float scaleFactor() {
                    return est.getScaleFactor("S5").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S5").recsFactor;

            }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

    wrappedCooccurrence = profiler.profile("S5", pipeline, wrappedCooccurrence, ProfileConverter.int_vopv(),
            Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

     * S6 Splitting user vectors
    // For every non-zero entry of a user vector emit (itemIndex, VectorOrPref(userID, value)).
    PTable<Integer, VectorOrPref> userVectorSplit = filteredUserVector
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Integer, VectorOrPref>>() {

                public void process(Pair<Long, Vector> input, Emitter<Pair<Integer, VectorOrPref>> emitter) {
                    long userID = input.first();
                    Vector userVector = input.second();
                    Iterator<Vector.Element> it = userVector.iterateNonZero();
                    while (it.hasNext()) {
                        Vector.Element e = it.next();
                        int itemIndex = e.index();
                        float preferenceValue = (float) e.get();
                        emitter.emit(Pair.of(itemIndex, new VectorOrPref(userID, preferenceValue)));

                public float scaleFactor() {
                    return est.getScaleFactor("S6").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S6").recsFactor;
            }, Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

    userVectorSplit = profiler.profile("S6", pipeline, userVectorSplit, ProfileConverter.int_vopp(),
            Writables.tableOf(Writables.ints(), VectorOrPref.vectorOrPrefs()));

     * S7 Combine VectorOrPrefs
    // Merge the S5 and S6 outputs per item into a single VectorAndPrefs.
    // NOTE(review): the DoFn consumes Iterable<VectorOrPref>, which suggests a groupByKey
    // between union(...) and parallelDo(...) that is not visible here; the bodies of the
    // userID / value checks (presumably list additions) are also missing.
    PTable<Integer, VectorAndPrefs> combinedVectorOrPref = wrappedCooccurrence.union(userVectorSplit)
            .parallelDo(new DoFn<Pair<Integer, Iterable<VectorOrPref>>, Pair<Integer, VectorAndPrefs>>() {

                public void process(Pair<Integer, Iterable<VectorOrPref>> input,
                        Emitter<Pair<Integer, VectorAndPrefs>> emitter) {
                    Vector vector = null;
                    List<Long> userIDs = Lists.newArrayList();
                    List<Float> values = Lists.newArrayList();
                    for (VectorOrPref vop : input.second()) {
                        if (vector == null) {
                            vector = vop.getVector();
                        long userID = vop.getUserID();
                        if (userID != Long.MIN_VALUE) {
                        float value = vop.getValue();
                        if (!Float.isNaN(value)) {
                    emitter.emit(Pair.of(input.first(), new VectorAndPrefs(vector, userIDs, values)));

                public float scaleFactor() {
                    return est.getScaleFactor("S7").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S7").recsFactor;
            }, Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));

    combinedVectorOrPref = profiler.profile("S5+S6-S7", pipeline, combinedVectorOrPref,
            ProfileConverter.int_vap(), Writables.tableOf(Writables.ints(), VectorAndPrefs.vectorAndPrefs()));
     * S8 Computing partial recommendation vectors
    // Scale the co-occurrence column by each user's preference value, key the product by
    // user, then group by user and sum the partial vectors in a combiner.
    PTable<Long, Vector> partialMultiply = combinedVectorOrPref
            .parallelDo(new DoFn<Pair<Integer, VectorAndPrefs>, Pair<Long, Vector>>() {
                public void process(Pair<Integer, VectorAndPrefs> input, Emitter<Pair<Long, Vector>> emitter) {
                    Vector cooccurrenceColumn = input.second().getVector();
                    List<Long> userIDs = input.second().getUserIDs();
                    List<Float> prefValues = input.second().getValues();
                    for (int i = 0; i < userIDs.size(); i++) {
                        long userID = userIDs.get(i);
                        // Long.MIN_VALUE is skipped as a user ID here.
                        if (userID != Long.MIN_VALUE) {
                            float prefValue = prefValues.get(i);
                            Vector partialProduct = cooccurrenceColumn.times(prefValue);
                            emitter.emit(Pair.of(userID, partialProduct));

                public float scaleFactor() {
                    return est.getScaleFactor("S8").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S8").recsFactor;

            }, Writables.tableOf(Writables.longs(), Writables.vectors())).groupByKey(est.getClusterSize())
            .combineValues(new CombineFn<Long, Vector>() {

                public void process(Pair<Long, Iterable<Vector>> input, Emitter<Pair<Long, Vector>> emitter) {
                    Vector partial = null;
                    for (Vector vector : input.second()) {
                        // First vector seeds the sum; later ones are added with plus().
                        partial = partial == null ? vector : partial.plus(vector);
                    emitter.emit(Pair.of(input.first(), partial));

                public float scaleFactor() {
                    return est.getScaleFactor("combine").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("combine").recsFactor;

    partialMultiply = profiler.profile("S8-combine", pipeline, partialMultiply, ProfileConverter.long_vector(),
            Writables.tableOf(Writables.longs(), Writables.vectors()));

     * S9 Producing recommendations from vectors
    // Collect candidate items per user in a priority queue bounded by `top`, then emit them.
    // NOTE(review): the PriorityQueue constructor call is cut off (its second argument is
    // missing), nothing visible evicts from the queue in the else-branch, and nothing
    // visible copies topItems into `recommendations` before sorting -- these lines appear
    // to have been lost in extraction; confirm against the original source.
    PTable<Long, RecommendedItems> recommendedItems = partialMultiply
            .parallelDo(new DoFn<Pair<Long, Vector>, Pair<Long, RecommendedItems>>() {

                public void process(Pair<Long, Vector> input, Emitter<Pair<Long, RecommendedItems>> emitter) {
                    Queue<RecommendedItem> topItems = new PriorityQueue<RecommendedItem>(11,
                    Iterator<Vector.Element> recommendationVectorIterator = input.second().iterateNonZero();
                    while (recommendationVectorIterator.hasNext()) {
                        Vector.Element element = recommendationVectorIterator.next();
                        int index = element.index();
                        float value = (float) element.get();
                        if (topItems.size() < top) {
                            topItems.add(new GenericRecommendedItem(index, value));
                        } else if (value > topItems.peek().getValue()) {
                            topItems.add(new GenericRecommendedItem(index, value));
                    List<RecommendedItem> recommendations = new ArrayList<RecommendedItem>(topItems.size());
                    Collections.sort(recommendations, BY_PREFERENCE_VALUE);
                    emitter.emit(Pair.of(input.first(), new RecommendedItems(recommendations)));

                public float scaleFactor() {
                    return est.getScaleFactor("S9").sizeFactor;

                public float scaleFactorByRecord() {
                    return est.getScaleFactor("S9").recsFactor;

            }, Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));

    recommendedItems = profiler.profile("S9", pipeline, recommendedItems, ProfileConverter.long_ri(),
            Writables.tableOf(Writables.longs(), RecommendedItems.recommendedItems()));

     * Profiling
    // When profiling, return before the final write/execute below.
    // NOTE(review): any statements inside this branch ahead of the return are not visible.
    if (profiler.isProfiling()) {
        return 0;
     * asText
    // Normal path: write the recommendations as text to args[1] and execute the pipeline.
    pipeline.writeTextFile(recommendedItems, args[1]);
    PipelineResult result = pipeline.done();
    return result.succeeded() ? 0 : 1;

From source file:com.cloudera.castagna.crunch.AverageBytesByIP.java

License:Apache License

/**
 * Builds a Crunch pipeline that derives a per-key average from the lines of
 * {@code args[0]} and writes {@code avgs.top(100)} as text to {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt is damaged. The closing braces of the {@code if} block and
 * of the method are missing, and the statement that defines
 * {@code remoteAddrResponseSize} is truncated: only its first and last lines survive and it
 * has no terminating semicolon. {@code agg} is not referenced in the visible lines; it was
 * presumably used in the lost portion.
 *
 * @param args expected to hold exactly two entries: input path and output path
 * @return {@code 1} when {@code args.length != 2} or the pipeline result did not succeed,
 *         {@code 0} otherwise
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {

    if (args.length != 2) {
        System.err.println(); // blank line ahead of the usage message
        System.err.println("Two and only two arguments are accepted.");
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        return 1;

    Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    // Aggregator over pairs of longs that sums each component.
    Aggregator<Pair<Long, Long>> agg = pairAggregator(SUM_LONGS(), SUM_LONGS());

    // Truncated statement: the intermediate calls between `lines` and the final
    // Writables.pairs(...) argument are not present in this excerpt.
    PTable<String, Pair<Long, Long>> remoteAddrResponseSize = lines
                            Writables.pairs(Writables.longs(), Writables.longs())))

    // calulateAverage is declared outside this method and turns each pair into a Double.
    PTable<String, Double> avgs = remoteAddrResponseSize.parallelDo(calulateAverage,
            Writables.tableOf(Writables.strings(), Writables.doubles()));
    pipeline.writeTextFile(avgs.top(100), args[1]);
    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;

From source file:com.cloudera.castagna.crunch.PageViews.java

License:Apache License

/**
 * Builds a Crunch pipeline that maps the lines of {@code args[0]} through
 * {@code extractIPResponseSize} into a (String, Long) table and writes it as text to
 * {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt is damaged. The closing braces of the {@code if} block and
 * of the method are missing, and the {@code pageViews} statement has no terminating
 * semicolon. {@code longSumCombiner} is not referenced in the visible lines; the calls
 * that used it were presumably lost from the end of that statement.
 *
 * @param args expected to hold exactly two entries: input path and output path
 * @return {@code 1} when {@code args.length != 2} or the pipeline result did not succeed,
 *         {@code 0} otherwise
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println(); // blank line ahead of the usage message
        System.err.println("Two and only two arguments are accepted.");
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        return 1;

    Pipeline pipeline = new MRPipeline(PageViews.class, getConf());
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    CombineFn<String, Long> longSumCombiner = CombineFn.SUM_LONGS();

    // extractIPResponseSize is declared outside this method.
    PTable<String, Long> pageViews = lines
            .parallelDo(extractIPResponseSize, Writables.tableOf(Writables.strings(), Writables.longs()))

    pipeline.writeTextFile(pageViews, args[1]);
    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;

From source file:com.cloudera.castagna.crunch.TotalBytesByIP.java

License:Apache License

/**
 * Builds a Crunch pipeline that maps the lines of {@code args[0]} through
 * {@code extractIPResponseSize} into a (String, Long) table and writes it as text to
 * {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt is damaged. The closing braces of the {@code if} block and
 * of the method are missing, and the {@code ipAddrResponseSize} statement has no
 * terminating semicolon. {@code longSumCombiner} is not referenced in the visible lines;
 * the calls that used it were presumably lost from the end of that statement.
 *
 * @param args expected to hold exactly two entries: input path and output path
 * @return {@code 1} when {@code args.length != 2} or the pipeline result did not succeed,
 *         {@code 0} otherwise
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println(); // blank line ahead of the usage message
        System.err.println("Two and only two arguments are accepted.");
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        return 1;

    Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
    PCollection<String> lines = pipeline.readTextFile(args[0]);
    CombineFn<String, Long> longSumCombiner = CombineFn.SUM_LONGS();

    // extractIPResponseSize is declared outside this method.
    PTable<String, Long> ipAddrResponseSize = lines
            .parallelDo(extractIPResponseSize, Writables.tableOf(Writables.strings(), Writables.longs()))

    pipeline.writeTextFile(ipAddrResponseSize, args[1]);
    PipelineResult result = pipeline.done();

    return result.succeeded() ? 0 : 1;

From source file:com.cloudera.crunch.examples.AverageBytesByIP.java

License:Open Source License

/**
 * Builds a Crunch pipeline that derives a per-key average from the lines of
 * {@code args[0]} and registers a text output at {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt is damaged. The closing braces of the {@code if} block and
 * of the method are missing, and the statement that defines
 * {@code remoteAddrResponseSize} is truncated: only its first and last lines survive and it
 * has no terminating semicolon. The final comment announces that the pipeline is executed,
 * but no {@code run()} / {@code done()} call is visible before {@code return 0}; that line
 * was probably lost in extraction -- confirm against the original source.
 *
 * @param args expected to hold exactly two entries: input path and output path
 * @return {@code 1} when {@code args.length != 2}; otherwise {@code 0}, regardless of any
 *         pipeline outcome as far as the visible lines show
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println(); // blank line ahead of the usage message
        System.err.println("Two and only two arguments are accepted.");
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        return 1;
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(AverageBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Combiner used for summing up response size and count
    CombineFn<String, Pair<Long, Long>> stringPairOfLongsSumCombiner = CombineFn
            .pairAggregator(CombineFn.SUM_LONGS, CombineFn.SUM_LONGS);

    // Table of (ip, sum(response size), count)
    // Truncated statement: the calls between `lines` and the trailing Writables.pairs(...)
    // argument are not present in this excerpt.
    PTable<String, Pair<Long, Long>> remoteAddrResponseSize = lines
                            Writables.pairs(Writables.longs(), Writables.longs())))

    // Calculate average response size by ip address
    PTable<String, Double> avgs = remoteAddrResponseSize.parallelDo(calulateAverage,
            Writables.tableOf(Writables.strings(), Writables.doubles()));

    // write the result to a text file
    pipeline.writeTextFile(avgs, args[1]);
    // Execute the pipeline as a MapReduce.
    return 0;

From source file:com.cloudera.crunch.examples.TotalBytesByIP.java

License:Open Source License

/**
 * Builds a Crunch pipeline that maps the lines of {@code args[0]} through
 * {@code extractIPResponseSize} into a (String, Long) table and registers a text output at
 * {@code args[1]}.
 *
 * <p>NOTE(review): this excerpt is damaged. The closing braces of the {@code if} block and
 * of the method are missing, and the {@code ipAddrResponseSize} statement has no
 * terminating semicolon. {@code longSumCombiner} is not referenced in the visible lines.
 * The final comment announces that the pipeline is executed, but no {@code run()} /
 * {@code done()} call is visible before {@code return 0}; that line was probably lost in
 * extraction -- confirm against the original source.
 *
 * @param args expected to hold exactly two entries: input path and output path
 * @return {@code 1} when {@code args.length != 2}; otherwise {@code 0}, regardless of any
 *         pipeline outcome as far as the visible lines show
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {
    if (args.length != 2) {
        System.err.println(); // blank line ahead of the usage message
        System.err.println("Two and only two arguments are accepted.");
        System.err.println("Usage: " + this.getClass().getName() + " [generic options] input output");
        return 1;
    // Create an object to coordinate pipeline creation and execution.
    Pipeline pipeline = new MRPipeline(TotalBytesByIP.class, getConf());
    // Reference a given text file as a collection of Strings.
    PCollection<String> lines = pipeline.readTextFile(args[0]);

    // Combiner used for summing up response size
    CombineFn<String, Long> longSumCombiner = CombineFn.SUM_LONGS();

    // Table of (ip, sum(response size))
    PTable<String, Long> ipAddrResponseSize = lines
            .parallelDo(extractIPResponseSize, Writables.tableOf(Writables.strings(), Writables.longs()))

    pipeline.writeTextFile(ipAddrResponseSize, args[1]);
    // Execute the pipeline as a MapReduce.
    return 0;

From source file:com.cloudera.fts.App.java

License:Open Source License

/**
 * Prints the basic usage line to standard error and terminates the JVM with exit status 1.
 *
 * <p>NOTE(review): the method's closing brace is missing from this excerpt.
 */
private void printUsage() {
    System.err.println("Basic Usage: [avro,proto,text2pb,count] <inputdir> <outputdir>");
    System.exit(1); // does not return to the caller

From source file:com.cloudera.fts.App.java

License:Open Source License

/**
 * Prints the usage line for the {@code avro} sub-command to standard error, explaining
 * that it takes an additional events-file argument.
 *
 * <p>NOTE(review): only the start of the method is visible; the closing of the
 * {@code println} call's statement list and the method's closing brace are missing from
 * this excerpt, so whether it also exits cannot be determined here.
 */
private void printAvroUsage() {
    System.err.println(
            "Avro requires one extra argument, the event file name: avro <inputdir> <events_file> <outputdir>");

From source file:com.cloudera.hadoop.hdfs.nfs.nfs4.NFS4Server.java

License:Apache License

/**
 * Parses a port number from {@code args[0]}, starts the server on it via
 * {@code start(null, port)}, and then loops while {@code mRPCServer.isAlive()}.
 *
 * <p>NOTE(review): this excerpt is damaged. The closing braces of the {@code catch} block,
 * the {@code while} loop and the method are missing, and the loop body is not visible
 * (presumably a sleep or wait -- confirm against the original source).
 *
 * @param args {@code args[0]} must be parseable as an integer port
 * @return {@code 1} when the port cannot be read from {@code args[0]}; {@code 0} once the
 *         liveness loop ends
 * @throws Exception declared by the signature; the excerpt does not show which calls throw
 */
public int run(String[] args) throws Exception {
    int port;
    try {
        port = Integer.parseInt(args[0]);
    } catch (Exception e) {
        // Any failure here (missing argument, non-numeric text) prints the usage hint.
        System.err.println(this.getClass().getName() + " port");
        return 1;
    start(null, port);
    while (mRPCServer.isAlive()) {
    return 0;