List of usage examples for org.apache.hadoop.mapreduce.lib.output.FileOutputFormat#setOutputPath
public static void setOutputPath(Job job, Path outputDir)
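Before the full examples, a minimal sketch of the common pattern, assuming a plain MapReduce job; the class name SetOutputPathSketch and the job name are placeholders, not from any of the sources below. Note that the output directory must not already exist, or job submission fails with a FileAlreadyExistsException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class SetOutputPathSketch {
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "setOutputPath-sketch");
        job.setJarByClass(SetOutputPathSketch.class);
        // Input may be an existing file or directory.
        FileInputFormat.addInputPath(job, new Path(args[0]));
        // Output must be a directory that does not exist yet; the job creates it.
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}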
From source file:com.cloudera.sa.giraph.examples.kmeans.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("KMeans Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation> <k>");
        System.out.println("Example: 1 inputFolder outputFolder 3");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];
    int k = Integer.parseInt(args[3]);

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setInt(Constants.K, k);
    bspJob.getConfiguration().setVertexClass(KMeansVertex.class);
    bspJob.getConfiguration().setMasterComputeClass(MasterCompute.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    // setOutputPath expects a mapreduce Job, so it is applied to the Hadoop Job
    // that the GiraphJob wraps.
    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.giraph.examples.ktrusses.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 4) {
        System.out.println("KTrusses Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation> <k>");
        System.out.println("Example: 1 inputFolder outputFolder 4");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];
    int k = Integer.parseInt(args[3]);

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setVertexClass(KTrussVertex.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setMasterComputeClass(MasterCompute.class);
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);
    bspJob.getConfiguration().setInt(Constants.K, k);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.giraph.examples.triangles.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("Triangles Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
        System.out.println("Example: 1 inputFolder outputFolder");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setVertexClass(TrianglesVertex.class);
    bspJob.getConfiguration().setVertexInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addVertexInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.giraph.examples.wordcount.Job.java
License:Apache License
public static void main(String[] args) throws Exception {
    if (args.length != 3) {
        System.out.println("WordCount Help:");
        System.out.println("Parameters: <numbersOfWorkers> <inputLocation> <outputLocation>");
        System.out.println("Example: 1 inputFolder outputFolder");
        return;
    }
    String numberOfWorkers = args[0];
    String inputLocation = args[1];
    String outputLocation = args[2];

    GiraphJob bspJob = new GiraphJob(new Configuration(), Job.class.getName());
    bspJob.getConfiguration().setVertexClass(WordCountVertex.class);
    bspJob.getConfiguration().setEdgeInputFormatClass(InputFormat.class);
    GiraphFileInputFormat.addEdgeInputPath(bspJob.getConfiguration(), new Path(inputLocation));
    bspJob.getConfiguration().setVertexOutputFormatClass(OutputFormat.class);

    int minWorkers = Integer.parseInt(numberOfWorkers);
    int maxWorkers = Integer.parseInt(numberOfWorkers);
    bspJob.getConfiguration().setWorkerConfiguration(minWorkers, maxWorkers, 100.0f);

    FileOutputFormat.setOutputPath(bspJob.getInternalJob(), new Path(outputLocation));

    boolean verbose = true;
    if (bspJob.run(verbose)) {
        System.out.println("Ended well");
    } else {
        System.out.println("Ended with Failure");
    }
}
From source file:com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver.java
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 6) {
        // Usage: hadoop jar HBASEBulkLoad.jar com.cloudera.sa.hbasebulkload.HBASEBulkLoadDriver
        //   <inputpath> <outputpath> <hbaseTable> <hbaseColumnFamily>
        //   "<hbaseColumns (delimiter separated)>" <column delimiter>
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }
    String hbaseTab = args[2];
    String hbaseColumnFamily = args[3];
    String hbaseColumns = args[4];
    String hbaseColumnSeperator = args[5];

    config.set(HBASEBulkLoadConstants.HBASE_TABLE_KEY, hbaseTab.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_FAMILY_KEY, hbaseColumnFamily);
    config.set(HBASEBulkLoadConstants.HBASE_COLUMNS_KEY, hbaseColumns.trim().toLowerCase(Locale.ENGLISH));
    config.set(HBASEBulkLoadConstants.HBASE_COLUMN_SEPERATOR_KEY, hbaseColumnSeperator);

    Job job = Job.getInstance(config, this.getClass().getName() + "-" + hbaseTab);
    HBaseConfiguration.addHbaseResources(config);
    job.setInputFormatClass(TextInputFormat.class);
    job.setJarByClass(HBASEBulkLoadDriver.class);
    job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
    job.setMapOutputKeyClass(ImmutableBytesWritable.class);
    job.setMapOutputValueClass(Put.class);
    job.setCombinerClass(PutCombiner.class);
    job.setReducerClass(PutSortReducer.class);

    Connection connection = ConnectionFactory.createConnection(config);
    Table hTab = connection.getTable(TableName.valueOf(hbaseTab));

    // Clear any previous output, then wire up input and output paths.
    FileSystem.get(getConf()).delete(new Path(args[1]), true);
    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));
    //job.setOutputFormatClass(HFileOutputFormat2.class);
    TableMapReduceUtil.initTableReducerJob(hTab.getName().getNameAsString(), null, job);
    //job.setNumReduceTasks(0);
    TableMapReduceUtil.addDependencyJars(job);
    HFileOutputFormat2.configureIncrementalLoadMap(job, hTab);

    int exitCode = job.waitForCompletion(true) ? HBASEBulkLoadConstants.SUCCESS
            : HBASEBulkLoadConstants.FAILURE;
    if (HBASEBulkLoadConstants.SUCCESS == exitCode) {
        LoadIncrementalHFiles loader = new LoadIncrementalHFiles(config);
        loader.doBulkLoad(new Path(args[1]), (HTable) hTab);
        connection.close();
    }
    return exitCode;
}
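For contrast, a minimal sketch of the more typical HFile bulk-load wiring under HBase 1.x-era APIs, where HFileOutputFormat2.configureIncrementalLoad (rather than configureIncrementalLoadMap plus initTableReducerJob as above) sets the output format, reducer, and total-order partitioner from the table's region boundaries. The table name "myTable" and the class name BulkLoadSketch are placeholders, and the mapper is assumed to be the (ImmutableBytesWritable, Put)-emitting mapper from the driver above.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RegionLocator;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.HFileOutputFormat2;
import org.apache.hadoop.hbase.mapreduce.LoadIncrementalHFiles;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class BulkLoadSketch {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        TableName tableName = TableName.valueOf("myTable"); // placeholder
        Job job = Job.getInstance(conf, "bulkload-sketch");
        job.setJarByClass(BulkLoadSketch.class);
        // Mapper from the driver above is assumed to be on the classpath.
        job.setMapperClass(HBASEBulkLoadKeyValueMapper.class);
        job.setMapOutputKeyClass(ImmutableBytesWritable.class);
        job.setMapOutputValueClass(Put.class);
        try (Connection conn = ConnectionFactory.createConnection(conf);
                Table table = conn.getTable(tableName);
                RegionLocator locator = conn.getRegionLocator(tableName);
                Admin admin = conn.getAdmin()) {
            // Configures output format, reducer, and partitioner for HFile output.
            HFileOutputFormat2.configureIncrementalLoad(job, table, locator);
            FileInputFormat.addInputPath(job, new Path(args[0]));
            Path hfileDir = new Path(args[1]); // must not exist; HFiles are written here
            FileOutputFormat.setOutputPath(job, hfileDir);
            if (job.waitForCompletion(true)) {
                // Move the generated HFiles into the live table's regions.
                new LoadIncrementalHFiles(conf).doBulkLoad(hfileDir, admin, table, locator);
            }
        }
    }
}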
From source file:com.cloudera.sa.securewordcount.SecureWordCountDriver.java
@Override
public int run(String[] args) throws Exception {
    Configuration config = getConf();
    args = new GenericOptionsParser(config, args).getRemainingArgs();
    if (args.length < 2) {
        ToolRunner.printGenericCommandUsage(System.out);
        return 2;
    }

    Job job = Job.getInstance(config, this.getClass().getName() + "-wordcount");
    job.setJarByClass(SecureWordCountDriver.class);
    job.setInputFormatClass(TextInputFormat.class);
    job.setMapperClass(TokenizerMapper.class);
    job.setCombinerClass(IntSumReducer.class);
    job.setReducerClass(IntSumReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IntWritable.class);

    FileInputFormat.addInputPath(job, new Path(args[0]));
    FileOutputFormat.setOutputPath(job, new Path(args[1]));

    return job.waitForCompletion(true) ? 0 : 1;
}
From source file:com.cloudera.science.matching.graph.BipartiteMatchingRunner.java
License:Open Source License
@Override
public int run(String[] args) throws Exception {
    if (args.length != 3) {
        System.err.println("Usage: <input> <output> <numworkers>");
        System.err.println("The input should be the output of the InputPreparer Crunch pipeline.");
        System.err.println("The output is the directory where the output of the matching will be");
        System.err.println("written, and the numworkers should be <= the number of map slots available");
        System.err.println("on your Hadoop cluster.");
        return 1;
    }

    GiraphJob job = new GiraphJob(getConf(), getClass().getName());
    GiraphConfiguration conf = job.getConfiguration();
    conf.setVertexClass(BipartiteMatchingVertex.class);
    conf.setVertexInputFormatClass(BipartiteMatchingVertexInputFormat.class);
    conf.setVertexOutputFormatClass(BipartiteMatchingVertexOutputFormat.class);
    conf.setComputationClass(BipartiteMatching.class);
    conf.setOutEdgesClass(ArrayListEdges.class);
    GiraphFileInputFormat.addVertexInputPath(job.getConfiguration(), new Path(args[0]));
    FileOutputFormat.setOutputPath(job.getInternalJob(), new Path(args[1]));

    int numWorkers = Integer.parseInt(args[2]);
    job.getConfiguration().setWorkerConfiguration(numWorkers, numWorkers, 100.0f);
    return job.run(true) ? 0 : -1;
}
From source file:com.cloudera.sqoop.mapreduce.db.TestDataDrivenDBInputFormat.java
License:Apache License
public void testDateSplits() throws Exception {
    Statement s = connection.createStatement();
    final String DATE_TABLE = "datetable";
    final String COL = "foo";
    try {
        try {
            // Delete the table if it already exists.
            s.executeUpdate("DROP TABLE " + DATE_TABLE);
        } catch (SQLException e) {
            // Ignored; proceed regardless of whether we deleted the table;
            // it may have simply not existed.
        }

        // Create the table.
        s.executeUpdate("CREATE TABLE " + DATE_TABLE + "(" + COL + " TIMESTAMP)");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-04-02')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2010-05-01')");
        s.executeUpdate("INSERT INTO " + DATE_TABLE + " VALUES('2011-04-01')");

        // Commit this tx.
        connection.commit();

        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");
        FileSystem fs = FileSystem.getLocal(conf);
        fs.delete(new Path(OUT_DIR), true);

        // Now do a data-driven import.
        Job job = new Job(conf);
        job.setMapperClass(ValMapper.class);
        job.setReducerClass(Reducer.class);
        job.setMapOutputKeyClass(DateCol.class);
        job.setMapOutputValueClass(NullWritable.class);
        job.setOutputKeyClass(DateCol.class);
        job.setOutputValueClass(NullWritable.class);
        job.setNumReduceTasks(1);
        job.getConfiguration().setInt("mapreduce.map.tasks", 2);
        FileOutputFormat.setOutputPath(job, new Path(OUT_DIR));
        DBConfiguration.configureDB(job.getConfiguration(), DRIVER_CLASS, DB_URL,
                (String) null, (String) null);
        DataDrivenDBInputFormat.setInput(job, DateCol.class, DATE_TABLE, null, COL, COL);

        boolean ret = job.waitForCompletion(true);
        assertTrue("job failed", ret);

        // Check to see that we imported as much as we thought we did.
        assertEquals("Did not get all the records", 4,
                job.getCounters()
                        .findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS")
                        .getValue());
    } finally {
        s.close();
    }
}
From source file:com.cloudera.sqoop.mapreduce.MergeJob.java
License:Apache License
public boolean runMergeJob() throws IOException {
    Configuration conf = options.getConf();
    Job job = new Job(conf);

    String userClassName = options.getClassName();
    if (null == userClassName) {
        // Shouldn't get here.
        throw new IOException("Record class name not specified with --class-name.");
    }

    // Set the external jar to use for the job.
    String existingJar = options.getExistingJarName();
    if (existingJar != null) {
        // User explicitly identified a jar path.
        LOG.debug("Setting job jar to user-specified jar: " + existingJar);
        job.getConfiguration().set("mapred.jar", existingJar);
    } else {
        // Infer it from the location of the specified class, if it's on the classpath.
        try {
            Class<? extends Object> userClass = conf.getClassByName(userClassName);
            if (null != userClass) {
                String userJar = Jars.getJarPathForClass(userClass);
                LOG.debug("Setting job jar based on user class " + userClassName + ": " + userJar);
                job.getConfiguration().set("mapred.jar", userJar);
            } else {
                LOG.warn("Specified class " + userClassName + " is not in a jar. "
                        + "MapReduce may not find the class");
            }
        } catch (ClassNotFoundException cnfe) {
            throw new IOException(cnfe);
        }
    }

    try {
        Path oldPath = new Path(options.getMergeOldPath());
        Path newPath = new Path(options.getMergeNewPath());

        Configuration jobConf = job.getConfiguration();
        FileSystem fs = FileSystem.get(jobConf);
        oldPath = oldPath.makeQualified(fs);
        newPath = newPath.makeQualified(fs);

        FileInputFormat.addInputPath(job, oldPath);
        FileInputFormat.addInputPath(job, newPath);

        jobConf.set(MERGE_OLD_PATH_KEY, oldPath.toString());
        jobConf.set(MERGE_NEW_PATH_KEY, newPath.toString());
        jobConf.set(MERGE_KEY_COL_KEY, options.getMergeKeyCol());
        jobConf.set(MERGE_SQOOP_RECORD_KEY, userClassName);

        FileOutputFormat.setOutputPath(job, new Path(options.getTargetDir()));

        if (ExportJobBase.isSequenceFiles(jobConf, newPath)) {
            job.setInputFormatClass(SequenceFileInputFormat.class);
            job.setOutputFormatClass(SequenceFileOutputFormat.class);
            job.setMapperClass(MergeRecordMapper.class);
        } else {
            job.setMapperClass(MergeTextMapper.class);
            job.setOutputFormatClass(RawKeyTextOutputFormat.class);
        }

        jobConf.set("mapred.output.key.class", userClassName);
        job.setOutputValueClass(NullWritable.class);
        job.setReducerClass(MergeReducer.class);

        // Set the intermediate data types.
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MergeRecord.class);

        // Make sure Sqoop and anything else we need is on the classpath.
        cacheJars(job, null);

        return this.runJob(job);
    } catch (InterruptedException ie) {
        throw new IOException(ie);
    } catch (ClassNotFoundException cnfe) {
        throw new IOException(cnfe);
    }
}
From source file:com.cloudera.traffic.AveragerRunner.java
License:Apache License
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();

    Job job = new Job(conf);
    job.setJarByClass(AveragerRunner.class);
    job.setMapperClass(AveragerMapper.class);
    job.setReducerClass(AveragerReducer.class);
    job.setCombinerClass(AveragerReducer.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(AverageWritable.class);
    job.setInputFormatClass(TextInputFormat.class);

    FileInputFormat.addInputPath(job, new Path(otherArgs[0]));
    FileOutputFormat.setOutputPath(job, new Path(otherArgs[1]));

    job.waitForCompletion(true);
}