Example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job setOutputFormatClass.

Prototype

public void setOutputFormatClass(Class<? extends OutputFormat> cls) throws IllegalStateException 

Document

Set the OutputFormat for the job.
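
Before the per-project examples below, here is a minimal, self-contained sketch of the call. It is an illustrative skeleton, not taken from any of the examples: the class name and the command-line input/output paths are placeholders. Note that setOutputFormatClass must be called before the job is submitted; afterwards it throws IllegalStateException.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;

public class MinimalOutputFormatExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "minimal-output-format-example");
        job.setJarByClass(MinimalOutputFormatExample.class);

        // Declare how the job reads its input and writes its output.
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class); // must happen before submission

        // Map-only pass-through: the identity mapper re-emits the
        // LongWritable offset / Text line pairs produced by TextInputFormat.
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));

        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}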

Usage

From source file:com.soteradefense.dga.louvain.mapreduce.CommunityCompression.java

License:Apache License

public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizer.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Job job = null;
    try {
        int iteration = 0;
        if (!basePath.endsWith("/"))
            basePath = basePath + "/";
        String inputPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        String joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        String outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        Configuration mrConf = this.getConf();
        job = Job.getInstance(mrConf);

        for (Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
            mrConf.set(entry.getKey(), entry.getValue());
        }

        FileSystem fs = FileSystem.get(job.getConfiguration());
        boolean nextFileExists = fs.exists(new Path(joinPath));
        while (nextFileExists) {
            System.out.println("Processing " + inputPath + " and " + joinPath);
            job = Job.getInstance(mrConf);
            job.setJobName("Louvain Table Synthesizer " + iteration);

            job.setJarByClass(LouvainTableSynthesizer.class);

            job.setMapperClass(LouvainTableSynthesizerMapper.class);
            job.setReducerClass(LouvainTableSynthesizerReducer.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            //Reducer Output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            //Add both input folders
            Path in = new Path(inputPath);
            Path joinIn = new Path(joinPath);
            Path out = new Path(outputPath);
            FileInputFormat.addInputPath(job, in);
            FileInputFormat.addInputPath(job, joinIn);
            FileOutputFormat.setOutputPath(job, out);

            job.waitForCompletion(true);
            //Set the new temp input path
            inputPath = outputPath;
            iteration++;
            outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
            joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
            nextFileExists = fs.exists(new Path(joinPath));
        }

    } catch (IOException e) {
        e.printStackTrace();
        return -1;
    } catch (InterruptedException e) {
        e.printStackTrace();
        return -1;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        return -1;
    }
    return 0;
}

From source file:com.soteradefense.dga.LouvainRunner.java

License:Apache License

private int runMapreduceJob(String inputPath, String outputPath, DGAConfiguration conf) throws Exception {
    Configuration mrConf = new Configuration();
    for (Map.Entry<String, String> entry : conf.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(LouvainRunner.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);

    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    logger.debug("Running Mapreduce step with job configuration: {}", job);

    return job.waitForCompletion(false) ? 0 : 1;
}

From source file:com.splicemachine.derby.stream.spark.SparkExportDataSetWriter.java

License:Apache License

@Override
public DataSet<LocatedRow> write() throws StandardException {
    Configuration conf = new Configuration(HConfiguration.unwrapDelegate());
    ByteDataOutput bdo = new ByteDataOutput();
    Job job;
    String encoded;

    try {
        bdo.writeObject(exportFunction);
        encoded = Base64.encodeBase64String(bdo.toByteArray());
        conf.set("exportFunction", encoded);
        job = Job.getInstance(conf);
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    job.setOutputKeyClass(Void.class);
    job.setOutputValueClass(LocatedRow.class);
    job.setOutputFormatClass(SparkDataSet.EOutputFormat.class);
    job.getConfiguration().set("mapred.output.dir", directory);

    JavaRDD<V> cached = rdd.cache();
    long writtenRows = cached.count();
    rdd.keyBy(new NullFunction<V>()).setName(String.format("Export Directory: %s", directory))
            .saveAsNewAPIHadoopDataset(job.getConfiguration());
    cached.unpersist();

    ValueRow valueRow = new ValueRow(2);
    valueRow.setColumn(1, new SQLLongint(writtenRows));
    valueRow.setColumn(2, new SQLInteger(0));
    return new SparkDataSet<>(
            SpliceSpark.getContext().parallelize(Collections.singletonList(new LocatedRow(valueRow)), 1));
}

From source file:com.splicemachine.mrio.api.SpliceTableMapReduceUtil.java

License:Apache License

/**
 * Use this before submitting a TableReduce job. It will
 * appropriately set up the JobConf.
 *
 * @param table  The output Splice table name; the format should be Schema.tableName.
 * @param reducer  The reducer class to use.
 * @param job  The current job to adjust.  Make sure the passed job is
 * carrying all necessary configuration.
 * @param partitioner  Partitioner to use. Pass <code>null</code> to use the
 * default partitioner.
 * @param quorumAddress Distant cluster to write to; default is null for
 * output to the cluster that is designated in <code>hbase-site.xml</code>.
 * Set this String to the zookeeper ensemble of an alternate remote cluster
 * when you want the reduce to write to a cluster other than the
 * default; e.g. when copying tables between clusters, the source would be
 * designated by <code>hbase-site.xml</code> and this param would have the
 * ensemble address of the remote cluster.  The format to pass is particular.
 * Pass <code>&lt;hbase.zookeeper.quorum&gt;:&lt;hbase.zookeeper.client.port&gt;:&lt;zookeeper.znode.parent&gt;</code>
 * such as <code>server,server2,server3:2181:/hbase</code>.
 * @param serverClass redefined hbase.regionserver.class
 * @param serverImpl redefined hbase.regionserver.client
 * @param addDependencyJars upload HBase jars and jars for any of the configured
 *           job classes via the distributed cache (tmpjars).
 * @throws IOException When determining the region count fails.
 * @throws SQLException
 */
public static void initTableReducerJob(String table, Class<? extends Reducer> reducer, Job job,
        Class partitioner, String quorumAddress, String serverClass, String serverImpl,
        boolean addDependencyJars, Class<? extends OutputFormat> outputformatClass) throws IOException {

    Configuration conf = job.getConfiguration();
    job.setOutputFormatClass(outputformatClass);
    if (reducer != null)
        job.setReducerClass(reducer);
    conf.set(MRConstants.SPLICE_OUTPUT_TABLE_NAME, table);
    if (sqlUtil == null)
        sqlUtil = SMSQLUtil.getInstance(conf.get(MRConstants.SPLICE_JDBC_STR));
    // If passed a quorum/ensemble address, pass it on to TableOutputFormat.
    String hbaseTableID = null;
    try {
        hbaseTableID = sqlUtil.getConglomID(table);
    } catch (SQLException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
        throw new IOException(e);
    }
    conf.set(MRConstants.HBASE_OUTPUT_TABLE_NAME, table);

    if (quorumAddress != null) {
        // Calling this will validate the format
        HBasePlatformUtils.validateClusterKey(quorumAddress);
        conf.set(TableOutputFormat.QUORUM_ADDRESS, quorumAddress);
    }
    if (serverClass != null && serverImpl != null) {
        conf.set(TableOutputFormat.REGION_SERVER_CLASS, serverClass);
        conf.set(TableOutputFormat.REGION_SERVER_IMPL, serverImpl);

    }
    job.setOutputKeyClass(ImmutableBytesWritable.class);
    job.setOutputValueClass(Object.class);
    if (partitioner == HRegionPartitioner.class) {
        job.setPartitionerClass(HRegionPartitioner.class);
        // TODO Where are the keys?
        int regions = getReduceNumberOfRegions(hbaseTableID);
        if (job.getNumReduceTasks() > regions) {
            job.setNumReduceTasks(regions);
        }
    } else if (partitioner != null) {
        job.setPartitionerClass(partitioner);
    }

    if (addDependencyJars) {
        addDependencyJars(job);
    }

    //initCredentials(job);
}
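
For orientation, a hypothetical call to this helper might look like the sketch below. The table name, reducer class, and quorum string are placeholders (MyReducer does not exist in the source), and TableOutputFormat stands in for whichever OutputFormat the job should receive via setOutputFormatClass:

    Job job = Job.getInstance(conf, "splice-table-reduce");
    SpliceTableMapReduceUtil.initTableReducerJob(
            "MYSCHEMA.MYTABLE",                     // output Splice table as Schema.tableName
            MyReducer.class,                        // hypothetical reducer class
            job,
            HRegionPartitioner.class,               // or null to keep the default partitioner
            "server1,server2,server3:2181:/hbase",  // remote quorum, or null for the local cluster
            null,                                   // serverClass: leave null to keep defaults
            null,                                   // serverImpl
            true,                                   // ship dependency jars via tmpjars
            TableOutputFormat.class);               // passed straight to job.setOutputFormatClass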

From source file:com.spotify.hdfs2cass.crunch.cql.CQLTarget.java

License:Open Source License

@Override
public void configureForMapReduce(final Job job, final PType<?> pType, final Path outputPath,
        final String name) {

    if (name == null) {
        throw new CrunchRuntimeException("'name' arguments should not be null. We don't know why tho");
    }

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(CrunchCqlBulkOutputFormat.class);

    JobConf conf = new JobConf();
    params.configure(conf);

    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        conf.set(e.getKey(), e.getValue());
    }

    FormatBundle<CrunchCqlBulkOutputFormat> bundle = FormatBundle.forOutput(CrunchCqlBulkOutputFormat.class);
    for (Map.Entry<String, String> e : conf) {
        bundle.set(e.getKey(), e.getValue());
    }

    Configuration jobConfiguration = job.getConfiguration();

    // we don't know why exactly this is needed, but without it, the actual streaming will not
    // see the throttling and buffer size arguments
    params.configure(jobConfiguration);

    CrunchConfigHelper.setOutputColumnFamily(jobConfiguration, params.getKeyspace(), params.getColumnFamily());
    CrunchCqlBulkOutputFormat.setColumnFamilySchema(jobConfiguration, params.getColumnFamily(),
            params.getSchema());
    CrunchCqlBulkOutputFormat.setColumnFamilyInsertStatement(jobConfiguration, params.getColumnFamily(),
            params.getStatement());

    String[] colNames = params.getColumnNames();
    for (int i = 0; i < colNames.length; i++) {
        CrunchCqlBulkOutputFormat.setColumnIndex(jobConfiguration, params.getColumnFamily(), colNames[i], i);
    }

    CrunchOutputs.addNamedOutput(job, name, bundle, ByteBuffer.class, List.class);
}

From source file:com.spotify.hdfs2cass.crunch.thrift.ThriftTarget.java

License:Open Source License

@Override
public void configureForMapReduce(final Job job, final PType<?> pType, final Path outputPath,
        final String name) {

    if (name == null) {
        throw new CrunchRuntimeException("'name' arguments should not be null. We don't know why tho");
    }

    FileOutputFormat.setOutputPath(job, outputPath);
    job.setOutputFormatClass(CrunchBulkOutputFormat.class);

    JobConf conf = new JobConf();
    params.configure(conf);

    for (Map.Entry<String, String> e : extraConf.entrySet()) {
        conf.set(e.getKey(), e.getValue());
    }

    FormatBundle<CrunchBulkOutputFormat> bundle = FormatBundle.forOutput(CrunchBulkOutputFormat.class);
    for (Map.Entry<String, String> e : conf) {
        bundle.set(e.getKey(), e.getValue());
    }

    Configuration jobConfiguration = job.getConfiguration();

    // we don't know why exactly this is needed, but without it, the actual streaming will not
    // see the throttling and buffer size arguments
    params.configure(jobConfiguration);

    CrunchConfigHelper.setOutputColumnFamily(jobConfiguration, params.getKeyspace(), params.getColumnFamily());

    CrunchOutputs.addNamedOutput(job, name, bundle, ByteBuffer.class, List.class);
}

From source file:com.springsource.insight.plugin.hadoop.WordCount.java

License:Open Source License

public int run(String[] args) throws Exception {
    String INPUT = "src/test/resources";
    String OUTPUT = "target/out";

    Configuration conf = new Configuration();
    File targetFolder = FileUtil.detectTargetFolder(getClass());
    if (targetFolder == null) {
        throw new IllegalStateException("Cannot detect target folder");
    }
    File tempFolder = new File(targetFolder, "temp");
    conf.set("hadoop.tmp.dir", tempFolder.getAbsolutePath());

    Job job = new Job(conf, "wordcount");
    job.setJarByClass(WordCount.class);

    job.setMapperClass(WordCountMapper.class);
    job.setCombinerClass(WordCountReducer.class);
    job.setReducerClass(WordCountReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    FileUtils.deleteDirectory(new File(OUTPUT)); // delete old output data
    FileInputFormat.addInputPath(job, new Path(INPUT));
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT));

    return job.waitForCompletion(true) ? 0 : -1;
}

From source file:com.streamsets.pipeline.stage.destination.mapreduce.jobtype.avroconvert.AvroConversionBaseCreator.java

License:Apache License

@Override
public Job call() throws Exception {
    // We're explicitly disabling speculative execution
    conf.set("mapreduce.map.speculative", "false");
    conf.set("mapreduce.map.maxattempts", "1");

    conf.set("mapreduce.job.user.classpath.first", "true");
    conf.set("mapreduce.task.classpath.user.precedence", "true");
    conf.set("mapreduce.task.classpath.first", "true");

    addNecessaryJarsToJob(conf);

    Job job = Job.getInstance(conf);

    // IO formats
    job.setInputFormatClass(getInputFormatClass());
    job.setOutputFormatClass(NullOutputFormat.class);

    // Mapper & job output
    job.setMapperClass(getMapperClass());
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(NullWritable.class);

    // It's a map-only job
    job.setNumReduceTasks(0);

    // General configuration
    job.setJarByClass(getClass());

    return job;
}

From source file:com.stride.cartrek.core.hbase.RowKeyDistributorTestBase.java

License:Apache License

private void testMapReduceInternal(long origKeyPrefix, Scan scan, int numValues, int startWithValue,
        int seekIntervalMinValue, int seekIntervalMaxValue)
        throws IOException, InterruptedException, ClassNotFoundException {
    int valuesCountInSeekInterval = writeTestData(origKeyPrefix, numValues, startWithValue,
            seekIntervalMinValue, seekIntervalMaxValue);

    // Reading data
    Configuration conf = testingUtility.getConfiguration();
    Job job = new Job(conf, "testMapReduceInternal()-Job");
    job.setJarByClass(this.getClass());
    TableMapReduceUtil.initTableMapperJob(TABLE_NAME, scan, RowCounterMapper.class,
            ImmutableBytesWritable.class, Result.class, job);

    // Substituting standard TableInputFormat which was set in
    // TableMapReduceUtil.initTableMapperJob(...)
    job.setInputFormatClass(WdTableInputFormat.class);
    keyDistributor.addInfo(job.getConfiguration());

    job.setOutputFormatClass(NullOutputFormat.class);
    job.setNumReduceTasks(0);

    boolean succeeded = job.waitForCompletion(true);
    Assert.assertTrue(succeeded);

    long mapInputRecords = job.getCounters().findCounter(RowCounterMapper.Counters.ROWS).getValue();
    Assert.assertEquals(valuesCountInSeekInterval, mapInputRecords);
}