Example usage for org.apache.hadoop.mapred JobConf setClass

Introduction

This page collects example usages of org.apache.hadoop.mapred.JobConf.setClass drawn from open-source projects.

Prototype

public void setClass(String name, Class<?> theClass, Class<?> xface) 

Document

Sets the name property to the class name of theClass, which must implement the given interface xface.
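A minimal sketch of the set/get round trip (the property name and codec classes here are illustrative; getClass applies the same interface bound and a default when the property is unset):

import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.DefaultCodec;
import org.apache.hadoop.io.compress.GzipCodec;
import org.apache.hadoop.mapred.JobConf;

public static Class<? extends CompressionCodec> configureCodec(JobConf conf) {
    // Store GzipCodec under the property name; setClass fails fast if the
    // class does not implement CompressionCodec.
    conf.setClass("mapred.output.compression.codec", GzipCodec.class, CompressionCodec.class);

    // Read it back with the same interface bound, falling back to DefaultCodec
    // if the property was never set.
    return conf.getClass("mapred.output.compression.codec", DefaultCodec.class, CompressionCodec.class);
}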

Usage

From source file: com.alexholmes.hadooputils.sort.Sort.java

License: Apache License

/**
 * The driver for the sort MapReduce job.
 *
 * @param jobConf           sort configuration
 * @param numMapTasks       number of map tasks
 * @param numReduceTasks    number of reduce tasks
 * @param sampler           sampler, if required
 * @param codecClass        the compression codec for compressing final outputs
 * @param mapCodecClass     the compression codec for compressing intermediary map outputs
 * @param createLzopIndexes whether or not a MR job should be launched to create LZOP indexes
 *                          for the job output files
 * @param inputDirAsString  input directory in CSV-form
 * @param outputDirAsString output directory
 * @return true if the job completed successfully
 * @throws IOException        if something went wrong
 * @throws URISyntaxException if a URI wasn't correctly formed
 */
public boolean runJob(final JobConf jobConf, final Integer numMapTasks, final Integer numReduceTasks,
        final InputSampler.Sampler<K, V> sampler, final Class<? extends CompressionCodec> codecClass,
        final Class<? extends CompressionCodec> mapCodecClass, final boolean createLzopIndexes,
        final String inputDirAsString, final String outputDirAsString) throws IOException, URISyntaxException {

    jobConf.setJarByClass(Sort.class);
    jobConf.setJobName("sorter");

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();

    if (numMapTasks != null) {
        jobConf.setNumMapTasks(numMapTasks);
    }
    if (numReduceTasks != null) {
        jobConf.setNumReduceTasks(numReduceTasks);
    } else {
        int numReduces = (int) (cluster.getMaxReduceTasks() * 0.9);
        String sortReduces = jobConf.get("test.sort.reduces_per_host");
        if (sortReduces != null) {
            numReduces = cluster.getTaskTrackers() * Integer.parseInt(sortReduces);
        }

        // fall back to a cluster-derived default when the caller didn't specify
        jobConf.setNumReduceTasks(numReduces);
    }

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(SortReduce.class);

    jobConf.setInputFormat(SortInputFormat.class);

    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);

    if (mapCodecClass != null) {
        jobConf.setMapOutputCompressorClass(mapCodecClass);
    }

    if (codecClass != null) {
        jobConf.setBoolean("mapred.output.compress", true);
        jobConf.setClass("mapred.output.compression.codec", codecClass, CompressionCodec.class);
    }

    FileInputFormat.setInputPaths(jobConf, inputDirAsString);
    FileOutputFormat.setOutputPath(jobConf, new Path(outputDirAsString));

    if (sampler != null) {
        System.out.println("Sampling input to effect total-order sort...");
        jobConf.setPartitionerClass(TotalOrderPartitioner.class);
        Path inputDir = FileInputFormat.getInputPaths(jobConf)[0];

        FileSystem fileSystem = FileSystem.get(jobConf);

        if (fileSystem.exists(inputDir) && fileSystem.isFile(inputDir)) {
            inputDir = inputDir.getParent();
        }
        inputDir = inputDir.makeQualified(inputDir.getFileSystem(jobConf));
        Path partitionFile = new Path(inputDir, "_sortPartitioning");
        TotalOrderPartitioner.setPartitionFile(jobConf, partitionFile);
        InputSampler.writePartitionFile(jobConf, sampler);
        URI partitionUri = new URI(partitionFile.toString() + "#" + "_sortPartitioning");
        DistributedCache.addCacheFile(partitionUri, jobConf);
        DistributedCache.createSymlink(jobConf);
    }

    System.out.println("Running on " + cluster.getTaskTrackers() + " nodes to sort from "
            + FileInputFormat.getInputPaths(jobConf)[0] + " into " + FileOutputFormat.getOutputPath(jobConf)
            + " with " + jobConf.getNumReduceTasks() + " reduces.");
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    jobResult = JobClient.runJob(jobConf);
    Date endTime = new Date();
    System.out.println("Job ended: " + endTime);
    System.out.println("The job took "
            + TimeUnit.MILLISECONDS.toSeconds(endTime.getTime() - startTime.getTime()) + " seconds.");

    if (jobResult.isSuccessful()) {
        if (createLzopIndexes && codecClass != null && LzopCodec.class.equals(codecClass)) {
            new LzoIndexer(jobConf).index(new Path(outputDirAsString));
        }
        return true;
    }
    return false;
}

From source file: com.bianfeng.bfas.hive.io.RealtimeInputFormat2.java

License: Apache License

/**
 * Set a PathFilter to be applied to the input paths for the map-reduce job.
 *
 * @param conf   the job configuration
 * @param filter the PathFilter class to use for filtering the input paths
 */
public static void setInputPathFilter(JobConf conf, Class<? extends PathFilter> filter) {
    conf.setClass("mapred.input.pathFilter.class", filter, PathFilter.class);
}
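A hedged caller-side sketch; the HiddenFileFilter class below is hypothetical and only illustrates what a PathFilter implementation looks like:

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathFilter;
import org.apache.hadoop.mapred.JobConf;

// Hypothetical filter that skips hidden files and MapReduce side files.
public static class HiddenFileFilter implements PathFilter {
    @Override
    public boolean accept(Path path) {
        String name = path.getName();
        return !name.startsWith("_") && !name.startsWith(".");
    }
}

public static void configureFilter() {
    JobConf conf = new JobConf();
    RealtimeInputFormat2.setInputPathFilter(conf, HiddenFileFilter.class);
}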

From source file: com.ebay.erl.mobius.core.JobSetup.java

License: Apache License

/**
 * Sets up the output path of the given <code>job</code> to
 * <code>outputFolder</code> using the given
 * <code>outputFormat</code>.
 */
public static void setupOutputs(JobConf job, Path outputFolder, Class<? extends FileOutputFormat> outputFormat)
        throws IOException {
    FileOutputFormat.setOutputPath(job, outputFolder);
    job.setClass("mapred.output.format.class", outputFormat, FileOutputFormat.class);
    job.setOutputKeyClass(NullWritable.class);
    job.setOutputValueClass(Tuple.class);

    ensureOutputDelete(outputFolder, job);
}
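A short usage sketch, assuming TextOutputFormat (or any other FileOutputFormat subclass) suits the caller; the output path is illustrative:

import java.io.IOException;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TextOutputFormat;

public static void configureJob() throws IOException {
    JobConf job = new JobConf();
    // Registers TextOutputFormat under "mapred.output.format.class" and
    // points the job at the given folder.
    JobSetup.setupOutputs(job, new Path("/tmp/mobius-output"), TextOutputFormat.class);
}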

From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License: Open Source License

public static void setInputInfo(JobConf job, byte input, InputInfo inputinfo, int brlen, int bclen,
        ConvertTarget target) {
    Class<? extends Converter> converterClass = getConverterClass(inputinfo, brlen, bclen, target);
    job.setClass(INPUT_CONVERTER_CLASS_PREFIX_CONFIG + input, converterClass, Converter.class);
    job.setClass(INPUT_KEY_CLASS_PREFIX_CONFIG + input, inputinfo.inputKeyClass, Writable.class);
    job.setClass(INPUT_VALUE_CLASS_PREFIX_CONFIG + input, inputinfo.inputValueClass, Writable.class);
}

From source file: com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java

License: Open Source License

public static void setOutputInfo(JobConf job, int i, OutputInfo outputinfo, boolean sourceInBlock)
        throws DMLRuntimeException {
    Class<? extends Converter> converterClass;
    if (sourceInBlock) {
        if (outputinfo.outputValueClass.equals(MatrixCell.class))
            converterClass = BinaryBlockToBinaryCellConverter.class;
        else if (outputinfo.outputValueClass.equals(Text.class))
            converterClass = BinaryBlockToTextCellConverter.class;
        else if (outputinfo.outputValueClass.equals(MatrixBlock.class))
            converterClass = IdenticalConverter.class;
        else if (outputinfo.outputValueClass.equals(IntWritable.class))
            converterClass = WeightedCellToSortInputConverter.class;
        else if (outputinfo.outputValueClass.equals(WeightedPair.class))
            converterClass = IdenticalConverter.class;
        else
            converterClass = IdenticalConverter.class;
    } else {
        if (outputinfo.outputValueClass.equals(MatrixCell.class))
            converterClass = IdenticalConverter.class;
        else if (outputinfo.outputValueClass.equals(Text.class))
            converterClass = BinaryCellToTextConverter.class;
        else if (outputinfo.outputValueClass.equals(IntWritable.class))
            converterClass = WeightedCellToSortInputConverter.class;
        else if (outputinfo.outputValueClass.equals(WeightedPair.class))
            converterClass = IdenticalConverter.class;
        else
            throw new DMLRuntimeException("unsupported conversion: " + outputinfo.outputValueClass);
        // converterClass=IdenticalConverter.class; 
    }
    job.setClass(OUTPUT_CONVERTER_CLASS_PREFIX_CONFIG + i, converterClass, Converter.class);
}

From source file: com.ibm.bi.dml.runtime.matrix.sort.CompactInputFormat.java

License: Open Source License

public static void setKeyValueClasses(JobConf job, Class<? extends WritableComparable> keyClass,
        Class<? extends Writable> valueClass) {
    job.setClass(KEY_CLASS, keyClass, WritableComparable.class);
    job.setClass(VALUE_CLASS, valueClass, Writable.class);
}
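A hedged usage sketch of this helper (the key and value classes are illustrative; KEY_CLASS and VALUE_CLASS are constants defined in CompactInputFormat):

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;

public static void configure(JobConf job) {
    // Register LongWritable keys and Text values for the compact input format.
    CompactInputFormat.setKeyValueClasses(job, LongWritable.class, Text.class);
}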

From source file: com.ibm.bi.dml.runtime.matrix.sort.PickFromCompactInputFormat.java

License: Open Source License

@SuppressWarnings("rawtypes")
public static void setKeyValueClasses(JobConf job, Class<? extends WritableComparable> keyClass,
        Class<? extends Writable> valueClass) {
    //   job.setClass(KEY_CLASS, keyClass, WritableComparable.class);
    job.setClass(VALUE_CLASS, valueClass, Writable.class);
}

From source file: com.ibm.bi.dml.runtime.matrix.sort.SamplingSortMRInputFormat.java

License: Open Source License

public static void setTargetKeyValueClasses(JobConf job, Class<? extends WritableComparable> keyClass,
        Class<? extends Writable> valueClass) {
    job.setClass(TARGET_KEY_CLASS, keyClass, WritableComparable.class);
    job.setClass(TARGET_VALUE_CLASS, valueClass, Writable.class);
}

From source file: com.ibm.jaql.io.hadoop.FileOutputConfigurator.java

License: Apache License

public void setSequential(JobConf conf) throws Exception {
    registerSerializers(conf);

    // For an expression, the location is the final file name
    Path outPath = new Path(location);
    FileSystem fs = outPath.getFileSystem(conf);
    outPath = outPath.makeQualified(fs);
    if (fs.exists(outPath)) {
        // TODO: Jaql currently has overwrite semantics; add flag to control this
        if (fs.isFile(outPath)) {
            fs.delete(outPath, false);
        } else {
            // Look for a map-reduce output directory
            FileStatus[] nonMR = fs.listStatus(outPath, new PathFilter() {
                boolean onlyOne = true;

                public boolean accept(Path path) {
                    String name = path.getName();
                    if (name.matches("([.][.]?)|([.]part-[0-9]+.crc)|(part-[0-9]+)")) {
                        return false;
                    }
                    if (onlyOne) {
                        onlyOne = false;
                        return true;
                    }
                    return false;
                }
            });
            if (nonMR.length > 0) {
                throw new IOException(
                        "directory exists and is not a map-reduce output directory: " + nonMR[0].getPath());
            }
            fs.delete(outPath, true);
        }
    }

    // In sequential mode, we will write directly to the output file
    // and bypass the _temporary directory and rename of the standard 
    // FileOutputCommitter by using our own DirectFileOutputCommitter.
    FileOutputFormat.setOutputPath(conf, outPath.getParent());
    conf.setClass("mapred.output.committer.class", DirectFileOutputCommiter.class, OutputCommitter.class);
}
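The framework later resolves this property through JobConf.getOutputCommitter(), which instantiates the configured class and falls back to the standard FileOutputCommitter when nothing was set; a minimal read-back sketch:

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCommitter;

public static OutputCommitter resolveCommitter(JobConf conf) {
    // Resolves to DirectFileOutputCommiter after setSequential(conf) has run,
    // or to the default FileOutputCommitter otherwise.
    return conf.getOutputCommitter();
}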

From source file: com.ricemap.spateDB.operations.RangeQuery.java

License: Apache License

/**
 * Performs a range query using MapReduce.
 *
 * @param fs             the file system containing the input file
 * @param inputFile      the input file to query
 * @param userOutputPath the output path, or null to write to a temporary path
 *                       that is deleted after the job completes
 * @param queryShape     the shape defining the query range
 * @param shape          the shape type stored in the input file
 * @param overwrite      whether to overwrite an existing output path
 * @param background     if true, submit the job and return immediately
 * @param query          the query specification (type and field to report)
 * @return the number of matching records, or -1 when the job runs in the background
 * @throws IOException
 */
public static long rangeQueryMapReduce(FileSystem fs, Path inputFile, Path userOutputPath, Shape queryShape,
        Shape shape, boolean overwrite, boolean background, QueryInput query) throws IOException {
    JobConf job = new JobConf(FileMBR.class);

    FileSystem outFs = inputFile.getFileSystem(job);
    Path outputPath = userOutputPath;
    if (outputPath == null) {
        do {
            outputPath = new Path(
                    inputFile.toUri().getPath() + ".rangequery_" + (int) (Math.random() * 1000000));
        } while (outFs.exists(outputPath));
    } else {
        if (outFs.exists(outputPath)) {
            if (overwrite) {
                outFs.delete(outputPath, true);
            } else {
                throw new RuntimeException("Output path already exists and -overwrite flag is not set");
            }
        }
    }

    job.setJobName("RangeQuery");
    job.setClass(SpatialSite.FilterClass, RangeFilter.class, BlockFilter.class);
    RangeFilter.setQueryRange(job, queryShape); // set the query range for the filter

    ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
    job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
    job.setNumReduceTasks(3);

    // Decide which map function to use depending on how blocks are indexed
    // And also which input format to use
    if (SpatialSite.isRTree(fs, inputFile)) {
        // RTree indexed file
        LOG.info("Searching an RTree indexed file");
        job.setInputFormat(RTreeInputFormat.class);
    } else {
        // A file with no local index
        LOG.info("Searching a non local-indexed file");
        job.setInputFormat(ShapeInputFormat.class);
    }

    GlobalIndex<Partition> gIndex = SpatialSite.getGlobalIndex(fs, inputFile);
    // if (gIndex != null && gIndex.isReplicated()){
    // job.setMapperClass(RangeQueryMap.class);

    Class<?> outputKey = NullWritable.class;
    try {
        // Pick the map output key type from the type of the queried field
        Field f = shape.getClass().getDeclaredField(query.field);
        f.setAccessible(true);
        if (f.getType().equals(Integer.TYPE)) {
            outputKey = IntWritable.class;
        } else if (f.getType().equals(Double.TYPE)) {
            outputKey = DoubleWritable.class;
        } else if (f.getType().equals(Long.TYPE)) {
            outputKey = LongWritable.class;
        }
    } catch (SecurityException e) {
        e.printStackTrace();
    } catch (NoSuchFieldException e) {
        e.printStackTrace();
    }

    job.setMapOutputKeyClass(outputKey);
    switch (query.type) {
    case Distinct:
        job.setMapperClass(DistinctQueryMap.class);
        job.setReducerClass(DistinctQueryReduce.class);
        job.setMapOutputValueClass(NullWritable.class);
        break;
    case Distribution:
        job.setMapperClass(DistributionQueryMap.class);
        job.setReducerClass(DistributionQueryReduce.class);
        job.setMapOutputValueClass(IntWritable.class);
        break;
    default:
        break;
    }
    // }
    // else
    // job.setMapperClass(RangeQueryMapNoDupAvoidance.class);

    // Set query range for the map function
    job.set(QUERY_SHAPE_CLASS, queryShape.getClass().getName());
    job.set(QUERY_SHAPE, queryShape.toText(new Text()).toString());
    job.set(QUERY_FIELD, query.field);

    // Set shape class for the SpatialInputFormat
    SpatialSite.setShapeClass(job, shape.getClass());

    job.setOutputFormat(TextOutputFormat.class);

    ShapeInputFormat.setInputPaths(job, inputFile);
    TextOutputFormat.setOutputPath(job, outputPath);

    // Submit the job
    if (!background) {
        RunningJob runningJob = JobClient.runJob(job);
        Counters counters = runningJob.getCounters();
        Counter outputRecordCounter = counters.findCounter(Task.Counter.MAP_OUTPUT_RECORDS);
        final long resultCount = outputRecordCounter.getValue();

        // If outputPath not set by user, automatically delete it
        if (userOutputPath == null)
            outFs.delete(outputPath, true);

        return resultCount;
    } else {
        JobClient jc = new JobClient(job);
        lastRunningJob = jc.submitJob(job);
        return -1;
    }
}