List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(boolean loadDefaults)
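The boolean argument controls whether the new configuration loads the default Hadoop resources (core-default.xml, core-site.xml, and the mapred equivalents). Passing false yields a completely empty configuration, which is what the ChainMapper/ChainReducer example below relies on. A minimal sketch of the difference; the property lookup is illustrative only:

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsSketch {
    public static void main(String[] args) {
        // true: the default resources are loaded into the configuration
        JobConf withDefaults = new JobConf(true);
        // false: start from an empty configuration, nothing is loaded
        JobConf empty = new JobConf(false);

        // A property defined in core-default.xml resolves only in the first case
        System.out.println(withDefaults.get("io.file.buffer.size")); // e.g. "4096"
        System.out.println(empty.get("io.file.buffer.size"));        // null
    }
}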
From source file:Top20AZRestaurants.java
@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);

    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class,
            Text.class, map2Conf);

    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);

    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}
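Note why new JobConf(false) is used here: ChainMapper.addMapper and ChainReducer.setReducer each take a configuration that applies only to that stage of the chain, so an empty JobConf keeps the per-stage settings isolated instead of copying the full set of loaded defaults into every link of the chain.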
From source file:TestColumnStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);
        conf.setJobName("TestColumnStorageInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.input.dir", argv[0]);
        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + " offset:" + split.getStart() + "len:" + split.getLength()
                    + "path:" + split.getPath().toString() + "beginLine:" + split.getBeginLine()
                    + "endLine:" + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        // Benchmark loop: repeatedly re-reads and deserializes every split
        // until the process is killed.
        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                WritableComparable key = currRecReader.createKey();
                Writable value = currRecReader.createValue();

                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;
                    Object row = serDe.deserialize(record);
                    count++;
                }
                end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }
            System.out.println("total delay:" + totalDelay + "\n");
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:FriendsJob.java
License:Apache License
/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FriendsJob.class);
    conf.setJobName("friendscount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FriendsMapper.class);
    // conf.setCombinerClass(FriendsReducer.class);
    conf.setReducerClass(FriendsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:AnagramJob.java
License:Apache License
/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(AnagramJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AnagramMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(AnagramReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/**
 * This implements matrix this.transpose().times(other).
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix times(DistributedRowMatrix other, Path outPath) throws IOException {
    if (numRows != other.numRows()) {
        throw new CardinalityException(numRows, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
    Configuration conf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, rowPath,
            other.rowPath, outPath, other.numCols);
    JobClient.runJob(new JobConf(conf));

    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols());
    out.setConf(conf);
    return out;
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU
 * or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyMapReduce(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean isMatrixATransposed, int tileWidth, boolean isDebugging) throws IOException {
    // Check that cols of matrix A = rows of matrix B:
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();

    // Transpose matrix A within a new MapReduce job, unless it is
    // already transposed
    DistributedRowMatrix transposed = this;
    if (!isMatrixATransposed) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    Configuration conf;
    if (!useGPU) {
        conf = MatrixMultiplicationCpu.createMatrixMultiplicationCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath, other.numCols, isDebugging);
    } else { // use GPU
        conf = MatrixMultiplicationGpu.createMatrixMultiplicationGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath, other.numCols, tileWidth, isDebugging);
    }

    // Multiply the matrix with the transposed one
    JobClient.runJob(new JobConf(conf));

    // Read the resulting matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
            other.numCols());
    out.setConf(conf);
    return out;
}
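A hypothetical driver for the multiplication path above; all paths and dimensions are invented for illustration, and the row files for A and B are assumed to already exist in HDFS:

// Hypothetical usage sketch; paths and dimensions are illustrative only
Configuration conf = new Configuration();

DistributedRowMatrix a = new DistributedRowMatrix(new Path("/data/A"), new Path("/tmp/drm"), 1000, 500);
a.setConf(conf);
DistributedRowMatrix b = new DistributedRowMatrix(new Path("/data/B"), new Path("/tmp/drm"), 500, 200);
b.setConf(conf);

// CPU path: A is transposed internally first; tileWidth is only used on the GPU path
DistributedRowMatrix c = a.multiplyMapReduce(b, new Path("/out/C"), false, false, 0, false);

// GPU path, reusing an already transposed A and a tile width of 32
// DistributedRowMatrix cGpu = a.multiplyMapReduce(b, new Path("/out/C-gpu"), true, true, 32, false);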
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
public DistributedRowMatrix transpose() throws IOException {
    Path outputPath = new Path(outputTmpBasePath, "transpose-" + (System.nanoTime() & 0xFF));
    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
    Configuration conf = TransposeJob.buildTransposeJobConf(initialConf, rowPath, outputPath, numRows);
    JobClient.runJob(new JobConf(conf));

    DistributedRowMatrix m = new DistributedRowMatrix(outputPath, outputTmpPath, numCols, numRows);
    m.setConf(this.conf);
    return m;
}
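Note the temporary directory name: masking System.nanoTime() with 0xFF leaves only 256 possible suffixes, so repeated transposes under the same outputTmpBasePath can collide. The vector methods below avoid this by using the full nanoTime() value in their temporary paths.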
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override
public Vector times(Vector v) {
    try {
        Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesJobConf(initialConf, v, numRows, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));

        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override
public Vector timesSquared(Vector v) {
    try {
        Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesSquaredJobConf(initialConf, v, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));

        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
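timesSquared is identical to times above except for the job configuration factory it calls (createTimesSquaredJobConf instead of createTimesJobConf); the result retrieval and temporary-file cleanup are shared line for line.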
From source file:average.AverageDriver.java
public static void main(String[] args) {
    JobClient client = new JobClient();
    // Configuration for the job is set in this variable
    JobConf conf = new JobConf(average.AverageDriver.class);
    // Name of the job
    conf.setJobName("BookCrossing1.0");
    // Data types of the output key and value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    // Set the Mapper and Reducer classes
    conf.setMapperClass(average.AverageMapper.class);
    conf.setReducerClass(average.AverageReducer.class);
    // Formats of the input and output data types
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));
    client.setConf(conf);
    try {
        // Run the job with the configuration set in conf
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
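Note that this driver reads the input and output directories from args[1] and args[2] rather than args[0] and args[1] as in the earlier examples, so the first command-line argument is ignored.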