Example usage for org.apache.hadoop.mapred JobConf JobConf

List of usage examples for org.apache.hadoop.mapred JobConf JobConf

Introduction

On this page you can find example usages of the org.apache.hadoop.mapred.JobConf constructor JobConf(boolean).

Prototype

public JobConf(boolean loadDefaults) 

Document

A new map/reduce configuration where the behavior of reading from the default resources can be turned off.
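
Passing false tells the constructor not to load the default configuration resources (core-default.xml, core-site.xml and so on), so the resulting JobConf contains only the properties you set yourself. A minimal sketch of this behavior; the property name my.example.key is invented purely for illustration:

import org.apache.hadoop.mapred.JobConf;

public class JobConfExample {
    public static void main(String[] args) {
        // loadDefaults = false: skip core-default.xml, core-site.xml, etc.
        JobConf conf = new JobConf(false);
        conf.set("my.example.key", "42"); // invented property, for illustration only
        System.out.println(conf.get("my.example.key"));
    }
}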

Usage

From source file:Top20AZRestaurants.java

@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);
    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class, Text.class,
            map2Conf);
    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);
    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}
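
In the ChainMapper/ChainReducer example above, each stage of the chain gets its own JobConf built with loadDefaults set to false. That per-stage configuration carries only the settings intended for that particular mapper or reducer, so there is no need to re-read the default resources for every link in the chain.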

From source file:TestColumnStorageInputFormat.java

License:Open Source License

public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);

        conf.setJobName("TestColumnStorageInputFormat");

        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);

        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);

        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "flase");

        conf.set("mapred.input.dir", argv[0]);

        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + " offset:" + split.getStart() + "len:" + split.getLength()
                    + "path:" + split.getPath().toString() + "beginLine:" + split.getBeginLine() + "endLine:"
                    + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        // Benchmark loop: repeatedly re-read every split and print per-split timing.
        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);

                WritableComparable key;
                Writable value;

                key = currRecReader.createKey();
                value = currRecReader.createValue();

                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {

                    Record record = (Record) value;

                    Object row = serDe.deserialize(record);
                    count++;

                }
                end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }

            System.out.println("total delay:" + totalDelay + "\n");
        }

    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}

From source file:FriendsJob.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FriendsJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FriendsMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(FriendsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file:AnagramJob.java

License:Apache License

/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(AnagramJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AnagramMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(AnagramReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);

}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

/**
 * This implements matrix this.transpose().times(other)
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix times(DistributedRowMatrix other, Path outPath) throws IOException {
    if (numRows != other.numRows()) {
        throw new CardinalityException(numRows, other.numRows());
    }

    Configuration initialConf = getConf() == null ? new Configuration() : getConf();
    Configuration conf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, rowPath,
            other.rowPath, outPath, other.numCols);
    JobClient.runJob(new JobConf(conf));
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols());
    out.setConf(conf);
    return out;
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU or
 * GPU
 * 
 * @param other a DistributedRowMatrix
 * @param outPath path to write result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyMapReduce(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean isMatrixATransposed, int tileWidth, boolean isDebugging) throws IOException {
    // Check if cols of MatrixA = rows of MatrixB
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();

    // Transpose Matrix within a new MapReduce Job
    DistributedRowMatrix transposed = this;
    if (!isMatrixATransposed) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build MatrixMultiplication job configuration
    Configuration conf = null;
    if (!useGPU) {
        conf = MatrixMultiplicationCpu.createMatrixMultiplicationCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath, other.numCols, isDebugging);
    } else { // use GPU
        conf = MatrixMultiplicationGpu.createMatrixMultiplicationGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath, other.numCols, tileWidth, isDebugging);
    }

    // Multiply Matrix with transposed one
    JobClient.runJob(new JobConf(conf));

    // Read resulting Matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows, other.numCols());
    out.setConf(conf);

    return out;
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

public DistributedRowMatrix transpose() throws IOException {
    Path outputPath = new Path(outputTmpBasePath, "transpose-" + (System.nanoTime() & 0xFF));
    Configuration initialConf = getConf() == null ? new Configuration() : getConf();
    Configuration conf = TransposeJob.buildTransposeJobConf(initialConf, rowPath, outputPath, numRows);
    JobClient.runJob(new JobConf(conf));
    DistributedRowMatrix m = new DistributedRowMatrix(outputPath, outputTmpPath, numCols, numRows);
    m.setConf(this.conf);
    return m;
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

@Override
public Vector times(Vector v) {
    try {
        Configuration initialConf = getConf() == null ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesJobConf(initialConf, v, numRows, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));
        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

@Override
public Vector timesSquared(Vector v) {
    try {
        Configuration initialConf = getConf() == null ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesSquaredJobConf(initialConf, v, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));
        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:average.AverageDriver.java

public static void main(String[] args) {
    JobClient client = new JobClient();
    // Configurations for Job set in this variable
    JobConf conf = new JobConf(average.AverageDriver.class);

    // Name of the Job
    conf.setJobName("BookCrossing1.0");

    // Data type of Output Key and Value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    // Setting the Mapper and Reducer Class
    conf.setMapperClass(average.AverageMapper.class);
    conf.setReducerClass(average.AverageReducer.class);

    // Formats of the Data Type of Input and output
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));

    client.setConf(conf);
    try {
        // Running the job with Configurations set in the conf.
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}