List of usage examples for the org.apache.hadoop.mapred.JobConf constructor
public JobConf(boolean loadDefaults)
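The boolean argument controls whether the new configuration loads the default Hadoop resources (core-default.xml, core-site.xml, and the mapred equivalents). Passing false yields a completely empty configuration, which is what the ChainMapper/ChainReducer example below relies on. A minimal sketch of the difference; the property lookup is illustrative only:

import org.apache.hadoop.mapred.JobConf;

public class JobConfLoadDefaultsSketch {
    public static void main(String[] args) {
        // true: the default resources are loaded into the configuration
        JobConf withDefaults = new JobConf(true);
        // false: start from an empty configuration, nothing is loaded
        JobConf empty = new JobConf(false);

        // A property defined in core-default.xml resolves only in the first case
        System.out.println(withDefaults.get("io.file.buffer.size")); // e.g. "4096"
        System.out.println(empty.get("io.file.buffer.size"));        // null
    }
}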
From source file:Top20AZRestaurants.java
@Override
public int run(String[] args) throws Exception {
    Job job1 = new Job(getConf());
    job1.setSortComparatorClass(MyDecreasingDoubleComparator.class);
    job1.setJobName("Top20 AZ Restaurants ChainJob");
    job1.setJarByClass(Top20AZRestaurants.class);

    JobConf map1Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20Mapper.class, LongWritable.class, Text.class, Text.class, Text.class,
            map1Conf);

    JobConf map2Conf = new JobConf(false);
    ChainMapper.addMapper(job1, Top20MapperRedo.class, Text.class, Text.class, DoubleWritable.class,
            Text.class, map2Conf);

    JobConf reduceConf = new JobConf(false);
    ChainReducer.setReducer(job1, Top20ReducerRedo.class, DoubleWritable.class, Text.class, Text.class,
            DoubleWritable.class, reduceConf);

    FileInputFormat.setInputPaths(job1, new Path(args[0]));
    FileOutputFormat.setOutputPath(job1, new Path(args[1]));

    boolean success = job1.waitForCompletion(true);
    return success ? 0 : 1;
}
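Note why new JobConf(false) is used here: ChainMapper.addMapper and ChainReducer.setReducer each take a configuration that applies only to that stage of the chain, so an empty JobConf keeps the per-stage settings isolated instead of copying the full set of loaded defaults into every link of the chain.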
From source file:TestColumnStorageInputFormat.java
License:Open Source License
public static void main(String[] argv) throws IOException, SerDeException {
    try {
        if (argv.length != 2) {
            System.out.println("TestColumnStorageInputFormat <input> idx");
            System.exit(-1);
        }

        JobConf conf = new JobConf(TestColumnStorageInputFormat.class);
        conf.setJobName("TestColumnStorageInputFormat");
        conf.setNumMapTasks(1);
        conf.setNumReduceTasks(1);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Unit.Record.class);
        conf.setInputFormat(TextInputFormat.class);
        conf.set("mapred.output.compress", "false");
        conf.set("mapred.input.dir", argv[0]);
        conf.set("hive.io.file.readcolumn.ids", argv[1]);

        FormatStorageSerDe serDe = initSerDe(conf);
        StandardStructObjectInspector oi = (StandardStructObjectInspector) serDe.getObjectInspector();
        List<? extends StructField> fieldRefs = oi.getAllStructFieldRefs();

        FileInputFormat.setInputPaths(conf, argv[0]);
        Path outputPath = new Path(argv[1]);
        FileOutputFormat.setOutputPath(conf, outputPath);

        InputFormat inputFormat = new ColumnStorageInputFormat();
        long begin = System.currentTimeMillis();
        InputSplit[] inputSplits = inputFormat.getSplits(conf, 1);
        long end = System.currentTimeMillis();
        System.out.println("getsplit delay " + (end - begin) + " ms");

        if (inputSplits.length == 0) {
            System.out.println("inputSplits is empty");
            return;
        } else {
            System.out.println("get Splits:" + inputSplits.length);
        }

        int size = inputSplits.length;
        System.out.println("getSplits return size:" + size);
        for (int i = 0; i < size; i++) {
            ColumnStorageSplit split = (ColumnStorageSplit) inputSplits[i];
            System.out.printf("split:" + i + " offset:" + split.getStart() + "len:" + split.getLength()
                    + "path:" + split.getPath().toString() + "beginLine:" + split.getBeginLine()
                    + "endLine:" + split.getEndLine());
            if (split.getFileName() != null) {
                System.out.println("fileName:" + split.getFileName());
            } else {
                System.out.println("fileName null");
            }
            if (split.fileList() != null) {
                System.out.println("fileList.num:" + split.fileList().size());
                for (int j = 0; j < split.fileList().size(); j++) {
                    System.out.println("filelist " + j + ":" + split.fileList().get(j));
                }
            }
        }

        // Benchmark loop: repeatedly re-reads and deserializes every split
        // until the process is killed.
        while (true) {
            int totalDelay = 0;
            RecordReader<WritableComparable, Writable> currRecReader = null;
            for (int i = 0; i < inputSplits.length; i++) {
                currRecReader = inputFormat.getRecordReader(inputSplits[i], conf, Reporter.NULL);
                WritableComparable key = currRecReader.createKey();
                Writable value = currRecReader.createValue();

                begin = System.currentTimeMillis();
                int count = 0;
                while (currRecReader.next(key, value)) {
                    Record record = (Record) value;
                    Object row = serDe.deserialize(record);
                    count++;
                }
                end = System.currentTimeMillis();

                long delay = (end - begin) / 1000;
                totalDelay += delay;
                System.out.println(count + " record read over, delay " + delay + " s");
            }
            System.out.println("total delay:" + totalDelay + "\n");
        }
    } catch (Exception e) {
        e.printStackTrace();
        System.out.println("get exception:" + e.getMessage());
    }
}
From source file:FriendsJob.java
License:Apache License
/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(FriendsJob.class);
    conf.setJobName("friendscount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(FriendsMapper.class);
    // conf.setCombinerClass(FriendsReducer.class);
    conf.setReducerClass(FriendsReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:AnagramJob.java
License:Apache License
/**
 * @param args
 */
public static void main(String[] args) throws Exception {
    JobConf conf = new JobConf(AnagramJob.class);
    conf.setJobName("anagramcount");

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setMapperClass(AnagramMapper.class);
    // conf.setCombinerClass(AnagramReducer.class);
    conf.setReducerClass(AnagramReducer.class);

    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);

    FileInputFormat.setInputPaths(conf, new Path(args[0]));
    FileOutputFormat.setOutputPath(conf, new Path(args[1]));

    JobClient.runJob(conf);
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/**
 * This implements matrix this.transpose().times(other).
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix times(DistributedRowMatrix other, Path outPath) throws IOException {
    if (numRows != other.numRows()) {
        throw new CardinalityException(numRows, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
    Configuration conf = MatrixMultiplicationJob.createMatrixMultiplyJobConf(initialConf, rowPath,
            other.rowPath, outPath, other.numCols);
    JobClient.runJob(new JobConf(conf));

    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, numCols, other.numCols());
    out.setConf(conf);
    return out;
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
/**
 * This implements matrix multiplication A * B using MapReduce tasks on CPU
 * or GPU.
 *
 * @param other a DistributedRowMatrix
 * @param outPath path to write the result to
 * @param useGPU use GPU or CPU (default: false, use CPU)
 * @return a DistributedRowMatrix containing the product
 */
public DistributedRowMatrix multiplyMapReduce(DistributedRowMatrix other, Path outPath, boolean useGPU,
        boolean isMatrixATransposed, int tileWidth, boolean isDebugging) throws IOException {
    // Check that cols of matrix A = rows of matrix B:
    // (l x m) * (m x n) = (l x n)
    if (numCols != other.numRows()) {
        throw new CardinalityException(numCols, other.numRows());
    }

    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();

    // Transpose matrix A within a new MapReduce job, unless it is
    // already transposed
    DistributedRowMatrix transposed = this;
    if (!isMatrixATransposed) {
        transposed = transposed.transpose();
    }
    // Debug
    // System.out.println("DistributedRowMatrix transposed:");
    // transposed.printDistributedRowMatrix();

    // Build the MatrixMultiplication job configuration
    Configuration conf;
    if (!useGPU) {
        conf = MatrixMultiplicationCpu.createMatrixMultiplicationCpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath, other.numCols, isDebugging);
    } else { // use GPU
        conf = MatrixMultiplicationGpu.createMatrixMultiplicationGpuConf(initialConf, transposed.rowPath,
                other.rowPath, outPath, other.numCols, tileWidth, isDebugging);
    }

    // Multiply the matrix with the transposed one
    JobClient.runJob(new JobConf(conf));

    // Read the resulting matrix from HDFS
    DistributedRowMatrix out = new DistributedRowMatrix(outPath, outputTmpPath, this.numRows,
            other.numCols());
    out.setConf(conf);
    return out;
}
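A hypothetical driver for the multiplication path above; all paths and dimensions are invented for illustration, and the row files for A and B are assumed to already exist in HDFS:

// Hypothetical usage sketch; paths and dimensions are illustrative only
Configuration conf = new Configuration();

DistributedRowMatrix a = new DistributedRowMatrix(new Path("/data/A"), new Path("/tmp/drm"), 1000, 500);
a.setConf(conf);
DistributedRowMatrix b = new DistributedRowMatrix(new Path("/data/B"), new Path("/tmp/drm"), 500, 200);
b.setConf(conf);

// CPU path: A is transposed internally first; tileWidth is only used on the GPU path
DistributedRowMatrix c = a.multiplyMapReduce(b, new Path("/out/C"), false, false, 0, false);

// GPU path, reusing an already transposed A and a tile width of 32
// DistributedRowMatrix cGpu = a.multiplyMapReduce(b, new Path("/out/C-gpu"), true, true, 32, false);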
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
public DistributedRowMatrix transpose() throws IOException {
    Path outputPath = new Path(outputTmpBasePath, "transpose-" + (System.nanoTime() & 0xFF));
    Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
    Configuration conf = TransposeJob.buildTransposeJobConf(initialConf, rowPath, outputPath, numRows);
    JobClient.runJob(new JobConf(conf));

    DistributedRowMatrix m = new DistributedRowMatrix(outputPath, outputTmpPath, numCols, numRows);
    m.setConf(this.conf);
    return m;
}
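Note the temporary directory name: masking System.nanoTime() with 0xFF leaves only 256 possible suffixes, so repeated transposes under the same outputTmpBasePath can collide. The vector methods below avoid this by using the full nanoTime() value in their temporary paths.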
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override
public Vector times(Vector v) {
    try {
        Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesJobConf(initialConf, v, numRows, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));

        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override
public Vector timesSquared(Vector v) {
    try {
        Configuration initialConf = (getConf() == null) ? new Configuration() : getConf();
        Path outputVectorTmpPath = new Path(outputTmpBasePath, new Path(Long.toString(System.nanoTime())));
        Configuration conf = TimesSquaredJob.createTimesSquaredJobConf(initialConf, v, rowPath,
                outputVectorTmpPath);
        JobClient.runJob(new JobConf(conf));

        Vector result = TimesSquaredJob.retrieveTimesSquaredOutputVector(conf);
        if (!keepTempFiles) {
            FileSystem fs = outputVectorTmpPath.getFileSystem(conf);
            fs.delete(outputVectorTmpPath, true);
        }
        return result;
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}
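timesSquared is identical to times above except for the job configuration factory it calls (createTimesSquaredJobConf instead of createTimesJobConf); the result retrieval and temporary-file cleanup are shared line for line.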
From source file:average.AverageDriver.java
public static void main(String[] args) {
    JobClient client = new JobClient();
    // Configuration for the job is set in this variable
    JobConf conf = new JobConf(average.AverageDriver.class);
    // Name of the job
    conf.setJobName("BookCrossing1.0");
    // Data types of the output key and value
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    // Set the Mapper and Reducer classes
    conf.setMapperClass(average.AverageMapper.class);
    conf.setReducerClass(average.AverageReducer.class);
    // Formats of the input and output data types
    conf.setInputFormat(TextInputFormat.class);
    conf.setOutputFormat(TextOutputFormat.class);
    // Specify input and output DIRECTORIES (not files)
    FileInputFormat.setInputPaths(conf, new Path(args[1]));
    FileOutputFormat.setOutputPath(conf, new Path(args[2]));
    client.setConf(conf);
    try {
        // Run the job with the configuration set in conf
        JobClient.runJob(conf);
    } catch (Exception e) {
        e.printStackTrace();
    }
}
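Note that this driver reads the input and output directories from args[1] and args[2] rather than args[0] and args[1] as in the earlier examples, so the first command-line argument is ignored.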