List of usage examples for org.apache.hadoop.mapred JobConf getInt
public int getInt(String name, int defaultValue)

Gets the value of the name property as an int. If no such property exists, the supplied defaultValue is returned; if a value exists but cannot be parsed as an int, a NumberFormatException is thrown.
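As a quick illustration of these semantics, here is a minimal sketch (the property name my.example.threshold and the values are hypothetical, not taken from the examples below):

import org.apache.hadoop.mapred.JobConf;

public class GetIntDemo {
    public static void main(String[] args) {
        JobConf conf = new JobConf();

        // Hypothetical property, not yet set: getInt returns the supplied default.
        int a = conf.getInt("my.example.threshold", 42); // -> 42

        // After setting the property (setInt is the write-side counterpart),
        // getInt parses and returns the stored value instead of the default.
        conf.setInt("my.example.threshold", 7);
        int b = conf.getInt("my.example.threshold", 42); // -> 7

        System.out.println(a + " " + b);
    }
}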
From source file:com.hadoopilluminated.examples.Join.java
License:Apache License
/**
 * The main driver for the join program. Invoke this method to submit the
 * map/reduce job.
 *
 * @throws IOException When there are communication problems with the job
 *                     tracker.
 */
@Override
public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf(), Sort.class);
    jobConf.setJobName("join");

    jobConf.setMapperClass(IdentityMapper.class);
    jobConf.setReducerClass(IdentityReducer.class);

    JobClient client = new JobClient(jobConf);
    ClusterStatus cluster = client.getClusterStatus();
    int num_maps = cluster.getTaskTrackers() * jobConf.getInt("test.sort.maps_per_host", 10);
    int num_reduces = (int) (cluster.getMaxReduceTasks() * 0.9);
    String sort_reduces = jobConf.get("test.sort.reduces_per_host");
    if (sort_reduces != null) {
        num_reduces = cluster.getTaskTrackers() * Integer.parseInt(sort_reduces);
    }
    Class<? extends InputFormat> inputFormatClass = SequenceFileInputFormat.class;
    Class<? extends OutputFormat> outputFormatClass = SequenceFileOutputFormat.class;
    Class<? extends WritableComparable> outputKeyClass = BytesWritable.class;
    Class<? extends Writable> outputValueClass = TupleWritable.class;
    String op = "inner";
    List<String> otherArgs = new ArrayList<String>();
    for (int i = 0; i < args.length; ++i) {
        try {
            if ("-m".equals(args[i])) {
                num_maps = Integer.parseInt(args[++i]);
            } else if ("-r".equals(args[i])) {
                num_reduces = Integer.parseInt(args[++i]);
            } else if ("-inFormat".equals(args[i])) {
                inputFormatClass = Class.forName(args[++i]).asSubclass(InputFormat.class);
            } else if ("-outFormat".equals(args[i])) {
                outputFormatClass = Class.forName(args[++i]).asSubclass(OutputFormat.class);
            } else if ("-outKey".equals(args[i])) {
                outputKeyClass = Class.forName(args[++i]).asSubclass(WritableComparable.class);
            } else if ("-outValue".equals(args[i])) {
                outputValueClass = Class.forName(args[++i]).asSubclass(Writable.class);
            } else if ("-joinOp".equals(args[i])) {
                op = args[++i];
            } else {
                otherArgs.add(args[i]);
            }
        } catch (NumberFormatException except) {
            System.out.println("ERROR: Integer expected instead of " + args[i]);
            return printUsage();
        } catch (ArrayIndexOutOfBoundsException except) {
            System.out.println("ERROR: Required parameter missing from " + args[i - 1]);
            return printUsage(); // exits
        }
    }

    // Set user-supplied (possibly default) job configs
    jobConf.setNumMapTasks(num_maps);
    jobConf.setNumReduceTasks(num_reduces);

    if (otherArgs.size() < 2) {
        System.out.println("ERROR: Wrong number of parameters: ");
        return printUsage();
    }
    FileOutputFormat.setOutputPath(jobConf, new Path(otherArgs.remove(otherArgs.size() - 1)));
    List<Path> plist = new ArrayList<Path>(otherArgs.size());
    for (String s : otherArgs) {
        plist.add(new Path(s));
    }

    jobConf.setInputFormat(CompositeInputFormat.class);
    jobConf.set("mapred.join.expr",
            CompositeInputFormat.compose(op, inputFormatClass, plist.toArray(new Path[0])));

    jobConf.setOutputFormat(outputFormatClass);
    jobConf.setOutputKeyClass(outputKeyClass);
    jobConf.setOutputValueClass(outputValueClass);

    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(jobConf);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println("The job took " + (end_time.getTime() - startTime.getTime()) / 1000 + " seconds.");
    return 0;
}
From source file:com.hdfs.concat.crush.CrushReducer.java
License:Apache License
@Override
public void configure(JobConf job) {
    super.configure(job);

    this.job = job;

    taskNum = Integer.parseInt(job.get("mapred.tip.id").replaceFirst(".+_(\\d+)", "$1"));
    timestamp = Long.parseLong(job.get("crush.timestamp"));

    outDirPath = job.get("mapred.output.dir");
    if (null == outDirPath || outDirPath.isEmpty()) {
        throw new IllegalArgumentException("mapred.output.dir has no value");
    }

    /*
     * The files we write should be rooted in the "crush" subdir of the output directory to distinguish
     * them from the files created by the collector.
     */
    outDirPath = new Path(outDirPath + "/crush").toUri().getPath();

    /*
     * Configure the regular expressions and replacements we use to convert dir names to crush output
     * file names. Also get the directory data formats.
     */
    int numSpecs = job.getInt("crush.num.specs", 0);
    if (numSpecs <= 0) {
        throw new IllegalArgumentException("Number of regular expressions must be greater than zero: " + numSpecs);
    }

    readCrushSpecs(numSpecs);

    placeHolderToValue.put("crush.task.num", Integer.toString(taskNum));
    placeHolderToValue.put("crush.timestamp", job.get("crush.timestamp"));

    try {
        fs = FileSystem.get(job);
    } catch (RuntimeException e) {
        throw e;
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
}
From source file:com.hyperiongray.ccmr.s3wordcount.WordCountOnlyMapper.java
License:Apache License
public void configure(JobConf job) {
    String keywordsfileContent = job.get("keywordsFileContent");
    contentMatcher = new ContentMatcher(keywordsfileContent);
    sampleSize = job.getInt("sampleSize", 100);
    logger.info("Running with sampleSize of:" + sampleSize);
}
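The example above shows the common round trip for a per-job tunable: the driver stores an int in the job configuration, and the task reads it back in configure() with a fallback default. A minimal sketch of both sides (the class is hypothetical; the sampleSize property name mirrors the example above):

import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.MapReduceBase;

public class SampleSizeDemo {

    // Task side: read the tunable back, falling back to 100 if the driver never set it.
    public static class TunableMapperBase extends MapReduceBase {
        protected int sampleSize;

        @Override
        public void configure(JobConf job) {
            sampleSize = job.getInt("sampleSize", 100);
        }
    }

    // Driver side: store the tunable before submitting the job.
    public static void main(String[] args) {
        JobConf job = new JobConf(SampleSizeDemo.class);
        job.setInt("sampleSize", 500);
        // ... set input/output paths and formats, then submit;
        // configure() above will then see 500 instead of the default.
    }
}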
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @throws IOException
 * @throws DMLUnsupportedOperationException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
protected void writeBinaryBlockMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen,
        int brlen, int bclen, int replication)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    boolean sparse = src.isInSparseFormat();
    FileSystem fs = FileSystem.get(job);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    // 1) create sequence file writer, with right replication factor
    // (config via 'dfs.replication' not possible since sequence file internally calls fs.getDefaultReplication())
    SequenceFile.Writer writer = null;
    if (replication > 0) //if replication specified (otherwise default)
    {
        //copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
                job.getInt("io.file.buffer.size", 4096), (short) replication, fs.getDefaultBlockSize(), null,
                new SequenceFile.Metadata());
    } else {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    }

    try {
        // 2) bound check for src block
        if (src.getNumRows() > rlen || src.getNumColumns() > clen) {
            throw new IOException("Matrix block [1:" + src.getNumRows() + ",1:" + src.getNumColumns() + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        //3) reblock and write
        MatrixIndexes indexes = new MatrixIndexes();

        if (rlen <= brlen && clen <= bclen) //opt for single block
        {
            //directly write single block
            indexes.setIndexes(1, 1);
            writer.append(indexes, src);
        } else //general case
        {
            //initialize blocks for reuse (at most 4 different blocks required)
            MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse, src.getNonZeros());

            //create and write subblocks of matrix
            for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++)
                for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
                    int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen
                            : src.getNumRows() - blockRow * brlen;
                    int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen
                            : src.getNumColumns() - blockCol * bclen;

                    int row_offset = blockRow * brlen;
                    int col_offset = blockCol * bclen;

                    //get reuse matrix block
                    MatrixBlock block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                    //copy submatrix to block
                    src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset,
                            col_offset + maxCol - 1, block);

                    //append block to sequence file
                    indexes.setIndexes(blockRow + 1, blockCol + 1);
                    writer.append(indexes, block);

                    //reset block for later reuse
                    block.reset();
                }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
From source file:com.ibm.bi.dml.runtime.io.WriterBinaryBlock.java
License:Open Source License
/**
 * @param path
 * @param job
 * @param src
 * @param rlen
 * @param clen
 * @param brlen
 * @param bclen
 * @param replication
 * @throws IOException
 * @throws DMLUnsupportedOperationException
 * @throws DMLRuntimeException
 */
@SuppressWarnings("deprecation")
protected void writeDiagBinaryBlockMatrixToHDFS(Path path, JobConf job, MatrixBlock src, long rlen, long clen,
        int brlen, int bclen, int replication)
        throws IOException, DMLRuntimeException, DMLUnsupportedOperationException {
    boolean sparse = src.isInSparseFormat();
    FileSystem fs = FileSystem.get(job);

    //set up preferred custom serialization framework for binary block format
    if (MRJobConfiguration.USE_BINARYBLOCK_SERIALIZATION)
        MRJobConfiguration.addBinaryBlockSerializationFramework(job);

    // 1) create sequence file writer, with right replication factor
    // (config via 'dfs.replication' not possible since sequence file internally calls fs.getDefaultReplication())
    SequenceFile.Writer writer = null;
    if (replication > 0) //if replication specified (otherwise default)
    {
        //copy of SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class), except for replication
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class,
                job.getInt("io.file.buffer.size", 4096), (short) replication, fs.getDefaultBlockSize(), null,
                new SequenceFile.Metadata());
    } else {
        writer = new SequenceFile.Writer(fs, job, path, MatrixIndexes.class, MatrixBlock.class);
    }

    try {
        // 2) bound check for src block
        if (src.getNumRows() > rlen || src.getNumColumns() > clen) {
            throw new IOException("Matrix block [1:" + src.getNumRows() + ",1:" + src.getNumColumns() + "] "
                    + "out of overall matrix range [1:" + rlen + ",1:" + clen + "].");
        }

        //3) reblock and write
        MatrixIndexes indexes = new MatrixIndexes();

        if (rlen <= brlen && clen <= bclen) //opt for single block
        {
            //directly write single block
            indexes.setIndexes(1, 1);
            writer.append(indexes, src);
        } else //general case
        {
            //initialize blocks for reuse (at most 4 different blocks required)
            MatrixBlock[] blocks = createMatrixBlocksForReuse(rlen, clen, brlen, bclen, sparse, src.getNonZeros());
            MatrixBlock emptyBlock = new MatrixBlock();

            //create and write subblocks of matrix
            for (int blockRow = 0; blockRow < (int) Math.ceil(src.getNumRows() / (double) brlen); blockRow++)
                for (int blockCol = 0; blockCol < (int) Math.ceil(src.getNumColumns() / (double) bclen); blockCol++) {
                    int maxRow = (blockRow * brlen + brlen < src.getNumRows()) ? brlen
                            : src.getNumRows() - blockRow * brlen;
                    int maxCol = (blockCol * bclen + bclen < src.getNumColumns()) ? bclen
                            : src.getNumColumns() - blockCol * bclen;

                    MatrixBlock block = null;
                    if (blockRow == blockCol) //block on diagonal
                    {
                        int row_offset = blockRow * brlen;
                        int col_offset = blockCol * bclen;

                        //get reuse matrix block
                        block = getMatrixBlockForReuse(blocks, maxRow, maxCol, brlen, bclen);

                        //copy submatrix to block
                        src.sliceOperations(row_offset, row_offset + maxRow - 1, col_offset,
                                col_offset + maxCol - 1, block);
                    } else //empty block (not on diagonal)
                    {
                        block = emptyBlock;
                        block.reset(maxRow, maxCol);
                    }

                    //append block to sequence file
                    indexes.setIndexes(blockRow + 1, blockCol + 1);
                    writer.append(indexes, block);

                    //reset block for later reuse
                    if (blockRow != blockCol)
                        block.reset();
                }
        }
    } finally {
        IOUtilFunctions.closeSilently(writer);
    }
}
From source file:com.ibm.bi.dml.runtime.matrix.data.UnPaddedOutputFormat.java
License:Open Source License
@Override
public RecordWriter<K, V> getRecordWriter(FileSystem ignored, JobConf job, String name, Progressable progress)
        throws IOException {
    Path file = FileOutputFormat.getTaskOutputPath(job, name);
    FileSystem fs = file.getFileSystem(job);
    FSDataOutputStream fileOut = fs.create(file, true, job.getInt("io.file.buffer.size", 4096), progress);
    return new UnpaddedRecordWriter<K, V>(fileOut);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static final int getMiscMemRequired(JobConf job) {
    return job.getInt("io.file.buffer.size", 4096);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static int getNumRowsPerBlock(JobConf job, byte matrixIndex) {
    return job.getInt(INPUT_BLOCK_NUM_ROW_PREFIX_CONFIG + matrixIndex, 1);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static int getNumColumnsPerBlock(JobConf job, byte matrixIndex) {
    return job.getInt(INPUT_BLOCK_NUM_COLUMN_PREFIX_CONFIG + matrixIndex, 1);
}
From source file:com.ibm.bi.dml.runtime.matrix.mapred.MRJobConfiguration.java
License:Open Source License
public static MatrixCharacteristics getIntermediateMatrixCharactristics(JobConf job, byte tag) {
    MatrixCharacteristics dim = new MatrixCharacteristics();
    dim.setDimension(job.getLong(INTERMEDIATE_MATRIX_NUM_ROW_PREFIX_CONFIG + tag, 0),
            job.getLong(INTERMEDIATE_MATRIX_NUM_COLUMN_PREFIX_CONFIG + tag, 0));
    dim.setBlockSize(job.getInt(INTERMEDIATE_BLOCK_NUM_ROW_PREFIX_CONFIG + tag, 1),
            job.getInt(INTERMEDIATE_BLOCK_NUM_COLUMN_PREFIX_CONFIG + tag, 1));
    return dim;
}