List of usage examples for org.apache.hadoop.mapred.JobConf setInt

public void setInt(String name, int value)

Set the value of the name property to an int.
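Before the examples, a minimal sketch of the round trip: setInt stores the value under the given property name, and getInt reads it back, returning the supplied default when the property is absent. The class name and the property key "example.num.partitions" below are hypothetical, chosen only for illustration.

import org.apache.hadoop.mapred.JobConf;

public class SetIntExample {
  public static void main(String[] args) {
    // Create an empty configuration (no default resources loaded).
    JobConf conf = new JobConf(false);

    // Store an int value under a property name (kept internally as a string).
    conf.setInt("example.num.partitions", 8);

    // Read it back; the second argument is the default returned when the
    // property is missing or cannot be parsed as an int.
    int partitions = conf.getInt("example.num.partitions", 1);
    System.out.println("example.num.partitions = " + partitions); // prints 8
  }
}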
From source file: com.m6d.filecrush.crush.CrushReducerTest.java
License: Apache License

@Test
public void missingInputRegex() {
  JobConf job = new JobConf(false);

  job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
  job.set("fs.default.name", "file:///");
  job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
  job.set("mapred.output.dir", outDir.getAbsolutePath());
  job.setLong("crush.timestamp", 98765);
  job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

  job.setInt("crush.num.specs", 2);

  job.set("crush.0.regex", "foo");
  job.set("crush.0.regex.replacement", "bar");
  job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.0.output.format", TextOutputFormat.class.getName());

  job.set("crush.1.regex.replacement", "bar");
  job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.1.output.format", TextOutputFormat.class.getName());

  reducer = new CrushReducer();

  try {
    reducer.configure(job);
    fail();
  } catch (IllegalArgumentException e) {
    if (!"No input regex: crush.1.regex".equals(e.getMessage())) {
      throw e;
    }
  }
}
From source file: com.m6d.filecrush.crush.CrushReducerTest.java
License: Apache License

@Test
public void missingOutputRegex() {
  JobConf job = new JobConf(false);

  job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
  job.set("fs.default.name", "file:///");
  job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
  job.set("mapred.output.dir", outDir.getAbsolutePath());
  job.setLong("crush.timestamp", 98765);
  job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

  job.setInt("crush.num.specs", 2);

  job.set("crush.0.regex", "foo");
  job.set("crush.0.regex.replacement", "bar");
  job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.0.output.format", TextOutputFormat.class.getName());

  job.set("crush.1.regex", "hello");
  job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.1.output.format", TextOutputFormat.class.getName());

  reducer = new CrushReducer();

  try {
    reducer.configure(job);
    fail();
  } catch (IllegalArgumentException e) {
    if (!"No output replacement: crush.1.regex.replacement".equals(e.getMessage())) {
      throw e;
    }
  }
}
From source file: com.m6d.filecrush.crush.CrushReducerTest.java
License: Apache License

@Test
public void missingInputFormat() {
  JobConf job = new JobConf(false);

  job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
  job.set("fs.default.name", "file:///");
  job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
  job.set("mapred.output.dir", outDir.getAbsolutePath());
  job.setLong("crush.timestamp", 98765);
  job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

  job.setInt("crush.num.specs", 2);

  job.set("crush.0.regex", "foo");
  job.set("crush.0.regex.replacement", "bar");
  job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

  job.set("crush.1.regex", "hello");
  job.set("crush.1.regex.replacement", "hello");
  job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

  reducer = new CrushReducer();

  try {
    reducer.configure(job);
    fail();
  } catch (IllegalArgumentException e) {
    if (!"No input format: crush.1.input.format".equals(e.getMessage())) {
      throw e;
    }
  }
}
From source file: com.m6d.filecrush.crush.CrushReducerTest.java
License: Apache License

@Test
public void inputFormatWrongType() {
  JobConf job = new JobConf(false);

  job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
  job.set("fs.default.name", "file:///");
  job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
  job.set("mapred.output.dir", outDir.getAbsolutePath());
  job.setLong("crush.timestamp", 98765);
  job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

  job.setInt("crush.num.specs", 2);

  job.set("crush.0.regex", "foo");
  job.set("crush.0.regex.replacement", "bar");
  job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

  job.set("crush.1.regex", "hello");
  job.set("crush.1.regex.replacement", "hello");
  job.set("crush.1.input.format", Object.class.getName());
  job.set("crush.1.output.format", SequenceFileOutputFormat.class.getName());

  reducer = new CrushReducer();

  try {
    reducer.configure(job);
    fail();
  } catch (IllegalArgumentException e) {
    if (!"Not a file input format: crush.1.input.format=java.lang.Object".equals(e.getMessage())) {
      throw e;
    }
  }
}
From source file: com.m6d.filecrush.crush.CrushReducerTest.java
License: Apache License

@Test
public void missingOutputFormat() {
  JobConf job = new JobConf(false);

  job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
  job.set("fs.default.name", "file:///");
  job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
  job.set("mapred.output.dir", outDir.getAbsolutePath());
  job.setLong("crush.timestamp", 98765);
  job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

  job.setInt("crush.num.specs", 2);

  job.set("crush.0.regex", "foo");
  job.set("crush.0.regex.replacement", "bar");
  job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

  job.set("crush.1.regex", "hello");
  job.set("crush.1.regex.replacement", "hello");
  job.set("crush.1.input.format", SequenceFileInputFormat.class.getName());

  reducer = new CrushReducer();

  try {
    reducer.configure(job);
    fail();
  } catch (IllegalArgumentException e) {
    if (!"No output format: crush.1.output.format".equals(e.getMessage())) {
      throw e;
    }
  }
}
From source file: com.m6d.filecrush.crush.CrushReducerTest.java
License: Apache License

@Test
public void outputFormatWrongType() {
  JobConf job = new JobConf(false);

  job.set("mapred.tip.id", "task_201011081200_14527_r_1234");
  job.set("fs.default.name", "file:///");
  job.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
  job.set("mapred.output.dir", outDir.getAbsolutePath());
  job.setLong("crush.timestamp", 98765);
  job.setLong("dfs.blocksize", 1024 * 1024 * 64L);

  job.setInt("crush.num.specs", 2);

  job.set("crush.0.regex", "foo");
  job.set("crush.0.regex.replacement", "bar");
  job.set("crush.0.input.format", SequenceFileInputFormat.class.getName());
  job.set("crush.0.output.format", SequenceFileOutputFormat.class.getName());

  job.set("crush.1.regex", "hello");
  job.set("crush.1.regex.replacement", "hello");
  job.set("crush.1.input.format", TextInputFormat.class.getName());
  job.set("crush.1.output.format", Object.class.getName());

  reducer = new CrushReducer();

  try {
    reducer.configure(job);
    fail();
  } catch (IllegalArgumentException e) {
    if (!"Not an output format: crush.1.output.format=java.lang.Object".equals(e.getMessage())) {
      throw e;
    }
  }
}
From source file: com.mycompany.MyHadoopSamples1.TransposeJob.java
License: Apache License

public static Configuration buildTransposeJobConf(Configuration initialConf, Path matrixInputPath,
    Path matrixOutputPath, int numInputRows) throws IOException {
  JobConf conf = new JobConf(initialConf, TransposeJob.class);
  conf.setJobName("TransposeJob: " + matrixInputPath + " transpose -> " + matrixOutputPath);
  FileSystem fs = FileSystem.get(conf);
  matrixInputPath = fs.makeQualified(matrixInputPath);
  matrixOutputPath = fs.makeQualified(matrixOutputPath);
  conf.setInt(NUM_ROWS_KEY, numInputRows);

  FileInputFormat.addInputPath(conf, matrixInputPath);
  conf.setInputFormat(SequenceFileInputFormat.class);
  FileOutputFormat.setOutputPath(conf, matrixOutputPath);
  System.out.println("OUTPUT --> " + matrixOutputPath.toString());
  conf.setMapperClass(TransposeMapper.class);
  conf.setMapOutputKeyClass(IntWritable.class);
  conf.setMapOutputValueClass(VectorWritable.class);
  conf.setCombinerClass(MergeVectorsCombiner.class);
  conf.setReducerClass(MergeVectorsReducer.class);
  conf.setOutputFormat(SequenceFileOutputFormat.class);
  conf.setOutputKeyClass(IntWritable.class);
  conf.setOutputValueClass(VectorWritable.class);

  return conf;
}
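The numInputRows value stored above with setInt(NUM_ROWS_KEY, numInputRows) is only useful once a task reads it back. The following is a hedged sketch of how the mapper side might do that with the old mapred API; the class body is hypothetical (not the actual TransposeMapper source) and assumes only MapReduceBase/JobConf from org.apache.hadoop.mapred plus the NUM_ROWS_KEY constant from the enclosing job class.

// Hypothetical sketch, not the actual TransposeMapper implementation.
public static class TransposeMapperSketch extends MapReduceBase {
  private int numRows;

  @Override
  public void configure(JobConf conf) {
    // getInt returns -1 here if NUM_ROWS_KEY was never set on the JobConf.
    numRows = conf.getInt(NUM_ROWS_KEY, -1);
  }
}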
From source file: com.pinterest.hdfsbackup.distcp.DistCp.java
License: Apache License

/**
 * Initialize DFSCopyFileMapper specific job-configuration.
 * @param conf : The dfs/mapred configuration.
 * @param jobConf : The handle to the jobConf object to be initialized.
 * @param args Arguments
 */
private static void setup(Configuration conf, JobConf jobConf, final Arguments args) throws IOException {
  jobConf.set(DST_DIR_LABEL, args.dst.toUri().toString());

  // set boolean values
  final boolean update = args.flags.contains(Options.UPDATE);
  final boolean overwrite = !update && args.flags.contains(Options.OVERWRITE);
  jobConf.setBoolean(Options.UPDATE.propertyname, update);
  jobConf.setBoolean(Options.OVERWRITE.propertyname, overwrite);
  jobConf.setBoolean(Options.IGNORE_READ_FAILURES.propertyname,
      args.flags.contains(Options.IGNORE_READ_FAILURES));
  jobConf.setBoolean(Options.PRESERVE_STATUS.propertyname,
      args.flags.contains(Options.PRESERVE_STATUS));

  final String randomId = getRandomId();
  JobClient jClient = new JobClient(jobConf);
  Path jobDirectory = new Path(jClient.getSystemDir(), NAME + "_" + randomId);
  jobConf.set(JOB_DIR_LABEL, jobDirectory.toString());

  FileSystem dstfs = args.dst.getFileSystem(conf);
  boolean dstExists = dstfs.exists(args.dst);
  boolean dstIsDir = false;
  if (dstExists) {
    dstIsDir = dstfs.getFileStatus(args.dst).isDir();
  }

  // default logPath
  Path logPath = args.log;
  if (logPath == null) {
    String filename = "_distcp_logs_" + randomId;
    if (!dstExists || !dstIsDir) {
      Path parent = args.dst.getParent();
      if (!dstfs.exists(parent)) {
        dstfs.mkdirs(parent);
      }
      logPath = new Path(parent, filename);
    } else {
      logPath = new Path(args.dst, filename);
    }
  }
  FileOutputFormat.setOutputPath(jobConf, logPath);

  // create src list, dst list
  FileSystem jobfs = jobDirectory.getFileSystem(jobConf);

  Path srcfilelist = new Path(jobDirectory, "_distcp_src_files");
  jobConf.set(SRC_LIST_LABEL, srcfilelist.toString());
  SequenceFile.Writer src_writer = SequenceFile.createWriter(jobfs, jobConf, srcfilelist,
      LongWritable.class, FilePair.class, SequenceFile.CompressionType.NONE);

  Path dstfilelist = new Path(jobDirectory, "_distcp_dst_files");
  SequenceFile.Writer dst_writer = SequenceFile.createWriter(jobfs, jobConf, dstfilelist,
      Text.class, Text.class, SequenceFile.CompressionType.NONE);

  Path dstdirlist = new Path(jobDirectory, "_distcp_dst_dirs");
  jobConf.set(DST_DIR_LIST_LABEL, dstdirlist.toString());
  SequenceFile.Writer dir_writer = SequenceFile.createWriter(jobfs, jobConf, dstdirlist,
      Text.class, FilePair.class, SequenceFile.CompressionType.NONE);

  // handle the case where the destination directory doesn't exist
  // and we've only a single src directory OR we're updating/overwriting
  // the contents of the destination directory.
  final boolean special = (args.srcs.size() == 1 && !dstExists) || update || overwrite;
  int srcCount = 0, cnsyncf = 0, dirsyn = 0;
  long fileCount = 0L, byteCount = 0L, cbsyncs = 0L;
  try {
    for (Iterator<Path> srcItr = args.srcs.iterator(); srcItr.hasNext();) {
      final Path src = srcItr.next();
      FileSystem srcfs = src.getFileSystem(conf);
      FileStatus srcfilestat = srcfs.getFileStatus(src);
      Path root = special && srcfilestat.isDir() ? src : src.getParent();
      if (srcfilestat.isDir()) {
        ++srcCount;
      }

      Stack<FileStatus> pathstack = new Stack<FileStatus>();
      for (pathstack.push(srcfilestat); !pathstack.empty();) {
        FileStatus cur = pathstack.pop();
        FileStatus[] children = srcfs.listStatus(cur.getPath());
        for (int i = 0; i < children.length; i++) {
          boolean skipfile = false;
          final FileStatus child = children[i];
          final String dst = makeRelative(root, child.getPath());
          ++srcCount;

          if (child.isDir()) {
            pathstack.push(child);
          } else {
            // skip file if the src and the dst files are the same.
            skipfile = update && sameFile(srcfs, child, dstfs, new Path(args.dst, dst));
            // skip file if it exceed file limit or size limit
            skipfile |= fileCount == args.filelimit || byteCount + child.getLen() > args.sizelimit;

            if (!skipfile) {
              ++fileCount;
              byteCount += child.getLen();

              if (LOG.isTraceEnabled()) {
                LOG.trace("adding file " + child.getPath());
              }

              ++cnsyncf;
              cbsyncs += child.getLen();
              if (cnsyncf > SYNC_FILE_MAX || cbsyncs > BYTES_PER_MAP) {
                src_writer.sync();
                dst_writer.sync();
                cnsyncf = 0;
                cbsyncs = 0L;
              }
            }
          }

          if (!skipfile) {
            src_writer.append(new LongWritable(child.isDir() ? 0 : child.getLen()),
                new FilePair(child, dst));
          }

          dst_writer.append(new Text(dst), new Text(child.getPath().toString()));
        }

        if (cur.isDir()) {
          String dst = makeRelative(root, cur.getPath());
          dir_writer.append(new Text(dst), new FilePair(cur, dst));
          if (++dirsyn > SYNC_FILE_MAX) {
            dirsyn = 0;
            dir_writer.sync();
          }
        }
      }
    }
  } finally {
    checkAndClose(src_writer);
    checkAndClose(dst_writer);
    checkAndClose(dir_writer);
  }

  FileStatus dststatus = null;
  try {
    dststatus = dstfs.getFileStatus(args.dst);
  } catch (FileNotFoundException fnfe) {
    LOG.info(args.dst + " does not exist.");
  }

  // create dest path dir if copying > 1 file
  if (dststatus == null) {
    if (srcCount > 1 && !dstfs.mkdirs(args.dst)) {
      throw new IOException("Failed to create" + args.dst);
    }
  }

  final Path sorted = new Path(jobDirectory, "_distcp_sorted");
  checkDuplication(jobfs, dstfilelist, sorted, conf);

  if (dststatus != null && args.flags.contains(Options.DELETE)) {
    deleteNonexisting(dstfs, dststatus, sorted, jobfs, jobDirectory, jobConf, conf);
  }

  Path tmpDir = new Path(
      (dstExists && !dstIsDir) || (!dstExists && srcCount == 1) ? args.dst.getParent() : args.dst,
      "_distcp_tmp_" + randomId);
  jobConf.set(TMP_DIR_LABEL, tmpDir.toUri().toString());
  LOG.info("srcCount=" + srcCount);
  jobConf.setInt(SRC_COUNT_LABEL, srcCount);
  jobConf.setLong(TOTAL_SIZE_LABEL, byteCount);
  setMapCount(byteCount, jobConf);
}
From source file: com.ricemap.spateDB.operations.LineRandomizer.java
License: Apache License

/**
 * Counts the exact number of lines in a file by issuing a MapReduce job
 * that does the thing
 * @param conf
 * @param infs
 * @param infile
 * @return
 * @throws IOException
 */
public static void randomizerMapReduce(Path infile, Path outfile, boolean overwrite) throws IOException {
  JobConf job = new JobConf(LineRandomizer.class);

  FileSystem outfs = outfile.getFileSystem(job);
  if (overwrite)
    outfs.delete(outfile, true);

  job.setJobName("Randomizer");
  job.setMapOutputKeyClass(IntWritable.class);
  job.setMapOutputValueClass(Text.class);

  job.setMapperClass(Map.class);
  ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
  job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);

  job.setReducerClass(Reduce.class);
  job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));

  FileSystem infs = infile.getFileSystem(job);
  int numOfPartitions = (int) Math
      .ceil((double) infs.getFileStatus(infile).getLen() / infs.getDefaultBlockSize(outfile));
  job.setInt(NumOfPartitions, numOfPartitions);

  job.setInputFormat(TextInputFormat.class);
  TextInputFormat.setInputPaths(job, infile);

  job.setOutputFormat(TextOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outfile);

  // Submit the job
  JobClient.runJob(job);
}
From source file: com.ricemap.spateDB.operations.Plot.java
License: Apache License

public static <S extends Shape> void plotMapReduce(Path inFile, Path outFile, Shape shape, int width,
    int height, Color color, boolean showBorders, boolean showBlockCount, boolean showRecordCount,
    boolean background) throws IOException {
  JobConf job = new JobConf(Plot.class);
  job.setJobName("Plot");

  job.setMapperClass(PlotMap.class);
  ClusterStatus clusterStatus = new JobClient(job).getClusterStatus();
  job.setNumMapTasks(clusterStatus.getMaxMapTasks() * 5);
  job.setReducerClass(PlotReduce.class);
  job.setNumReduceTasks(Math.max(1, clusterStatus.getMaxReduceTasks()));
  job.setMapOutputKeyClass(Prism.class);
  SpatialSite.setShapeClass(job, shape.getClass());
  job.setMapOutputValueClass(shape.getClass());

  FileSystem inFs = inFile.getFileSystem(job);
  Prism fileMbr = FileMBR.fileMBRMapReduce(inFs, inFile, shape, false);
  FileStatus inFileStatus = inFs.getFileStatus(inFile);

  CellInfo[] cellInfos;
  GlobalIndex<Partition> gindex = SpatialSite.getGlobalIndex(inFs, inFile);
  if (gindex == null) {
    // A heap file. The map function should partition the file
    GridInfo gridInfo = new GridInfo(fileMbr.t1, fileMbr.x1, fileMbr.y1, fileMbr.t2, fileMbr.x2,
        fileMbr.y2);
    gridInfo.calculateCellDimensions(inFileStatus.getLen(), inFileStatus.getBlockSize());
    cellInfos = gridInfo.getAllCells();
    // Doesn't make sense to show any partition information in a heap file
    showBorders = showBlockCount = showRecordCount = false;
  } else {
    cellInfos = SpatialSite.cellsOf(inFs, inFile);
  }

  // Set cell information in the job configuration to be used by the mapper
  SpatialSite.setCells(job, cellInfos);

  // Adjust width and height to maintain aspect ratio
  if ((fileMbr.x2 - fileMbr.x1) / (fileMbr.y2 - fileMbr.y1) > (double) width / height) {
    // Fix width and change height
    height = (int) ((fileMbr.y2 - fileMbr.y1) * width / (fileMbr.x2 - fileMbr.x1));
  } else {
    width = (int) ((fileMbr.x2 - fileMbr.x1) * height / (fileMbr.y2 - fileMbr.y1));
  }
  LOG.info("Creating an image of size " + width + "x" + height);
  ImageOutputFormat.setFileMBR(job, fileMbr);
  ImageOutputFormat.setImageWidth(job, width);
  ImageOutputFormat.setImageHeight(job, height);
  job.setBoolean(ShowBorders, showBorders);
  job.setBoolean(ShowBlockCount, showBlockCount);
  job.setBoolean(ShowRecordCount, showRecordCount);
  job.setInt(StrokeColor, color.getRGB());

  // Set input and output
  job.setInputFormat(ShapeInputFormat.class);
  ShapeInputFormat.addInputPath(job, inFile);

  // Set output committer which will stitch images together after all reducers finish
  job.setOutputCommitter(PlotOutputCommitter.class);
  job.setOutputFormat(ImageOutputFormat.class);
  TextOutputFormat.setOutputPath(job, outFile);

  if (background) {
    JobClient jc = new JobClient(job);
    lastSubmittedJob = jc.submitJob(job);
  } else {
    lastSubmittedJob = JobClient.runJob(job);
  }
}
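Note how job.setInt(StrokeColor, color.getRGB()) packs a java.awt.Color into a single int property. Below is a hedged sketch of the decode on the reduce side; the field and configure() body are hypothetical (not necessarily the actual PlotReduce code) and assume a reducer built on MapReduceBase.

// Hypothetical sketch: recover the stroke color inside the reducer. getInt falls
// back to 0 (opaque black via new Color(0)) when the property was never set.
private Color strokeColor;

@Override
public void configure(JobConf job) {
  super.configure(job);
  strokeColor = new Color(job.getInt(StrokeColor, 0));
}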