List of usage examples for org.apache.hadoop.mapreduce Job setJobName
public void setJobName(String name) throws IllegalStateException
From source file:com.sequenceiq.yarntest.mr.QuasiMonteCarlo.java
License:Apache License
/** * Run a map/reduce job for estimating Pi. * * @return the estimated value of Pi/*from w ww .ja v a2s.c o m*/ */ public static JobID submitPiEstimationMRApp(String jobName, int numMaps, long numPoints, Path tmpDir, Configuration conf) throws IOException, ClassNotFoundException, InterruptedException { Job job = new Job(conf); //setup job conf job.setJobName(jobName); job.setJarByClass(QuasiMonteCarlo.class); job.setInputFormatClass(SequenceFileInputFormat.class); job.setOutputKeyClass(BooleanWritable.class); job.setOutputValueClass(LongWritable.class); job.setOutputFormatClass(SequenceFileOutputFormat.class); job.setMapperClass(QmcMapper.class); job.setReducerClass(QmcReducer.class); job.setNumReduceTasks(1); // turn off speculative execution, because DFS doesn't handle // multiple writers to the same file. job.setSpeculativeExecution(false); //setup input/output directories final Path inDir = new Path(tmpDir, "in"); final Path outDir = new Path(tmpDir, "out"); FileInputFormat.setInputPaths(job, inDir); FileOutputFormat.setOutputPath(job, outDir); final FileSystem fs = FileSystem.get(conf); if (fs.exists(tmpDir)) { fs.delete(tmpDir, true); // throw new IOException("Tmp directory " + fs.makeQualified(tmpDir) // + " already exists. 
Please remove it first."); } if (!fs.mkdirs(inDir)) { throw new IOException("Cannot create input directory " + inDir); } // try { //generate an input file for each map task for (int i = 0; i < numMaps; ++i) { final Path file = new Path(inDir, "part" + i); final LongWritable offset = new LongWritable(i * numPoints); final LongWritable size = new LongWritable(numPoints); final SequenceFile.Writer writer = SequenceFile.createWriter(fs, conf, file, LongWritable.class, LongWritable.class, CompressionType.NONE); try { writer.append(offset, size); } finally { writer.close(); } System.out.println("Wrote input for Map #" + i); } //start a map/reduce job System.out.println("Starting Job"); final long startTime = System.currentTimeMillis(); job.submit(); // final double duration = (System.currentTimeMillis() - startTime)/1000.0; // System.out.println("Job Finished in " + duration + " seconds"); return job.getJobID(); // } finally { // fs.delete(tmpDir, true); // } }
From source file:com.shmsoft.dmass.main.MRFreeEedProcess.java
License:Apache License
/**
 * Configures the Hadoop processing job for a project and blocks until it completes.
 *
 * <p>The project is taken from {@code Project.getProject()}; when that is null or
 * empty it is loaded from the project file and registered. The project, the
 * settings and the column metadata file contents are passed to the tasks through
 * the job configuration.
 *
 * @param args {@code args[0]} is the project file name, {@code args[1]} the output path
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if loading the project, setting up the job or running it fails
 */
@Override
public int run(String[] args) throws Exception {
    // inventory dir holds all package (zip) files resulting from stage
    String projectFileName = args[0];
    String outputPath = args[1];
    logger.info("Running Hadoop job");
    logger.info("Input project file = " + projectFileName);
    logger.info("Output path = " + outputPath);

    // Hadoop configuration class
    Configuration configuration = getConf();
    // No speculative execution! Do not process the same file twice
    configuration.set("mapred.reduce.tasks.speculative.execution", "false");
    // TODO even in local mode, the first argument should not be the inventory
    // but write a complete project file instead
    Project project = Project.getProject();
    if (project == null || project.isEmpty()) {
        // configure Hadoop input files
        System.out.println("Reading project file " + projectFileName);
        project = new Project().loadFromFile(new File(projectFileName));
        Project.setProject(project);
    }
    project.setProperty(ParameterProcessing.OUTPUT_DIR_HADOOP, outputPath);
    // send complete project information to all mappers and reducers
    configuration.set(ParameterProcessing.PROJECT, project.toString());

    Settings.load();
    configuration.set(ParameterProcessing.SETTINGS_STR, Settings.getSettings().toString());
    configuration.set(ParameterProcessing.METADATA_FILE,
            Files.toString(new File(ColumnMetadata.metadataNamesFile), Charset.defaultCharset()));

    Job job = new Job(configuration);
    job.setJarByClass(MRFreeEedProcess.class);
    job.setJobName("MRFreeEedProcess");

    // Hadoop processes key-value pairs
    job.setOutputKeyClass(MD5Hash.class);
    job.setOutputValueClass(MapWritable.class);

    // set map and reduce classes
    job.setMapperClass(Map.class);
    job.setReducerClass(Reduce.class);

    // Hadoop TextInputFormat class
    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    logger.debug("project.isEnvHadoop() = {} ", project.isEnvHadoop());
    String inputPath = projectFileName;
    if (project.isEnvHadoop() || Settings.getSettings().isHadoopDebug()) {
        inputPath = formInputPath(project);
    }

    logger.debug("Ready to run, inputPath = {}, outputPath = {}", inputPath, outputPath);
    FileInputFormat.setInputPaths(job, inputPath);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));

    SHMcloudLogging.init(false);

    if (Settings.getSettings().isHadoopDebug()) {
        // In debug mode a previous local output directory is removed before the job runs.
        if (new File(outputPath).exists()) {
            Util.deleteDirectory(new File(outputPath));
        }
    }

    SolrIndex.getInstance().init();
    boolean success;
    try {
        success = job.waitForCompletion(true);
        if (project.isEnvHadoop() && project.isFsS3()) {
            transferResultsToS3(outputPath);
        }
    } finally {
        // Always pair init() with destroy(), even when the job or the S3 transfer throws.
        SolrIndex.getInstance().destroy();
    }

    return success ? 0 : 1;
}
From source file:com.shopzilla.hadoop.mapreduce.MiniMRClusterContextMRTest.java
License:Apache License
/**
 * Runs a word-count job against the mini cluster and checks that the output
 * consists of exactly the expected lines, in order.
 *
 * @throws Exception if job setup, job execution or file system access fails
 */
@Test
public void testWordCount() throws Exception {
    Path input = new Path("/user/test/keywords_data");
    Path output = new Path("/user/test/word_count");

    Job job = new Job(configuration);

    job.setJobName("Word Count Test");

    job.setMapperClass(WordCountMapper.class);
    job.setReducerClass(SumReducer.class);

    job.setInputFormatClass(TextInputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LongWritable.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);
    job.setNumReduceTasks(1);
    FileInputFormat.setInputPaths(job, input);
    FileOutputFormat.setOutputPath(job, output);

    assertTrue("All files from /data classpath directory should have been copied into HDFS",
            miniMRClusterContext.getFileSystem().exists(input));

    // Fail right here when the job itself fails, instead of through a later assertion.
    assertTrue("Word count job should have completed successfully", job.waitForCompletion(true));

    assertTrue("Output file should have been created", miniMRClusterContext.getFileSystem().exists(output));

    final LinkedList<String> expectedLines = new LinkedList<String>();
    expectedLines.add("goodbye\t1");
    expectedLines.add("hello\t1");
    expectedLines.add("world\t2");

    // Each output line must match the head of the expected list, which is consumed as we go.
    miniMRClusterContext.processData(output, new Function<String, Void>() {
        @Override
        public Void apply(String line) {
            assertEquals(expectedLines.pop(), line);
            return null;
        }
    });
    // Nothing may be left over: every expected line must have been produced.
    assertEquals(0, expectedLines.size());
}
From source file:com.skp.experiment.cf.als.hadoop.DistributedParallelALSFactorizationJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from w w w. j a va 2 s . c om*/ addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("numUsers", null, "number of users", true); addOption("numItems", null, "number of items", true); addOption("blockSize", null, "dfs block size.", false); //addOption("runIterations", null, "true or false for iterations", true); Map<String, String> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures")); numIterations = Integer.parseInt(parsedArgs.get("--numIterations")); lambda = Double.parseDouble(parsedArgs.get("--lambda")); alpha = Double.parseDouble(parsedArgs.get("--alpha")); implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback")); numUsers = Integer.parseInt(parsedArgs.get("--numUsers")); numItems = Integer.parseInt(parsedArgs.get("--numItems")); dfsBlockSize = getOption("blockSize") == null ? 
64 * 1024 * 1024 : Long.parseLong(getOption("blockSize")); /* * compute the factorization A = U M' * * where A (users x items) is the matrix of known ratings * U (users x features) is the representation of users in the feature space * M (items x features) is the representation of items in the feature space */ /* create A' */ Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class, ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); itemRatings.setCombinerClass(VectorSumReducer.class); itemRatings.waitForCompletion(true); //numItems = // (int) itemRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); log.info("Number of Items\t{}", numItems); /* create A */ Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); userRatings.setCombinerClass(MergeVectorsCombiner.class); userRatings.waitForCompletion(true); //numUsers = // (int) userRatings.getCounters().findCounter("org.apache.hadoop.mapred.Task$Counter", "REDUCE_OUTPUT_RECORDS").getValue(); log.info("Number of Users\t{}", numUsers); /* count item per user */ Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnts"), SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class, IntWritable.class, UserItemCntsReducer.class, IntWritable.class, IntWritable.class, SequenceFileOutputFormat.class); userItemCntsJob.setJobName("user ratings count"); userItemCntsJob.setCombinerClass(UserItemCntsReducer.class); userItemCntsJob.waitForCompletion(true); //TODO this could be fiddled into one of the upper jobs Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"), AverageRatingMapper.class, IntWritable.class, 
VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); averageItemRatings.setCombinerClass(MergeVectorsCombiner.class); averageItemRatings.waitForCompletion(true); Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf()); /* create an initial M */ initializeM(averageRatings); for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) { DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1), getTempPath("Mtemp" + String.valueOf(currentIteration - 1)), numItems, numFeatures); curM.setConf(new Configuration()); DistributedRowMatrix YtransposeY = curM.times(curM); // broadcast M, read A row-wise, recompute U row-wise // log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations); runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1), YtransposeY.getRowPath(), numItems); DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration), getTempPath("Utmp" + String.valueOf(currentIteration)), numUsers, numFeatures); curU.setConf(new Configuration()); DistributedRowMatrix XtransposeX = curU.times(curU); // set up index of U // CreateMapFileFromSeq.createMapFile(pathToU(currentIteration)); // broadcast U, read A' row-wise, recompute M row-wise // log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations); runDistributedImplicitSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration), XtransposeX.getRowPath(), numUsers); } return 0; }
From source file:com.skp.experiment.cf.als.hadoop.ParallelALSFactorizationJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from www. j a va 2 s . c o m*/ addOutputOption(); addOption("lambda", null, "regularization parameter", true); addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false)); addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40)); addOption("numFeatures", null, "dimension of the feature space", true); addOption("numIterations", null, "number of iterations", true); addOption("indexSizes", null, "index sizes Path", true); addOption("startIteration", null, "start iteration number", String.valueOf(0)); addOption("oldM", null, "old M matrix Path.", null); addOption("largeUserFeatures", null, "true if user x feature matrix is too large for memory", String.valueOf(true)); addOption("rmseCurve", null, "true if want to extract rmse curve", String.valueOf(true)); addOption("cleanUp", null, "true if want to clean up temporary matrix", String.valueOf(true)); addOption("useTransform", null, "true if using logarithm as transform", String.valueOf(true)); addOption("rateIndex", null, "0 based index for rate column in input file.", String.valueOf(2)); Map<String, String> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } try { /** step 0: fetch dimention of training set matrix. 
*/ Map<String, String> indexSizesTmp = ALSMatrixUtil.fetchTextFiles(new Path(getOption("indexSizes")), DELIMETER, Arrays.asList(0), Arrays.asList(1)); numFeatures = Integer.parseInt(parsedArgs.get("--numFeatures")); numIterations = Integer.parseInt(parsedArgs.get("--numIterations")); lambda = Double.parseDouble(parsedArgs.get("--lambda")); alpha = Double.parseDouble(parsedArgs.get("--alpha")); implicitFeedback = Boolean.parseBoolean(parsedArgs.get("--implicitFeedback")); numUsers = Integer.parseInt(indexSizesTmp.get("0")); numItems = Integer.parseInt(indexSizesTmp.get("1")); numTaskTrackers = HadoopClusterUtil.getNumberOfTaskTrackers(getConf()) * multiplyMapTasks; startIteration = Integer.parseInt(parsedArgs.get("--startIteration")); largeUserFeatures = Boolean.parseBoolean(getOption("largeUserFeatures")); useRMSECurve = Boolean.parseBoolean(getOption("rmseCurve")); cleanUp = Boolean.parseBoolean(getOption("cleanUp")); useTransform = Boolean.parseBoolean(getOption("useTransform")); rateIndex = Integer.parseInt(getOption("rateIndex")); FileSystem fs = FileSystem.get(getConf()); if (!fs.exists(pathToTransformed())) { if (useTransform) { // transform price into rating Job transformJob = prepareJob(getInputPath(), pathToTransformed(), TextInputFormat.class, TransformColumnValueMapper.class, NullWritable.class, Text.class, TextOutputFormat.class); transformJob.waitForCompletion(true); } else { FileUtil.copy(FileSystem.get(getConf()), getInputPath(), FileSystem.get(getConf()), pathToTransformed(), false, getConf()); } } /* if (getOption("oldM") != null) { runOnetimeSolver(pathToTransformed(), getOutputPath("U"), new Path(getOption("oldM"))); return 0; } */ /* * compute the factorization A = U M' * * where A (users x items) is the matrix of known ratings * U (users x features) is the representation of users in the feature space * M (items x features) is the representation of items in the feature space */ if (startIteration == 0) { if (!fs.exists(pathToItemRatings())) { // 
create A' Job itemRatings = prepareJob(pathToTransformed(), pathToItemRatings(), TextInputFormat.class, ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class, IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); itemRatings.setCombinerClass(VectorSumReducer.class); long matrixSizeExp = (long) (8L * numUsers * numFeatures * SAFE_MARGIN); long memoryThreshold = HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (long) HadoopClusterUtil.MAP_TASKS_PER_NODE; int numTaskPerDataNode = Math.max(1, (int) (HadoopClusterUtil.PHYSICAL_MEMERY_LIMIT / (double) matrixSizeExp)); //log.info("matrix Size: " + matrixSizeExp + ", memorhThreshold: " + memoryThreshold + ", numTaskPerDataNode: " + numTaskPerDataNode); if (matrixSizeExp > memoryThreshold) { //log.info("A: {}", numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf())); int numReducer = Math.min( numTaskPerDataNode * HadoopClusterUtil.getNumberOfTaskTrackers(getConf()), HadoopClusterUtil.getMaxMapTasks(getConf())); //log.info("Number Of Reducer: " + numReducer); itemRatings.setNumReduceTasks(numReducer); } itemRatings.waitForCompletion(true); } if (!fs.exists(pathToUserRatings())) { Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); userRatings.setNumReduceTasks(HadoopClusterUtil.getNumberOfTaskTrackers(getConf())); userRatings.setCombinerClass(MergeVectorsCombiner.class); userRatings.setNumReduceTasks(HadoopClusterUtil.getMaxMapTasks(getConf())); userRatings.waitForCompletion(true); } if (!fs.exists(getOutputPath("userItemCnt"))) { // count item per user Job userItemCntsJob = prepareJob(pathToUserRatings(), getOutputPath("userItemCnt"), SequenceFileInputFormat.class, UserItemCntsMapper.class, IntWritable.class, IntWritable.class, SequenceFileOutputFormat.class); userItemCntsJob.setJobName("user ratings 
count"); userItemCntsJob.waitForCompletion(true); } if (!fs.exists(getTempPath("averageRatings"))) { //TODO this could be fiddled into one of the upper jobs Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"), AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class, IntWritable.class, VectorWritable.class); averageItemRatings.setCombinerClass(MergeVectorsCombiner.class); averageItemRatings.waitForCompletion(true); } if (!fs.exists(new Path(pathToM(-1), "part-m-00000"))) { Vector averageRatings = ALSMatrixUtil.readFirstRow(getTempPath("averageRatings"), getConf()); /** create an initial M */ initializeM(averageRatings); } } for (int currentIteration = startIteration; currentIteration < numIterations; currentIteration++) { DistributedRowMatrix curM = new DistributedRowMatrix(pathToM(currentIteration - 1), getTempPath("Mtemp/tmp-" + String.valueOf(currentIteration - 1) + "/M"), numItems, numFeatures); curM.setConf(getConf()); DistributedRowMatrix YtransposeY = curM.times(curM); /** broadcast M, read A row-wise, recompute U row-wise */ log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations); runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1), YtransposeY.getRowPath(), numItems, false); DistributedRowMatrix curU = new DistributedRowMatrix(pathToU(currentIteration), getTempPath("Utmp/tmp-" + String.valueOf(currentIteration) + "/U"), numUsers, numFeatures); curU.setConf(getConf()); DistributedRowMatrix XtransposeX = curU.times(curU); /** broadcast U, read A' row-wise, recompute M row-wise */ log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations); runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration), XtransposeX.getRowPath(), numUsers, largeUserFeatures); /** calculate rmse on each updated matrix U, M and decide to further iteration */ if (currentIteration > startIteration && useRMSECurve) { 
Pair<Integer, Double> UsquaredError = calculateMatrixDistanceSquared( pathToU(currentIteration - 1), pathToU(currentIteration), currentIteration); Pair<Integer, Double> MsquaredError = calculateMatrixDistanceSquared( pathToM(currentIteration - 1), pathToM(currentIteration), currentIteration); String currentRMSE = currentIteration + DELIMETER + UsquaredError.getFirst() + DELIMETER + UsquaredError.getSecond() + DELIMETER + MsquaredError.getFirst() + DELIMETER + MsquaredError.getSecond() + DefaultOptionCreator.NEWLINE; rmsePerIteration += currentRMSE; log.info("iteration {}: {}", currentIteration, currentRMSE); } if (currentIteration >= startIteration + 2 && cleanUp) { fs.deleteOnExit(pathToU(currentIteration - 2)); fs.deleteOnExit(pathToM(currentIteration - 2)); } } return 0; } catch (Exception e) { e.printStackTrace(); return -1; } finally { if (useRMSECurve) { HadoopClusterUtil.writeToHdfs(getConf(), getOutputPath("RMSE"), rmsePerIteration); } } }
From source file:com.soteradefense.dga.louvain.mapreduce.CommunityCompression.java
License:Apache License
/**
 * Runs the community-compression map/reduce job from {@code inputPath} to
 * {@code outputPath} and blocks until it completes.
 *
 * <p>All DGA system properties are copied into the tool's Hadoop configuration
 * before the job is created.
 *
 * @param args not read by this method
 * @return 0 if the job completed successfully, 1 otherwise
 * @throws Exception if job setup or execution fails
 */
public int run(String[] args) throws Exception {
    Configuration mrConf = this.getConf();
    for (java.util.Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
        mrConf.set(entry.getKey(), entry.getValue());
    }

    Job job = Job.getInstance(mrConf);
    job.setJarByClass(CommunityCompression.class);
    Path in = new Path(inputPath);
    Path out = new Path(outputPath);
    FileInputFormat.setInputPaths(job, in);
    FileOutputFormat.setOutputPath(job, out);
    job.setJobName("CommunityCompression");

    job.setInputFormatClass(TextInputFormat.class);
    job.setOutputFormatClass(TextOutputFormat.class);

    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(LouvainVertexWritable.class);

    job.setMapperClass(CommunityCompression.Map.class);
    job.setReducerClass(CommunityCompression.Reduce.class);

    // Log the name and paths, not the Job: Job.toString() requires the RUNNING state,
    // and this job has not been submitted yet.
    logger.debug("Running Mapreduce step {} with input {} and output {}", job.getJobName(), in, out);
    return job.waitForCompletion(false) ? 0 : 1;
}
From source file:com.soteradefense.dga.louvain.mapreduce.LouvainTableSynthesizer.java
License:Apache License
/**
 * Repeatedly joins the current input folder with the next Giraph output folder,
 * for as long as a next Giraph folder exists under {@code basePath}.
 *
 * <p>Iteration {@code i} reads the current input and Giraph folder {@code i + 1}
 * and writes table folder {@code i}; that table folder is the input of the next
 * iteration. {@code basePath} is normalised to end with "/" as a side effect.
 *
 * @param args not read by this method
 * @return 0 when all joins completed; -1 if a job failed or an
 *         {@code IOException}, {@code InterruptedException} or
 *         {@code ClassNotFoundException} was thrown
 * @throws Exception declared by the interface; this body catches the checked
 *                   exceptions listed above and reports them as -1
 */
@Override
public int run(String[] args) throws Exception {
    try {
        int iteration = 0;
        if (!basePath.endsWith("/")) {
            basePath = basePath + "/";
        }
        String inputPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
        String joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        String outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;

        // Apply the system properties first, so the file system lookup and every
        // job created below see the same configuration.
        Configuration mrConf = this.getConf();
        for (Map.Entry<String, String> entry : dgaConfiguration.getSystemProperties().entrySet()) {
            mrConf.set(entry.getKey(), entry.getValue());
        }

        FileSystem fs = FileSystem.get(mrConf);
        while (fs.exists(new Path(joinPath))) {
            System.out.println("Processing " + inputPath + " and " + joinPath);
            Job job = Job.getInstance(mrConf);
            job.setJobName("Louvain Table Synthesizer " + iteration);

            job.setJarByClass(LouvainTableSynthesizer.class);

            job.setMapperClass(LouvainTableSynthesizerMapper.class);
            job.setReducerClass(LouvainTableSynthesizerReducer.class);

            job.setInputFormatClass(TextInputFormat.class);
            job.setOutputFormatClass(TextOutputFormat.class);

            job.setMapOutputKeyClass(Text.class);
            job.setMapOutputValueClass(Text.class);

            // Reducer output
            job.setOutputKeyClass(Text.class);
            job.setOutputValueClass(NullWritable.class);

            // Add both input folders
            FileInputFormat.addInputPath(job, new Path(inputPath));
            FileInputFormat.addInputPath(job, new Path(joinPath));
            FileOutputFormat.setOutputPath(job, new Path(outputPath));

            // The next iteration reads this job's output, so a failed job ends the chain.
            if (!job.waitForCompletion(true)) {
                System.err.println("Louvain Table Synthesizer " + iteration + " failed");
                return -1;
            }

            // This iteration's output becomes the next iteration's input.
            inputPath = outputPath;
            iteration++;
            outputPath = basePath + TABLE_BASE_NAME + FILE_NAME_SEPARATOR + iteration;
            joinPath = basePath + GIRAPH_FOLDER_BASE_NAME + FILE_NAME_SEPARATOR + (iteration + 1);
        }
    } catch (IOException e) {
        e.printStackTrace();
        return -1;
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        e.printStackTrace();
        return -1;
    } catch (ClassNotFoundException e) {
        e.printStackTrace();
        return -1;
    }
    return 0;
}
From source file:com.soteradefense.dga.LouvainRunner.java
License:Apache License
private int runMapreduceJob(String inputPath, String outputPath, DGAConfiguration conf) throws Exception { Configuration mrConf = new Configuration(); for (Map.Entry<String, String> entry : conf.getSystemProperties().entrySet()) { mrConf.set(entry.getKey(), entry.getValue()); }//from w w w.ja v a 2s .c o m Job job = Job.getInstance(configuration); job.setJarByClass(LouvainRunner.class); Path in = new Path(inputPath); Path out = new Path(outputPath); FileInputFormat.setInputPaths(job, in); FileOutputFormat.setOutputPath(job, out); job.setJobName("CommunityCompression"); job.setInputFormatClass(TextInputFormat.class); job.setOutputFormatClass(TextOutputFormat.class); job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(LouvainVertexWritable.class); job.setMapperClass(CommunityCompression.Map.class); job.setReducerClass(CommunityCompression.Reduce.class); logger.debug("Running Mapreduce step with job configuration: {}", job); return job.waitForCompletion(false) ? 0 : 1; }
From source file:com.sreejith.loganalyzer.mapreduce.LogDriver.java
License:Apache License
public static void main(String[] args) throws Exception { Job job = new Job(); job.setJarByClass(LogDriver.class); job.setJobName("Log Analyzer"); job.setMapperClass(LogMapper.class); job.setPartitionerClass(LogPartitioner.class); job.setCombinerClass(LogReducer.class); job.setReducerClass(LogReducer.class); job.setNumReduceTasks(2);//from www . jav a2 s . c o m job.setOutputKeyClass(IntWritable.class); job.setOutputValueClass(IntWritable.class); FileInputFormat.addInputPath(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); job.waitForCompletion(true); }
From source file:com.sudarmuthu.hadoop.countwords.CountWords.java
/**
 * Entry point of the word-counting job.
 *
 * <p>Exits with status -1 unless exactly two arguments are given; otherwise
 * exits with 0 when the job succeeds and 1 when it fails.
 *
 * @param args {@code args[0]} is the input path, {@code args[1]} the output path
 * @throws Exception if job setup or execution fails
 */
public static void main(String[] args) throws Exception {
    final boolean hasBothPaths = args.length == 2;
    if (!hasBothPaths) {
        System.err.println("Usage: CountWords <input path> <output path>");
        System.exit(-1);
    }

    final Job countJob = new Job();
    countJob.setJarByClass(CountWords.class);
    countJob.setJobName("Count Words");

    // Input and output locations come straight from the command line.
    final Path inputLocation = new Path(args[0]);
    FileInputFormat.addInputPath(countJob, inputLocation);
    final Path outputLocation = new Path(args[1]);
    FileOutputFormat.setOutputPath(countJob, outputLocation);

    countJob.setMapperClass(CountWordsMapper.class);
    countJob.setReducerClass(CountWordsReducer.class);

    countJob.setOutputKeyClass(Text.class);
    countJob.setOutputValueClass(IntWritable.class);

    // Translate the job outcome into the process exit status.
    final int exitStatus;
    if (countJob.waitForCompletion(true)) {
        exitStatus = 0;
    } else {
        exitStatus = 1;
    }
    System.exit(exitStatus);
}