List of usage examples for org.apache.hadoop.mapreduce Job getCounters
public Counters getCounters() throws IOException
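Every example below follows the same pattern: run a Job, then call getCounters() on the completed job and read one specific counter via findCounter(...). As a minimal, self-contained sketch of that pattern (not taken from any of the projects below — the class name, the input/output path arguments, and the identity mapper/reducer are placeholders; TaskCounter.MAP_INPUT_RECORDS is a built-in Hadoop counter):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.Counters;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.TaskCounter;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class CounterExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "counter-example");
        job.setJarByClass(CounterExample.class);
        // Identity mapper/reducer keep the sketch short; a real job sets its own classes.
        job.setMapperClass(Mapper.class);
        job.setReducerClass(Reducer.class);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));   // placeholder input path
        FileOutputFormat.setOutputPath(job, new Path(args[1])); // placeholder output path

        if (!job.waitForCompletion(true)) {
            System.exit(1);
        }
        // getCounters() fetches the aggregated counters of the finished job.
        Counters counters = job.getCounters();
        Counter mapRecords = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS);
        System.out.println(mapRecords.getDisplayName() + " = " + mapRecords.getValue());
    }
}

Fetching counters involves a call to the application master or history server, so the examples below typically call getCounters() once after waitForCompletion(true) and then read individual counters from the returned Counters object.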
From source file: org.apache.kudu.mapreduce.tools.ITImportCsv.java
License: Apache License

@Test
public void test() throws Exception {
    Configuration conf = new Configuration();
    String testHome =
        HADOOP_UTIL.setupAndGetTestDir(ITImportCsv.class.getName(), conf).getAbsolutePath();

    // Create a 2-line input file
    File data = new File(testHome, "data.csv");
    writeCsvFile(data);

    StringBuilder sb = new StringBuilder();
    for (ColumnSchema col : schema.getColumns()) {
        sb.append(col.getName());
        sb.append(",");
    }
    sb.deleteCharAt(sb.length() - 1);

    String[] args = new String[] {
        "-D" + CommandLineParser.MASTER_ADDRESSES_KEY + "=" + getMasterAddresses(),
        sb.toString(), TABLE_NAME, data.toString() };

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    Job job = ImportCsv.createSubmittableJob(parser.getConfiguration(), parser.getRemainingArgs());
    assertTrue("Test job did not end properly", job.waitForCompletion(true));

    assertEquals(1, job.getCounters().findCounter(ImportCsv.Counters.BAD_LINES).getValue());
    assertEquals(3, countRowsInScan(client.newScannerBuilder(openTable(TABLE_NAME)).build()));
    // TODO: should verify the actual returned rows, not just the count!
}
From source file: org.apache.kudu.mapreduce.tools.ITRowCounter.java
License: Apache License

@Test
public void test() throws Exception {
    Configuration conf = new Configuration();
    HADOOP_UTIL.setupAndGetTestDir(ITRowCounter.class.getName(), conf).getAbsolutePath();
    createFourTabletsTableWithNineRows(TABLE_NAME);

    String[] args = new String[] {
        "-D" + CommandLineParser.MASTER_ADDRESSES_KEY + "=" + getMasterAddresses(), TABLE_NAME };

    GenericOptionsParser parser = new GenericOptionsParser(conf, args);
    Job job = RowCounter.createSubmittableJob(parser.getConfiguration(), parser.getRemainingArgs());
    assertTrue("Job did not end properly", job.waitForCompletion(true));

    assertEquals(9, job.getCounters().findCounter(RowCounter.Counters.ROWS).getValue());
}
From source file: org.apache.mahout.cf.taste.hadoop.als.ParallelALSFactorizationJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("lambda", null, "regularization parameter", true);
    addOption("implicitFeedback", null, "data consists of implicit feedback?", String.valueOf(false));
    addOption("alpha", null, "confidence parameter (only used on implicit feedback)", String.valueOf(40));
    addOption("numFeatures", null, "dimension of the feature space", true);
    addOption("numIterations", null, "number of iterations", true);
    addOption("numThreadsPerSolver", null, "threads per solver mapper", String.valueOf(1));
    addOption("usesLongIDs", null, "input contains long IDs that need to be translated");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    numFeatures = Integer.parseInt(getOption("numFeatures"));
    numIterations = Integer.parseInt(getOption("numIterations"));
    lambda = Double.parseDouble(getOption("lambda"));
    alpha = Double.parseDouble(getOption("alpha"));
    implicitFeedback = Boolean.parseBoolean(getOption("implicitFeedback"));
    numThreadsPerSolver = Integer.parseInt(getOption("numThreadsPerSolver"));
    usesLongIDs = Boolean.parseBoolean(getOption("usesLongIDs", String.valueOf(false)));

    /*
     * compute the factorization A = U M'
     *
     * where A (users x items) is the matrix of known ratings
     *       U (users x features) is the representation of users in the feature space
     *       M (items x features) is the representation of items in the feature space
     */

    if (usesLongIDs) {
        Job mapUsers = prepareJob(getInputPath(), getOutputPath("userIDIndex"), TextInputFormat.class,
            MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
            VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
        mapUsers.getConfiguration().set(TOKEN_POS, String.valueOf(TasteHadoopUtils.USER_ID_POS));
        mapUsers.waitForCompletion(true);

        Job mapItems = prepareJob(getInputPath(), getOutputPath("itemIDIndex"), TextInputFormat.class,
            MapLongIDsMapper.class, VarIntWritable.class, VarLongWritable.class, IDMapReducer.class,
            VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
        mapItems.getConfiguration().set(TOKEN_POS, String.valueOf(TasteHadoopUtils.ITEM_ID_POS));
        mapItems.waitForCompletion(true);
    }

    /* create A' */
    Job itemRatings = prepareJob(getInputPath(), pathToItemRatings(), TextInputFormat.class,
        ItemRatingVectorsMapper.class, IntWritable.class, VectorWritable.class, VectorSumReducer.class,
        IntWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    itemRatings.setCombinerClass(VectorSumCombiner.class);
    itemRatings.getConfiguration().set(USES_LONG_IDS, String.valueOf(usesLongIDs));
    boolean succeeded = itemRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    /* create A */
    Job userRatings = prepareJob(pathToItemRatings(), pathToUserRatings(), TransposeMapper.class,
        IntWritable.class, VectorWritable.class, MergeUserVectorsReducer.class, IntWritable.class,
        VectorWritable.class);
    userRatings.setCombinerClass(MergeVectorsCombiner.class);
    succeeded = userRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    //TODO this could be fiddled into one of the upper jobs
    Job averageItemRatings = prepareJob(pathToItemRatings(), getTempPath("averageRatings"),
        AverageRatingMapper.class, IntWritable.class, VectorWritable.class, MergeVectorsReducer.class,
        IntWritable.class, VectorWritable.class);
    averageItemRatings.setCombinerClass(MergeVectorsCombiner.class);
    succeeded = averageItemRatings.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    Vector averageRatings = ALS.readFirstRow(getTempPath("averageRatings"), getConf());

    numItems = averageRatings.getNumNondefaultElements();
    numUsers = (int) userRatings.getCounters().findCounter(Stats.NUM_USERS).getValue();

    log.info("Found {} users and {} items", numUsers, numItems);

    /* create an initial M */
    initializeM(averageRatings);

    for (int currentIteration = 0; currentIteration < numIterations; currentIteration++) {
        /* broadcast M, read A row-wise, recompute U row-wise */
        log.info("Recomputing U (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToUserRatings(), pathToU(currentIteration), pathToM(currentIteration - 1),
            currentIteration, "U", numItems);
        /* broadcast U, read A' row-wise, recompute M row-wise */
        log.info("Recomputing M (iteration {}/{})", currentIteration, numIterations);
        runSolver(pathToItemRatings(), pathToM(currentIteration), pathToU(currentIteration),
            currentIteration, "M", numUsers);
    }

    return 0;
}
From source file: org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this "
        + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("ratingShift", "rs", "shift ratings by this value", "0.0");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    float ratingShift = Float.parseFloat(getOption("ratingShift"));

    //convert items to an internal index
    Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class,
        ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class,
        VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
    itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
    boolean succeeded = itemIDIndex.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    //convert user preferences into a vector per user
    Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class,
        ToItemPrefsMapper.class, VarLongWritable.class,
        booleanData ? VarLongWritable.class : EntityPrefWritable.class,
        ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class,
        SequenceFileOutputFormat.class);
    toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData);
    toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
    toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift));
    succeeded = toUserVectors.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    //we need the number of users later
    int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS)
        .getValue();
    HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf());

    //build the rating matrix
    Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX),
        ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class,
        IntWritable.class, VectorWritable.class);
    toItemVectors.setCombinerClass(ToItemVectorsReducer.class);
    succeeded = toItemVectors.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    return 0;
}
From source file: org.apache.mahout.cf.taste.hbase.preparation.PreparePreferenceMatrixJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this "
        + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("ratingShift", "rs", "shift ratings by this value", "0.0");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    float ratingShift = Float.parseFloat(getOption("ratingShift"));

    String workingTable = getConf().get(RecommenderJob.PARAM_WORKING_TABLE);
    String cfRatings = getConf().get(RecommenderJob.PARAM_CF_RATINGS);

    //convert items to an internal index
    Configuration mapred_config = HBaseConfiguration.create();
    mapred_config.setBoolean("mapred.compress.map.output", true);
    mapred_config.set(RecommenderJob.PARAM_CF_RATINGS, cfRatings);

    Job itemIDIndex = Job.getInstance(mapred_config);
    itemIDIndex.setJobName(HadoopUtil.getCustomJobName(getClass().getSimpleName(), itemIDIndex,
        ItemIDIndexMapper.class, ItemIDIndexReducer.class));
    itemIDIndex.setJarByClass(ItemIDIndexMapper.class); // class that contains mapper and reducer

    Scan scan = new Scan();
    scan.setCaching(500); // 1 is the default in Scan, which will be bad for MapReduce jobs
    scan.setCacheBlocks(false); // don't set to true for MR jobs
    // set other scan attrs

    TableMapReduceUtil.initTableMapperJob(workingTable, // input table
        scan, // Scan instance to control CF and attribute selection
        ItemIDIndexMapper.class, // mapper class
        VarIntWritable.class, // mapper output key
        VarLongWritable.class, // mapper output value
        itemIDIndex);
    itemIDIndex.setReducerClass(ItemIDIndexReducer.class); // reducer class
    itemIDIndex.setOutputKeyClass(VarIntWritable.class);
    itemIDIndex.setOutputValueClass(VarLongWritable.class);
    itemIDIndex.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(itemIDIndex, getOutputPath(ITEMID_INDEX)); // adjust directories as required

    if (!itemIDIndex.waitForCompletion(true))
        return -1;

    //////////////////////////////////////////////////////////////////////////

    //convert user preferences into a vector per user
    mapred_config.setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData);
    mapred_config.setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
    mapred_config.set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift));

    Job toUserVectors_hb = Job.getInstance(mapred_config);
    toUserVectors_hb.setJobName(HadoopUtil.getCustomJobName(getClass().getSimpleName(), toUserVectors_hb,
        ToItemPrefsMapper.class, ToUserVectorsReducer.class));
    toUserVectors_hb.setJarByClass(ToItemPrefsMapper.class); // class that contains mapper and reducer

    TableMapReduceUtil.initTableMapperJob(workingTable, // input table
        scan, // Scan instance to control CF and attribute selection
        ToItemPrefsMapper.class, // mapper class
        VarLongWritable.class, // mapper output key
        booleanData ? VarLongWritable.class : EntityPrefWritable.class, // mapper output value
        toUserVectors_hb);
    toUserVectors_hb.setReducerClass(ToUserVectorsReducer.class); // reducer class
    toUserVectors_hb.setNumReduceTasks(1); // at least one, adjust as required
    toUserVectors_hb.setOutputKeyClass(VarLongWritable.class);
    toUserVectors_hb.setOutputValueClass(VectorWritable.class);
    toUserVectors_hb.setOutputFormatClass(SequenceFileOutputFormat.class);
    FileOutputFormat.setOutputPath(toUserVectors_hb, getOutputPath(USER_VECTORS)); // adjust directories as required

    if (!toUserVectors_hb.waitForCompletion(true))
        return -1;

    //////////////////////////////////////////////////////////////////////////

    //we need the number of users later
    int numberOfUsers = (int) toUserVectors_hb.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS)
        .getValue();
    HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf());

    //build the rating matrix
    Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX),
        ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class,
        IntWritable.class, VectorWritable.class);
    toItemVectors.setCombinerClass(ToItemVectorsReducer.class);

    if (!toItemVectors.waitForCompletion(true))
        return -1;

    return 0;
}
From source file: org.apache.mahout.classifier.rbm.training.RBMClassifierTrainingJob.java
License: Apache License

/**
 * Finetune using map/reduce.
 *
 * @param batch the batch
 * @param iteration the iteration
 * @param learningrate the learningrate
 * @return true, if successful
 * @throws IOException Signals that an I/O exception has occurred.
 * @throws InterruptedException the interrupted exception
 * @throws ClassNotFoundException the class not found exception
 */
private boolean fintuneMR(Path batch, int iteration, double learningrate)
        throws IOException, InterruptedException, ClassNotFoundException {
    //prepare and run finetune job
    long batchsize;
    HadoopUtil.delete(getConf(), getTempPath(WEIGHT_UPDATES));
    HadoopUtil.cacheFiles(getOutputPath(), getConf());

    Job trainDBM = prepareJob(batch, getTempPath(WEIGHT_UPDATES), SequenceFileInputFormat.class,
        DBMBackPropTrainingMapper.class, IntWritable.class, MatrixWritable.class,
        DBMBackPropTrainingReducer.class, IntWritable.class, MatrixWritable.class,
        SequenceFileOutputFormat.class);
    trainDBM.getConfiguration().set("labelcount", String.valueOf(labelcount));
    trainDBM.getConfiguration().set("learningrate", String.valueOf(learningrate));
    trainDBM.setCombinerClass(DBMBackPropTrainingReducer.class);

    if (!trainDBM.waitForCompletion(true))
        return false;

    batchsize = trainDBM.getCounters().findCounter(DBMBackPropTrainingMapper.BATCHES.SIZE).getValue();

    changeAndSaveModel(getOutputPath(), batchsize, (iteration == 0) ? 0 : momentum);
    return true;
}
From source file: org.apache.mahout.classifier.rbm.training.RBMClassifierTrainingJob.java
License: Apache License

/**
 * Train greedily using map/reduce.
 *
 * @param rbmNr the rbm nr
 * @param batch the batch
 * @param iteration the iteration
 * @param learningrate the learningrate
 * @return true, if successful
 * @throws IOException Signals that an I/O exception has occurred.
 * @throws InterruptedException the interrupted exception
 * @throws ClassNotFoundException the class not found exception
 */
private boolean trainGreedyMR(int rbmNr, Path batch, int iteration, double learningrate)
        throws IOException, InterruptedException, ClassNotFoundException {
    //run greedy pretraining as map reduce job
    long batchsize;
    HadoopUtil.delete(getConf(), getTempPath(WEIGHT_UPDATES));
    HadoopUtil.cacheFiles(getOutputPath(), getConf());

    Job trainRBM = prepareJob(batch, getTempPath(WEIGHT_UPDATES), SequenceFileInputFormat.class,
        RBMGreedyPreTrainingMapper.class, IntWritable.class, MatrixWritable.class,
        RBMGreedyPreTrainingReducer.class, IntWritable.class, MatrixWritable.class,
        SequenceFileOutputFormat.class);
    trainRBM.getConfiguration().set("rbmNr", String.valueOf(rbmNr));
    trainRBM.getConfiguration().set("labelcount", String.valueOf(labelcount));
    trainRBM.getConfiguration().set("learningrate", String.valueOf(learningrate));
    trainRBM.getConfiguration().set("nrGibbsSampling", String.valueOf(nrGibbsSampling));
    trainRBM.setCombinerClass(RBMGreedyPreTrainingReducer.class);

    if (!trainRBM.waitForCompletion(true))
        return false;

    batchsize = trainRBM.getCounters().findCounter(RBMGreedyPreTrainingMapper.BATCH.SIZE).getValue();

    changeAndSaveModel(getOutputPath(), batchsize, (lastUpdate[rbmNr] == null) ? 0 : momentum);
    return true;
}
From source file: org.apache.mahout.clustering.spectral.eigencuts.EigencutsAffinityCutsJob.java
License: Apache License

/**
 * Runs a single iteration of defining cluster boundaries, based on
 * previous calculations and the formation of the "cut matrix".
 *
 * @param currentAffinity Path to the current affinity matrix.
 * @param cutMatrix Path to the sensitivity matrix.
 * @param nextAffinity Output path for the new affinity matrix.
 */
public static long runjob(Path currentAffinity, Path cutMatrix, Path nextAffinity, Configuration conf)
        throws IOException, ClassNotFoundException, InterruptedException {

    // these options allow us to differentiate between the two vectors
    // in the mapper and reducer - we'll know from the working path
    // which SequenceFile we're accessing
    conf.set(EigencutsKeys.AFFINITY_PATH, currentAffinity.getName());
    conf.set(EigencutsKeys.CUTMATRIX_PATH, cutMatrix.getName());

    Job job = new Job(conf, "EigencutsAffinityCutsJob");
    job.setInputFormatClass(SequenceFileInputFormat.class);
    job.setOutputFormatClass(SequenceFileOutputFormat.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(VertexWritable.class);
    job.setOutputKeyClass(IntWritable.class);
    job.setOutputValueClass(VectorWritable.class);
    job.setMapperClass(EigencutsAffinityCutsMapper.class);
    job.setCombinerClass(EigencutsAffinityCutsCombiner.class);
    job.setReducerClass(EigencutsAffinityCutsReducer.class);

    //FileInputFormat.addInputPath(job, currentAffinity);
    FileInputFormat.addInputPath(job, cutMatrix);
    FileOutputFormat.setOutputPath(job, nextAffinity);

    boolean succeeded = job.waitForCompletion(true);
    if (!succeeded) {
        throw new IllegalStateException("Job failed!");
    }

    return job.getCounters().findCounter(CUTSCOUNTER.NUM_CUTS).getValue();
}
From source file: org.apache.mahout.fpm.bigfim.BigFIMDriver.java
License: Apache License

private static long startAprioriPhase(String inputFile, String outputFile, Config config)
        throws IOException, InterruptedException, ClassNotFoundException, URISyntaxException {
    long nrLines = -1;
    int prefixSize = config.getPrefixLength();
    for (int i = 1; i <= prefixSize; i++) {
        String outputDir = outputFile + separator + "ap" + i;
        String cacheFile = outputFile + separator + "ap" + (i - 1) + separator + "part-r-00000";

        System.out.println("[AprioriPhase]: Phase: " + i + " input: " + inputFile + ", output: " + outputFile);

        Configuration conf = new Configuration();
        setConfigurationValues(conf, config);
        if (nrLines != -1) {
            conf.setLong(Config.NUMBER_OF_LINES_KEY, nrLines);
        }

        Job job = new Job(conf, "Apriori Phase" + i);
        job.setJarByClass(BigFIMDriver.class);

        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setMapperClass(AprioriPhaseMapper.class);
        job.setReducerClass(AprioriPhaseReducer.class);

        job.setInputFormatClass(SplitByNumberOfMappersTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        job.setNumReduceTasks(1);

        FileInputFormat.addInputPath(job, new Path(inputFile));
        FileOutputFormat.setOutputPath(job, new Path(outputDir));

        if (i > 1) {
            DistributedCache.addCacheFile(new URI(cacheFile), job.getConfiguration());
        }

        long start = System.currentTimeMillis();
        job.waitForCompletion(true);
        long end = System.currentTimeMillis();
        System.out.println("Job Apriori Phase " + i + " took " + (end - start) / 1000 + "s");

        if (i == 1) {
            nrLines = job.getCounters().findCounter(Task.Counter.MAP_INPUT_RECORDS).getValue();
        }
    }
    return nrLines;
}
From source file: org.apache.mahout.graph.components.FindComponentsJob.java
License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();

    AtomicInteger currentPhase = new AtomicInteger();

    Path edgesPath = inputPath;
    Path zoneAssignmentsPath = new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Prepare Input
         */
        Job prepareAssignments = prepareJob(edgesPath, zoneAssignmentsPath, SequenceFileInputFormat.class,
            PrepareAssignmentsFileMapper.class, Vertex.class, Vertex.class,
            PrepareAssignmentsFileReducer.class, Vertex.class, FlaggedVertex.class,
            SequenceFileOutputFormat.class);
        prepareAssignments.waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * As long as there may be zones connected
         */
        while (true) {
            Path scatterEdgesAndAssignZoneOutputPath =
                new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));
            /*
             * Scatter edges and forward zone assignments,
             * assign one zone to edges
             */
            Job scatterEdgesAndAssignZone = prepareJob(
                new Path(zoneAssignmentsPath.toString() + "," + edgesPath.toString()),
                scatterEdgesAndAssignZoneOutputPath, SequenceFileInputFormat.class,
                ScatterEdgesAndForwardZoneAssignmentsMapper.class, JoinableVertex.class, FlaggedVertex.class,
                AssignOneZoneToEdgesReducer.class, UndirectedEdge.class, Vertex.class,
                SequenceFileOutputFormat.class);
            scatterEdgesAndAssignZone.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
            scatterEdgesAndAssignZone.waitForCompletion(true);

            Path findInterzoneEdgesOutputPath =
                new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));
            /*
             * Find interzone edges
             */
            Job findInterzoneEdges = prepareJob(scatterEdgesAndAssignZoneOutputPath,
                findInterzoneEdgesOutputPath, SequenceFileInputFormat.class, Mapper.class,
                UndirectedEdge.class, Vertex.class, FindInterzoneEdgesReducer.class, Vertex.class,
                FlaggedVertex.class, SequenceFileOutputFormat.class);
            findInterzoneEdges.waitForCompletion(true);

            /*
             * Break if there are no new interzone edges
             */
            if (findInterzoneEdges.getCounters().findCounter(Counter.ZONES_CONNECTED).getValue() == 0L) {
                break;
            }

            Path assignNewZonesOutputPath =
                new Path(tempDirPath, String.valueOf(System.currentTimeMillis()));
            /*
             * Assign new zones
             */
            Job assignNewZones = prepareJob(
                new Path(zoneAssignmentsPath.toString() + "," + findInterzoneEdgesOutputPath.toString()),
                assignNewZonesOutputPath, SequenceFileInputFormat.class,
                BinZoneAssignmentsAndInterzoneEdgesMapper.class, JoinableVertex.class, FlaggedVertex.class,
                AssignNewZonesToVerticesReducer.class, Vertex.class, FlaggedVertex.class,
                SequenceFileOutputFormat.class);
            assignNewZones.setGroupingComparatorClass(JoinableVertex.GroupingComparator.class);
            assignNewZones.waitForCompletion(true);

            zoneAssignmentsPath = assignNewZonesOutputPath;
        }
    }

    FileSystem system = FileSystem.get(getConf());
    FileUtil.copy(system, zoneAssignmentsPath, system, outputPath, false, getConf());

    return 0;
}