Usage examples for org.apache.hadoop.mapred.RunningJob#isSuccessful()
public boolean isSuccessful() throws IOException;
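Before the project-specific examples below, here is a minimal sketch of the common pattern they all share: submit a job with the blocking JobClient.runJob(), then turn the outcome reported by isSuccessful() into an exit code. This is only an illustration, not code from any of the projects listed; IsSuccessfulExample is a hypothetical class name and the job is a trivial identity pass-through.

import java.io.IOException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;
import org.apache.hadoop.mapred.TextInputFormat;
import org.apache.hadoop.mapred.TextOutputFormat;

public class IsSuccessfulExample {
    public static void main(String[] args) throws IOException {
        // A trivial identity job: the classic mapred API falls back to
        // IdentityMapper/IdentityReducer when no mapper/reducer is set.
        JobConf conf = new JobConf(IsSuccessfulExample.class);
        conf.setJobName("is-successful-example");
        conf.setInputFormat(TextInputFormat.class);
        conf.setOutputFormat(TextOutputFormat.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));

        // runJob() blocks until the job completes; afterwards the returned
        // RunningJob can be queried for the final outcome.
        RunningJob job = JobClient.runJob(conf);
        System.exit(job.isSuccessful() ? 0 : 1);
    }
}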
From source file:org.archive.jbs.Merge.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("jbs.Merge <output> <input>...");
        return 1;
    }

    JobConf conf = new JobConf(getConf(), Merge.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setCombinerClass(Reduce.class);
    conf.setReducerClass(Reduce.class);

    // Choose the outputformat to either merge or index the records
    //
    // org.archive.jbs.lucene.LuceneOutputFormat
    //    - builds local Lucene index
    //
    // org.archive.jbs.solr.SolrOutputFormat
    //    - sends documents to remote Solr server
    //
    // org.apache.hadoop.mapred.MapFileOutputFormat
    //    - writes merged documents to Hadoop MapFile
    conf.setOutputFormat((Class) Class
            .forName(conf.get("jbs.outputformat.class", "org.apache.hadoop.mapred.MapFileOutputFormat")));

    // Set the Hadoop job name to incorporate the output format name.
    String formatName = conf.getOutputFormat().getClass().getName();
    conf.setJobName("jbs.Merge " + formatName.substring(
            formatName.lastIndexOf('.') != -1 ? (formatName.lastIndexOf('.') + 1) : 0));

    // Add the input paths as either NutchWAX segment directories or
    // text .dup files.
    for (int i = 1; i < args.length; i++) {
        Path p = new Path(args[i]);

        // Expand any file globs and then check each matching path
        FileStatus[] files = FileSystem.get(conf).globStatus(p);

        for (FileStatus file : files) {
            if (file.isDir()) {
                // If it's a directory, then check if it is a Nutch segment, otherwise treat as a SequenceFile.
                if (p.getFileSystem(conf).exists(new Path(file.getPath(), "parse_data"))) {
                    LOG.info("Input NutchWax: " + file.getPath());
                    MultipleInputs.addInputPath(conf, new Path(file.getPath(), "parse_data"),
                            SequenceFileInputFormat.class, NutchMapper.class);
                    MultipleInputs.addInputPath(conf, new Path(file.getPath(), "parse_text"),
                            SequenceFileInputFormat.class, NutchMapper.class);
                } else {
                    // Assume it's a SequenceFile of JSON-encoded Documents.
                    LOG.info("Input Document: " + file.getPath());
                    MultipleInputs.addInputPath(conf, file.getPath(),
                            SequenceFileInputFormat.class, DocumentMapper.class);
                }
            } else {
                // Not a directory, assume it's a text file, either CDX or property specifications.
                LOG.info("Input TextFile: " + file.getPath());
                MultipleInputs.addInputPath(conf, file.getPath(), TextInputFormat.class, TextMapper.class);
            }
        }
    }

    FileOutputFormat.setOutputPath(conf, new Path(args[0]));

    RunningJob rj = JobClient.runJob(conf);

    return rj.isSuccessful() ? 0 : 1;
}
From source file:org.archive.jbs.misc.PageRank.java
License:Apache License
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        System.err.println("PageRank <output> <input>...");
        return 1;
    }

    JobConf conf = new JobConf(getConf(), PageRank.class);
    conf.setJobName("jbs.PageRank");

    // No need to set this since we use the MultipleInputs class
    // below, which allows us to specify a mapper for each input.
    // conf.setMapperClass(Map.class);
    conf.setReducerClass(Reduce.class);

    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(GenericObject.class);

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);

    // The input paths should be either NutchWAX segment directories
    // or Hadoop SequenceFiles containing JSON-encoded Documents
    for (int i = 1; i < args.length; i++) {
        Path p = new Path(args[i]);

        // Expand any file globs and then check each matching path
        FileStatus[] files = FileSystem.get(conf).globStatus(p);

        for (FileStatus file : files) {
            if (file.isDir()) {
                // If it's a directory, then check if it is a Nutch segment, otherwise treat as a SequenceFile.
                Path nwp = new Path(file.getPath(), "parse_data");
                if (p.getFileSystem(conf).exists(nwp)) {
                    LOG.info("Adding input path: " + nwp);
                    MultipleInputs.addInputPath(conf, nwp, SequenceFileInputFormat.class, Map.class);
                } else {
                    LOG.info("Adding input path: " + file.getPath());
                    MultipleInputs.addInputPath(conf, file.getPath(), SequenceFileInputFormat.class, Map.class);
                }
            } else {
                // Not a directory, skip it.
                LOG.warn("Not a directory, skip input: " + file.getPath());
            }
        }
    }

    FileOutputFormat.setOutputPath(conf, new Path(args[0]));

    RunningJob rj = JobClient.runJob(conf);

    return rj.isSuccessful() ? 0 : 1;
}
From source file:org.archive.jbs.Parse.java
License:Apache License
/**
 * Run the job.
 */
public int run(String[] args) throws Exception {
    if (args.length < 2) {
        usage();
        return 1;
    }

    FileSystem fs = FileSystem.get(getConf());

    // Create a job configuration
    JobConf job = new JobConf(getConf());

    // Job name uses output dir to help identify it to the operator.
    job.setJobName("jbs.Parse " + args[0]);

    // The inputs are a list of filenames, use the
    // FilenameInputFormat to pass them to the mappers.
    job.setInputFormat(FilenameInputFormat.class);

    // This is a map-only job, no reducers.
    job.setNumReduceTasks(0);

    // Use the Parse-specific output format.
    job.setOutputFormat(PerMapOutputFormat.class);

    // Use our ParseMapper, with output keys and values of type
    // Text.
    job.setMapperClass(ParseMapper.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // Configure the input and output paths, from the command-line.
    Path outputDir = new Path(args[0]);
    FileOutputFormat.setOutputPath(job, outputDir);

    boolean atLeastOneInput = false;
    for (int i = 1; i < args.length; i++) {
        FileSystem inputfs = FileSystem.get(new java.net.URI(args[i]), getConf());

        for (FileStatus status : inputfs.globStatus(new Path(args[i]))) {
            Path inputPath = status.getPath();
            Path outputPath = new Path(outputDir, inputPath.getName());
            if (fs.exists(outputPath)) {
                LOG.debug("Output path already exists: " + outputPath);
            } else {
                atLeastOneInput = true;
                LOG.info("Add input path: " + inputPath);
                FileInputFormat.addInputPath(job, inputPath);
            }
        }
    }

    if (!atLeastOneInput) {
        LOG.info("No input files to parse.");
        return 0;
    }

    // Run the job!
    RunningJob rj = JobClient.runJob(job);

    if (!rj.isSuccessful()) {
        LOG.error("FAILED: " + rj.getID());
        return 2;
    }

    return 0;
}
From source file:org.archive.nutchwax.Importer.java
License:Apache License
/**
 * Runs the import job with the given arguments.  This method
 * assumes that it is being run via the command-line; as such, it
 * emits error messages regarding invalid/missing arguments to the
 * system error stream.
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        return -1;
    }

    JobConf job = new NutchJob(getConf());

    // Check for "-e <exclusions>" option.
    int pos = 0;
    if (args[0].equals("-e")) {
        if (args.length < 2) {
            System.out.println("ERROR: Missing filename for option \"-e\"\n");
            usage();
            return -1;
        }

        job.set("nutchwax.urlfilter.wayback.exclusions", args[1]);
        pos = 2;
    }

    if (args.length - pos < 1) {
        System.out.println("ERROR: Missing manifest file.\n");
        usage();
        return -1;
    }

    Path manifestPath = new Path(args[pos++]);

    Path segmentPath;
    if (args.length - pos < 1) {
        segmentPath = new Path("segments", org.apache.nutch.crawl.Generator.generateSegmentName());
    } else {
        segmentPath = new Path(args[pos]);
    }

    try {
        job.setJobName("Importer " + manifestPath);
        job.set(Nutch.SEGMENT_NAME_KEY, segmentPath.getName());

        // job.setInputPath(manifestPath);
        FileInputFormat.addInputPath(job, manifestPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(Importer.class);

        // job.setOutputPath(segmentPath);
        FileOutputFormat.setOutputPath(job, segmentPath);
        job.setOutputFormat(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        RunningJob rj = JobClient.runJob(job);

        return rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        LOG.fatal("Importer: ", e);
        System.out.println("Fatal error: " + e);
        e.printStackTrace(System.out);
        return -1;
    }
}
From source file:org.archive.nutchwax.ImporterToHdfs.java
License:Apache License
/**
 * Runs the import job with the given arguments. This method assumes that it
 * is being run via the command-line; as such, it emits error messages
 * regarding invalid/missing arguments to the system error stream.
 */
public int run(String[] args) throws Exception {
    if (args.length < 1) {
        usage();
        return -1;
    }

    JobConf job = new NutchJob(getConf());
    System.setProperty("fullPathExecution", "false");
    Path manifestPath = null;

    // Check for "-e <exclusions>" option & "-p <path_to_warc_files>" option.
    int pos = 0;
    for (String[] str : getOptsList(args)) {
        if (args.length < pos + 2) {
            System.out.println("ERROR: Missing filename for option \"" + str[0] + "\"\n");
            usage();
            return -1;
        }
        if (str[0].equals("-p")) {
            manifestPath = new Path(getManifestFile(str[1]));
            System.setProperty("fullPathExecution", "true");
        } else if (str[0].equals("-e")) {
            job.set("nutchwax.urlfilter.wayback.exclusions", str[1]);
        }
        pos = pos + 2;
    }

    if (manifestPath == null) {
        if (args.length - pos < 1) {
            System.out.println("ERROR: Missing manifest file.\n");
            usage();
            return -1;
        } else {
            manifestPath = new Path(args[pos++]);
        }
    }

    Path segmentPath;
    if (args.length - pos < 1) {
        segmentPath = new Path("segments", org.apache.nutch.crawl.Generator.generateSegmentName());
    } else {
        segmentPath = new Path(args[pos]);
    }

    try {
        job.setJobName("Importer_to_Hdfs " + manifestPath);
        job.set(Nutch.SEGMENT_NAME_KEY, segmentPath.getName());

        // job.setInputPath(manifestPath);
        FileInputFormat.addInputPath(job, manifestPath);
        job.setInputFormat(TextInputFormat.class);

        job.setMapperClass(ImporterToHdfs.class);

        // job.setOutputPath(segmentPath);
        FileOutputFormat.setOutputPath(job, segmentPath);
        job.setOutputFormat(FetcherOutputFormat.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(NutchWritable.class);

        RunningJob rj = JobClient.runJob(job);

        return rj.isSuccessful() ? 0 : 1;
    } catch (Exception e) {
        LOG.fatal("Importer_to_Hdfs: ", e);
        System.out.println("Fatal error: " + e);
        e.printStackTrace(System.out);
        return -1;
    }
}
From source file:org.cloudata.examples.upload.partitionjob.PartitionJob.java
License:Apache License
public boolean runJob(String inputPath, String tableName, int numOfTablets) throws IOException {
    JobConf jobConf = new JobConf(PartitionJob.class);
    String libDir = CloudataMapReduceUtil.initMapReduce(jobConf);

    FileSystem fs = FileSystem.get(jobConf);
    // Remove any leftover log-count file from a previous run.
    FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);

    jobConf.setJobName("PartitionJob_" + tableName + "(" + new Date() + ")");
    jobConf.set("cloudata.numOfTablets", String.valueOf(numOfTablets));
    jobConf.set(AbstractTabletInputFormat.OUTPUT_TABLE, tableName);

    String clientOpt = jobConf.get("mapred.child.java.opts");
    if (clientOpt == null) {
        clientOpt = "";
    }
    jobConf.set("mapred.child.java.opts", clientOpt + " -Duser.name=" + System.getProperty("user.name"));

    // <Map>
    FileInputFormat.addInputPath(jobConf, new Path(inputPath));
    jobConf.setInputFormat(TextInputFormat.class);
    jobConf.setMapperClass(PartitionMap.class);
    jobConf.setMapOutputKeyClass(Text.class);
    jobConf.setMapOutputValueClass(Text.class);
    // </Map>

    // <Reduce>
    Path tempOutputPath = new Path("temp/partitionJob/" + tableName + "/reducer");
    FileOutputFormat.setOutputPath(jobConf, tempOutputPath);
    jobConf.setOutputKeyClass(Text.class);
    jobConf.setOutputValueClass(Text.class);
    jobConf.setReducerClass(PartitionReducer.class);
    // Use a single reduce task
    jobConf.setNumReduceTasks(1);
    // </Reduce>

    try {
        RunningJob job = JobClient.runJob(jobConf);
        return job.isSuccessful();
    } finally {
        FileUtil.delete(fs, new Path(getLogCountFilepath(tableName)), true);
        FileUtil.delete(fs, tempOutputPath, true);
        CloudataMapReduceUtil.clearMapReduce(libDir);
    }
}
From source file:org.hxx.hadoop.GeneratorHbase.java
License:Apache License
public Path generateAll(int tableNum, Path segments, long topN, int reduceCnt, boolean filter, boolean norm,
        boolean force) {
    SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
    long start = System.currentTimeMillis();
    LOG.info("Generator: from table=" + tableNum + " starting at " + sdf.format(start));
    LOG.info("Generator: filtering=" + filter + "; Generator: normalizing=" + norm);

    Path segment = new Path(segments, Generator.generateSegmentName());
    String table = "crawldb" + tableNum;
    long cnt = 0;
    try {
        RunningJob r = generateJob(table, segment, topN, reduceCnt, filter, norm, force);
        Counter counter = r.getCounters().findCounter("Generator", "records");
        cnt = counter.getValue();

        if (r.isSuccessful()) {
            // LOG.info(Nutch.GEN_JOB_SUCCESS + "=1;");
        } else {
            // LOG.info(Nutch.GEN_JOB_FAIL + "=1;");
        }
        LOG.info("Generator: " + segment + " records: " + cnt + " current table=" + table + " timeused="
                + (System.currentTimeMillis() - start) / 1000 + "s");
    } catch (Throwable e) {
        removePath(segment);
        LOG.error("generateAll:", e);
    }

    int less = getConf().getInt("generator.less", 10000);
    if (cnt == 0) {
        removePath(segment);
        return null;
    } else if (cnt <= less) {
        // too few records (cnt <= less, default 10000)
        removePath(segment);
        return null;
    }

    long end = System.currentTimeMillis();
    // LOG.info(Nutch.GEN_TIME + "=" + (end - start) + ";");

    // have records
    GenerateInfos.topn = topN;
    GenerateInfos.hostn = getConf().getInt(Generator.GENERATOR_MAX_COUNT, -1);
    GenerateInfo genInfo = GenerateInfos.getGenerateInfo();
    genInfo.start = start;
    genInfo.generate = cnt;
    genInfo.table = table;
    genInfo.end = end;
    genInfo.endTime = sdf.format(genInfo.end);
    LOG.info(GenerateInfos.printString());

    LOG.info("Generator: finished at " + sdf.format(end) + ", elapsed: " + TimingUtil.elapsedTime(start, end));
    return segment;
}
From source file:org.pentaho.di.job.entries.hadoopjobexecutor.JobEntryHadoopJobExecutor.java
License:Apache License
public Result execute(Result result, int arg1) throws KettleException {
    result.setNrErrors(0);

    Log4jFileAppender appender = null;
    String logFileName = "pdi-" + this.getName(); //$NON-NLS-1$

    String hadoopDistro = System.getProperty("hadoop.distribution.name", hadoopDistribution);
    hadoopDistro = environmentSubstitute(hadoopDistro);
    if (Const.isEmpty(hadoopDistro)) {
        hadoopDistro = "generic";
    }

    try {
        appender = LogWriter.createFileAppender(logFileName, true, false);
        LogWriter.getInstance().addAppender(appender);
        log.setLogLevel(parentJob.getLogLevel());
    } catch (Exception e) {
        logError(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.FailedToOpenLogFile", logFileName, //$NON-NLS-1$
                e.toString()));
        logError(Const.getStackTracker(e));
    }

    try {
        URL resolvedJarUrl = null;
        String jarUrlS = environmentSubstitute(jarUrl);
        if (jarUrlS.indexOf("://") == -1) {
            // default to file://
            File jarFile = new File(jarUrlS);
            resolvedJarUrl = jarFile.toURI().toURL();
        } else {
            resolvedJarUrl = new URL(jarUrlS);
        }

        final String cmdLineArgsS = environmentSubstitute(cmdLineArgs);

        if (log.isDetailed())
            logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.ResolvedJar",
                    resolvedJarUrl.toExternalForm()));

        if (isSimple) {
            /*
            final AtomicInteger taskCount = new AtomicInteger(0);
            final AtomicInteger successCount = new AtomicInteger(0);
            final AtomicInteger failedCount = new AtomicInteger(0);
            */

            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.SimpleMode"));

            List<Class<?>> classesWithMains = JarUtility
                    .getClassesInJarWithMain(resolvedJarUrl.toExternalForm(), getClass().getClassLoader());
            for (final Class<?> clazz : classesWithMains) {
                Runnable r = new Runnable() {
                    public void run() {
                        try {
                            final ClassLoader cl = Thread.currentThread().getContextClassLoader();
                            try {
                                // taskCount.incrementAndGet();
                                Thread.currentThread().setContextClassLoader(clazz.getClassLoader());
                                Method mainMethod = clazz.getMethod("main", new Class[] { String[].class });
                                Object[] args = (cmdLineArgsS != null)
                                        ? new Object[] { cmdLineArgsS.split(" ") }
                                        : new Object[0];
                                mainMethod.invoke(null, args);
                            } finally {
                                Thread.currentThread().setContextClassLoader(cl);
                                // successCount.incrementAndGet();
                                // taskCount.decrementAndGet();
                            }
                        } catch (Throwable ignored) {
                            // skip, try the next one
                            // logError(ignored.getMessage());
                            // failedCount.incrementAndGet();
                            ignored.printStackTrace();
                        }
                    }
                };
                Thread t = new Thread(r);
                t.start();
            }

            // uncomment to implement blocking
            /*
            if (blocking) {
                while (taskCount.get() > 0 && !parentJob.isStopped()) {
                    Thread.sleep(1000);
                }

                if (!parentJob.isStopped()) {
                    result.setResult(successCount.get() > 0);
                    result.setNrErrors((successCount.get() > 0) ? 0 : 1);
                } else {
                    // we can't really know at this stage if
                    // the hadoop job will finish successfully
                    // because we have to stop now
                    result.setResult(true); // look on the bright side of life :-)...
                    result.setNrErrors(0);
                }
            } else {
            */

            // non-blocking - just set success equal to no failures arising
            // from invocation
            // result.setResult(failedCount.get() == 0);
            // result.setNrErrors(failedCount.get());
            result.setResult(true);
            result.setNrErrors(0);
            /* } */
        } else {
            if (log.isDetailed())
                logDetailed(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.AdvancedMode"));

            URL[] urls = new URL[] { resolvedJarUrl };
            URLClassLoader loader = new URLClassLoader(urls, getClass().getClassLoader());

            JobConf conf = new JobConf();
            String hadoopJobNameS = environmentSubstitute(hadoopJobName);
            conf.setJobName(hadoopJobNameS);

            String outputKeyClassS = environmentSubstitute(outputKeyClass);
            conf.setOutputKeyClass(loader.loadClass(outputKeyClassS));
            String outputValueClassS = environmentSubstitute(outputValueClass);
            conf.setOutputValueClass(loader.loadClass(outputValueClassS));

            if (mapperClass != null) {
                String mapperClassS = environmentSubstitute(mapperClass);
                Class<? extends Mapper> mapper = (Class<? extends Mapper>) loader.loadClass(mapperClassS);
                conf.setMapperClass(mapper);
            }
            if (combinerClass != null) {
                String combinerClassS = environmentSubstitute(combinerClass);
                Class<? extends Reducer> combiner = (Class<? extends Reducer>) loader.loadClass(combinerClassS);
                conf.setCombinerClass(combiner);
            }
            if (reducerClass != null) {
                String reducerClassS = environmentSubstitute(reducerClass);
                Class<? extends Reducer> reducer = (Class<? extends Reducer>) loader.loadClass(reducerClassS);
                conf.setReducerClass(reducer);
            }

            if (inputFormatClass != null) {
                String inputFormatClassS = environmentSubstitute(inputFormatClass);
                Class<? extends InputFormat> inputFormat = (Class<? extends InputFormat>) loader
                        .loadClass(inputFormatClassS);
                conf.setInputFormat(inputFormat);
            }
            if (outputFormatClass != null) {
                String outputFormatClassS = environmentSubstitute(outputFormatClass);
                Class<? extends OutputFormat> outputFormat = (Class<? extends OutputFormat>) loader
                        .loadClass(outputFormatClassS);
                conf.setOutputFormat(outputFormat);
            }

            String hdfsHostnameS = environmentSubstitute(hdfsHostname);
            String hdfsPortS = environmentSubstitute(hdfsPort);
            String jobTrackerHostnameS = environmentSubstitute(jobTrackerHostname);
            String jobTrackerPortS = environmentSubstitute(jobTrackerPort);

            // See if we can auto detect the distribution first
            HadoopConfigurer configurer = HadoopConfigurerFactory.locateConfigurer();

            if (configurer == null) {
                // go with what has been selected by the user
                configurer = HadoopConfigurerFactory.getConfigurer(hadoopDistro);

                // if the user-specified distribution is detectable, make sure it is still
                // the current distribution!
                if (configurer != null && configurer.isDetectable()) {
                    if (!configurer.isAvailable()) {
                        throw new KettleException(BaseMessages.getString(PKG,
                                "JobEntryHadoopJobExecutor.Error.DistroNoLongerPresent",
                                configurer.distributionName()));
                    }
                }
            }
            if (configurer == null) {
                throw new KettleException(BaseMessages.getString(PKG,
                        "JobEntryHadoopJobExecutor.Error.UnknownHadoopDistribution", hadoopDistro));
            }
            logBasic(BaseMessages.getString(PKG, "JobEntryHadoopJobExecutor.Message.DistroConfigMessage",
                    configurer.distributionName()));

            List<String> configMessages = new ArrayList<String>();
            configurer.configure(hdfsHostnameS, hdfsPortS, jobTrackerHostnameS, jobTrackerPortS, conf,
                    configMessages);
            for (String m : configMessages) {
                logBasic(m);
            }

            String inputPathS = environmentSubstitute(inputPath);
            String[] inputPathParts = inputPathS.split(",");
            List<Path> paths = new ArrayList<Path>();
            for (String path : inputPathParts) {
                paths.add(new Path(configurer.getFilesystemURL() + path));
            }
            Path[] finalPaths = paths.toArray(new Path[paths.size()]);

            // FileInputFormat.setInputPaths(conf, new Path(configurer.getFilesystemURL() + inputPathS));
            FileInputFormat.setInputPaths(conf, finalPaths);
            String outputPathS = environmentSubstitute(outputPath);
            FileOutputFormat.setOutputPath(conf, new Path(configurer.getFilesystemURL() + outputPathS));

            // process user defined values
            for (UserDefinedItem item : userDefined) {
                if (item.getName() != null && !"".equals(item.getName()) && item.getValue() != null
                        && !"".equals(item.getValue())) {
                    String nameS = environmentSubstitute(item.getName());
                    String valueS = environmentSubstitute(item.getValue());
                    conf.set(nameS, valueS);
                }
            }

            String workingDirectoryS = environmentSubstitute(workingDirectory);
            conf.setWorkingDirectory(new Path(configurer.getFilesystemURL() + workingDirectoryS));
            conf.setJar(jarUrl);

            String numMapTasksS = environmentSubstitute(numMapTasks);
            String numReduceTasksS = environmentSubstitute(numReduceTasks);

            int numM = 1;
            try {
                numM = Integer.parseInt(numMapTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of map tasks '" + numMapTasksS + "'. Setting num"
                        + "map tasks to 1");
            }
            int numR = 1;
            try {
                numR = Integer.parseInt(numReduceTasksS);
            } catch (NumberFormatException e) {
                logError("Can't parse number of reduce tasks '" + numReduceTasksS + "'. Setting num"
                        + "reduce tasks to 1");
            }

            conf.setNumMapTasks(numM);
            conf.setNumReduceTasks(numR);

            JobClient jobClient = new JobClient(conf);
            RunningJob runningJob = jobClient.submitJob(conf);

            String loggingIntervalS = environmentSubstitute(loggingInterval);
            int logIntv = 60;
            try {
                logIntv = Integer.parseInt(loggingIntervalS);
            } catch (NumberFormatException e) {
                logError("Can't parse logging interval '" + loggingIntervalS + "'. Setting "
                        + "logging interval to 60");
            }

            if (blocking) {
                try {
                    int taskCompletionEventIndex = 0;
                    while (!parentJob.isStopped() && !runningJob.isComplete()) {
                        if (logIntv >= 1) {
                            printJobStatus(runningJob);
                            taskCompletionEventIndex = logTaskMessages(runningJob, taskCompletionEventIndex);
                            Thread.sleep(logIntv * 1000);
                        } else {
                            Thread.sleep(60000);
                        }
                    }

                    if (parentJob.isStopped() && !runningJob.isComplete()) {
                        // We must stop the job running on Hadoop
                        runningJob.killJob();
                        // Indicate this job entry did not complete
                        result.setResult(false);
                    }

                    printJobStatus(runningJob);
                    // Log any messages we may have missed while polling
                    logTaskMessages(runningJob, taskCompletionEventIndex);
                } catch (InterruptedException ie) {
                    logError(ie.getMessage(), ie);
                }

                // Entry is successful if the MR job is successful overall
                result.setResult(runningJob.isSuccessful());
            }
        }
    } catch (Throwable t) {
        t.printStackTrace();
        result.setStopped(true);
        result.setNrErrors(1);
        result.setResult(false);
        logError(t.getMessage(), t);
    }

    if (appender != null) {
        LogWriter.getInstance().removeAppender(appender);
        appender.close();

        ResultFile resultFile = new ResultFile(ResultFile.FILE_TYPE_LOG, appender.getFile(),
                parentJob.getJobname(), getName());
        result.getResultFiles().put(resultFile.getFile().toString(), resultFile);
    }

    return result;
}
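The Pentaho entry above wraps the submit-and-poll pattern in a lot of PDI-specific configuration. Stripped down, the RunningJob usage is roughly the sketch below; SubmitAndPoll and submitAndWait are hypothetical names used only for illustration and are not part of the Pentaho code.

import java.io.IOException;

import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.RunningJob;

public class SubmitAndPoll {
    /** Submits the job and polls until it finishes; returns the final success flag. */
    public static boolean submitAndWait(JobConf conf) throws IOException, InterruptedException {
        JobClient jobClient = new JobClient(conf);
        // submitJob() returns immediately, unlike the blocking JobClient.runJob().
        RunningJob runningJob = jobClient.submitJob(conf);

        // Poll until the job reaches a terminal state (succeeded, failed, or killed).
        while (!runningJob.isComplete()) {
            Thread.sleep(10 * 1000L);
        }

        // isSuccessful() is only meaningful once isComplete() returns true.
        return runningJob.isSuccessful();
    }
}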
From source file:org.pooledtimeseries.healthcheck.CheckCartesianProductSeqFile.java
License:Apache License
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
    long start = System.currentTimeMillis();

    JobConf conf = new JobConf("Cartesian Product");
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length != 2) {
        System.err.println("Usage: CheckCartesianProductSeqFile <input sequence file> <out>");
        System.exit(1);
    }

    // Configure the join type
    conf.setJarByClass(CheckCartesianProductSeqFile.class);

    conf.setMapperClass(CartesianMapper.class);
    conf.setReducerClass(CartesianReducer.class);

    conf.setInputFormat(CartesianInputFormat.class);
    CartesianInputFormat.setLeftInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);
    CartesianInputFormat.setRightInputInfo(conf, SequenceFileInputFormat.class, otherArgs[0]);

    TextOutputFormat.setOutputPath(conf, new Path(otherArgs[1]));

    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(IntWritable.class);

    RunningJob job = JobClient.runJob(conf);
    while (!job.isComplete()) {
        Thread.sleep(1000);
    }

    long finish = System.currentTimeMillis();

    System.out.println("Time in ms: " + (finish - start));

    System.exit(job.isSuccessful() ? 0 : 2);
}