List of usage examples for org.apache.hadoop.mapred.JobConf#setJarByClass

public void setJarByClass(Class cls)

Sets the job's jar file by finding the jar that contains the given class, so that jar is shipped to the cluster along with the job.
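Before the collected examples, here is a minimal, self-contained sketch of how setJarByClass is typically wired into a classic mapred driver. The driver class name (ExampleDriver), the identity mapper/reducer wiring, and the two command-line path arguments are illustrative assumptions, not taken from any of the projects below.

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileOutputFormat;
import org.apache.hadoop.mapred.JobClient;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.IdentityMapper;
import org.apache.hadoop.mapred.lib.IdentityReducer;

public class ExampleDriver {
    public static void main(String[] args) throws Exception {
        JobConf conf = new JobConf();
        // Ship the jar that contains ExampleDriver with the job so map and
        // reduce tasks on the cluster can load the job's classes.
        conf.setJarByClass(ExampleDriver.class);
        conf.setJobName("identity-pass-through");

        // Identity mapper/reducer (hypothetical wiring): with the default
        // TextInputFormat, keys are byte offsets (LongWritable) and values
        // are lines (Text).
        conf.setMapperClass(IdentityMapper.class);
        conf.setReducerClass(IdentityReducer.class);
        conf.setOutputKeyClass(LongWritable.class);
        conf.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(conf, new Path(args[0]));
        FileOutputFormat.setOutputPath(conf, new Path(args[1]));
        JobClient.runJob(conf);
    }
}

Passing the driver class itself is the usual pattern: Hadoop locates the jar that contains that class on the classpath and submits it with the job, which is exactly what each of the examples below relies on.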
From source file: com.yolodata.tbana.hadoop.mapred.csv.CSVInputFormatTest.java
License: Open Source License

public int run(String[] args) throws Exception {
    getConf().set(CSVLineRecordReader.FORMAT_DELIMITER, "\"");
    getConf().set(CSVLineRecordReader.FORMAT_SEPARATOR, ",");
    getConf().setInt(CSVNLineInputFormat.LINES_PER_MAP, 40000);
    getConf().setBoolean(CSVLineRecordReader.IS_ZIPFILE, false);

    JobConf jobConf = new JobConf(getConf());
    jobConf.setJarByClass(CSVTestRunner.class);
    jobConf.setNumReduceTasks(0);
    jobConf.setMapperClass(TestMapper.class);
    jobConf.setInputFormat(CSVNLineInputFormat.class);
    jobConf.setOutputKeyClass(NullWritable.class);
    jobConf.setOutputValueClass(Text.class);

    CSVNLineInputFormat.setInputPaths(jobConf, new Path(args[0]));
    TextOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    JobClient.runJob(jobConf);
    return 0;
}
From source file: com.yolodata.tbana.hadoop.mapred.shuttl.TestMapper.java
License: Open Source License

public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(TestConfigurations.getConfigurationWithShuttlSearch());
    jobConf.setJarByClass(ShuttlTestJob.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setMapperClass(TestMapper.class);
    jobConf.setReducerClass(TestReducer.class);
    jobConf.setInputFormat(ShuttlCSVInputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);

    ShuttlCSVInputFormat.addInputPath(jobConf, new Path(args[0]));
    TextOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    JobClient.runJob(jobConf);
    return 0;
}
From source file: com.yolodata.tbana.hadoop.mapred.splunk.inputformat.TestMapper.java
License: Open Source License

public int run(String[] args) throws Exception {
    JobConf jobConf = new JobConf(getConf());
    jobConf.set(SplunkInputFormat.INPUTFORMAT_MODE, args[0]);
    jobConf.setJarByClass(SplunkTestRunner.class);
    jobConf.setNumReduceTasks(1);
    jobConf.setMapperClass(TestMapper.class);
    jobConf.setReducerClass(TestReducer.class);
    jobConf.setInputFormat(SplunkInputFormat.class);
    jobConf.setOutputKeyClass(LongWritable.class);
    jobConf.setOutputValueClass(Text.class);

    TextOutputFormat.setOutputPath(jobConf, new Path(args[1]));

    JobClient.runJob(jobConf);
    return 0;
}
From source file: com.zjy.mongo.util.MongoTool.java
License: Apache License

private int runMapredJob(final Configuration conf) {
    final JobConf job = new JobConf(conf, getClass());
    /**
     * Any arguments specified with -D <property>=<value>
     * on the CLI will be picked up and set here
     * They override any XML level values
     * Note that -D<space> is important - no space will
     * not work as it gets picked up by Java itself
     */
    // TODO - Do we need to set job name somehow more specifically?
    // This may or may not be correct/sane
    job.setJarByClass(getClass());
    final Class<? extends org.apache.hadoop.mapred.Mapper> mapper = MapredMongoConfigUtil.getMapper(conf);

    if (LOG.isDebugEnabled()) {
        LOG.debug("Mapper Class: " + mapper);
        LOG.debug("Input URI: " + conf.get(MapredMongoConfigUtil.INPUT_URI));
    }
    job.setMapperClass(mapper);
    Class<? extends org.apache.hadoop.mapred.Reducer> combiner = MapredMongoConfigUtil.getCombiner(conf);
    if (combiner != null) {
        job.setCombinerClass(combiner);
    }
    job.setReducerClass(MapredMongoConfigUtil.getReducer(conf));

    job.setOutputFormat(MapredMongoConfigUtil.getOutputFormat(conf));
    job.setOutputKeyClass(MapredMongoConfigUtil.getOutputKey(conf));
    job.setOutputValueClass(MapredMongoConfigUtil.getOutputValue(conf));
    job.setInputFormat(MapredMongoConfigUtil.getInputFormat(conf));
    Class mapOutputKeyClass = MapredMongoConfigUtil.getMapperOutputKey(conf);
    Class mapOutputValueClass = MapredMongoConfigUtil.getMapperOutputValue(conf);

    if (mapOutputKeyClass != null) {
        job.setMapOutputKeyClass(mapOutputKeyClass);
    }
    if (mapOutputValueClass != null) {
        job.setMapOutputValueClass(mapOutputValueClass);
    }

    /**
     * Determines if the job will run verbosely e.g. print debug output
     * Only works with foreground jobs
     */
    final boolean verbose = MapredMongoConfigUtil.isJobVerbose(conf);
    /**
     * Run job in foreground aka wait for completion or background?
     */
    final boolean background = MapredMongoConfigUtil.isJobBackground(conf);

    try {
        RunningJob runningJob = JobClient.runJob(job);
        if (background) {
            LOG.info("Setting up and running MapReduce job in background.");
            return 0;
        } else {
            LOG.info("Setting up and running MapReduce job in foreground, will wait for results. {Verbose? "
                    + verbose + "}");
            runningJob.waitForCompletion();
            return 0;
        }
    } catch (final Exception e) {
        LOG.error("Exception while executing job... ", e);
        return 1;
    }
}
From source file: de.l3s.streamcorpus.mapreduce.TerrierIndexing.java
License: Mozilla Public License

/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    long time = System.currentTimeMillis();

    // For the moment: Hard-code the terrier home to quick test
    System.setProperty("terrier.home", "/home/tuan.tran/executable/StreamCorpusIndexer");

    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.debug("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return 0;
    } else if (args.length == 0) {
        logger.debug("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    }
    /*else {
        logger.fatal(usage());
        return 0;
    }*/

    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return 0;
    }

    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJarByClass(TerrierIndexing.class);
    conf.setJobName("StreamCorpusIndexer: Terrier Indexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return 0;
    }

    // boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    boolean blockIndexing = true;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);

    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }

    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);

    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));

    // not sure if this is effective in YARN
    conf.setNumMapTasks(2000);

    // increase the heap usage
    conf.set("mapreduce.map.memory.mb", "6100");
    conf.set("mapred.job.map.memory.mb", "6100");
    conf.set("mapreduce.reduce.memory.mb", "6144");
    conf.set("mapred.job.reduce.memory.mb", "6144");
    conf.set("mapreduce.map.java.opts", "-Xmx6100m");
    conf.set("mapred.map.child.java.opts", "-Xmx6100m");
    conf.set("mapreduce.reduce.java.opts", "-Xmx6144m");
    conf.set("mapred.reduce.child.opts", "-Xmx6144m");

    //conf.setBoolean("mapred.used.genericoptionsparser", true);

    // This is the nasty thing in MapReduce v2 and YARN: They always prefer their ancient jars first.
    // Set this on to say you don't like it
    conf.set("mapreduce.job.user.classpath.first", "true");

    // increase the yarn memory to 10 GB
    conf.set("yarn.nodemanager.resource.memory-mb", "12288");
    conf.set("yarn.nodemanager.resource.cpu-vcores", "16");
    conf.set("yarn.scheduler.minimum-allocation-mb", "4096");

    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }

    /*JobID jobId = null;
    boolean ranOK = true;
    try{
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        e.printStackTrace();
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    */

    //if (ranOK)
    //{
    System.out.println("Merging indices");
    if (!docPartitioned) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
    }

    Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
            docPartitioned ? numberOfReducers : 1, jf);
    //}
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
    return 0;
}
From source file: de.l3s.streamcorpus.StreamCorpusIndexing.java
License: Mozilla Public License

/** Starts the MapReduce indexing.
 * @param args
 * @throws Exception
 */
public int run(String[] args) throws Exception {
    long time = System.currentTimeMillis();

    // For the moment: Hard-code the terrier home to quick test
    System.setProperty("terrier.home", "/home/tuan.tran/executable/StreamCorpusIndexer");

    boolean docPartitioned = false;
    int numberOfReducers = Integer
            .parseInt(ApplicationSetup.getProperty("terrier.hadoop.indexing.reducers", "26"));
    final HadoopPlugin.JobFactory jf = HadoopPlugin.getJobFactory("HOD-TerrierIndexing");
    if (args.length == 2 && args[0].equals("-p")) {
        logger.debug("Document-partitioned Mode, " + numberOfReducers + " output indices.");
        numberOfReducers = Integer.parseInt(args[1]);
        docPartitioned = true;
    } else if (args.length == 1 && args[0].equals("--merge")) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
        else
            logger.error("No point merging 1 reduce task output");
        return 0;
    } else if (args.length == 0) {
        logger.debug("Term-partitioned Mode, " + numberOfReducers + " reducers creating one inverted index.");
        docPartitioned = false;
        if (numberOfReducers > MAX_REDUCE) {
            logger.warn("Excessive reduce tasks (" + numberOfReducers + ") in use "
                    + "- SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm can use " + MAX_REDUCE + " at most");
        }
    }
    /*else {
        logger.fatal(usage());
        return 0;
    }*/

    if (!(CompressionFactory.getCompressionConfiguration("inverted", new String[0],
            false) instanceof BitCompressionConfiguration)) {
        logger.error("Sorry, only default BitCompressionConfiguration is supported by HadoopIndexing"
                + " - you can recompress the inverted index later using IndexRecompressor");
        return 0;
    }

    if (jf == null)
        throw new Exception("Could not get JobFactory from HadoopPlugin");
    final JobConf conf = jf.newJob();
    conf.setJarByClass(StreamCorpusIndexing.class);
    conf.setJobName("StreamCorpusIndexer: Terrier Indexing");
    if (Files.exists(ApplicationSetup.TERRIER_INDEX_PATH)
            && Index.existsIndex(ApplicationSetup.TERRIER_INDEX_PATH, ApplicationSetup.TERRIER_INDEX_PREFIX)) {
        logger.fatal("Cannot index while index exists at " + ApplicationSetup.TERRIER_INDEX_PATH + ","
                + ApplicationSetup.TERRIER_INDEX_PREFIX);
        return 0;
    }

    // boolean blockIndexing = ApplicationSetup.BLOCK_INDEXING;
    boolean blockIndexing = true;
    if (blockIndexing) {
        conf.setMapperClass(Hadoop_BlockSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BlockSinglePassIndexer.class);
    } else {
        conf.setMapperClass(Hadoop_BasicSinglePassIndexer.class);
        conf.setReducerClass(Hadoop_BasicSinglePassIndexer.class);
    }
    FileOutputFormat.setOutputPath(conf, new Path(ApplicationSetup.TERRIER_INDEX_PATH));
    conf.set("indexing.hadoop.prefix", ApplicationSetup.TERRIER_INDEX_PREFIX);
    conf.setMapOutputKeyClass(SplitEmittedTerm.class);
    conf.setMapOutputValueClass(MapEmittedPostingList.class);
    conf.setBoolean("indexing.hadoop.multiple.indices", docPartitioned);

    if (!conf.get("mapred.job.tracker").equals("local")) {
        conf.setMapOutputCompressorClass(GzipCodec.class);
        conf.setCompressMapOutput(true);
    } else {
        conf.setCompressMapOutput(false);
    }

    conf.setInputFormat(MultiFileCollectionInputFormat.class);
    conf.setOutputFormat(NullOutputFormat.class);
    conf.setOutputKeyComparatorClass(SplitEmittedTerm.SETRawComparatorTermSplitFlush.class);
    conf.setOutputValueGroupingComparator(SplitEmittedTerm.SETRawComparatorTerm.class);
    conf.setReduceSpeculativeExecution(false);

    // parse the collection.spec
    BufferedReader specBR = Files.openFileReader(ApplicationSetup.COLLECTION_SPEC);
    String line = null;
    List<Path> paths = new ArrayList<Path>();
    while ((line = specBR.readLine()) != null) {
        if (line.startsWith("#"))
            continue;
        paths.add(new Path(line));
    }
    specBR.close();
    FileInputFormat.setInputPaths(conf, paths.toArray(new Path[paths.size()]));

    // not sure if this is effective in YARN
    conf.setNumMapTasks(2000);

    // increase the heap usage
    conf.set("mapreduce.map.memory.mb", "6100");
    conf.set("mapred.job.map.memory.mb", "6100");
    conf.set("mapreduce.reduce.memory.mb", "6144");
    conf.set("mapred.job.reduce.memory.mb", "6144");
    conf.set("mapreduce.map.java.opts", "-Xmx6100m");
    conf.set("mapred.map.child.java.opts", "-Xmx6100m");
    conf.set("mapreduce.reduce.java.opts", "-Xmx6144m");
    conf.set("mapred.reduce.child.opts", "-Xmx6144m");

    //conf.setBoolean("mapred.used.genericoptionsparser", true);

    // This is the nasty thing in MapReduce v2 and YARN: They always prefer their ancient jars first.
    // Set this on to say you don't like it
    conf.set("mapreduce.job.user.classpath.first", "true");

    // increase the yarn memory to 10 GB
    conf.set("yarn.nodemanager.resource.memory-mb", "12288");
    conf.set("yarn.nodemanager.resource.cpu-vcores", "16");
    conf.set("yarn.scheduler.minimum-allocation-mb", "4096");

    conf.setNumReduceTasks(numberOfReducers);
    if (numberOfReducers > 1) {
        if (docPartitioned)
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitioner.class);
        else
            conf.setPartitionerClass(SplitEmittedTerm.SETPartitionerLowercaseAlphaTerm.class);
    } else {
        // for JUnit tests, we seem to need to restore the original partitioner class
        conf.setPartitionerClass(HashPartitioner.class);
    }

    /*JobID jobId = null;
    boolean ranOK = true;
    try{
        RunningJob rj = JobClient.runJob(conf);
        jobId = rj.getID();
        HadoopUtility.finishTerrierJob(conf);
    } catch (Exception e) {
        logger.error("Problem running job", e);
        e.printStackTrace();
        ranOK = false;
    }
    if (jobId != null) {
        deleteTaskFiles(ApplicationSetup.TERRIER_INDEX_PATH, jobId);
    }
    */

    //if (ranOK)
    //{
    System.out.println("Merging indices");
    if (!docPartitioned) {
        if (numberOfReducers > 1)
            mergeLexiconInvertedFiles(ApplicationSetup.TERRIER_INDEX_PATH, numberOfReducers);
    }

    Hadoop_BasicSinglePassIndexer.finish(ApplicationSetup.TERRIER_INDEX_PATH,
            docPartitioned ? numberOfReducers : 1, jf);
    //}
    System.out.println("Time Taken = " + ((System.currentTimeMillis() - time) / 1000) + " seconds");
    jf.close();
    return 0;
}
From source file: edu.ohsu.sonmezsysbio.cloudbreak.command.CommandNovoalignSingleEnds.java

public void runHadoopJob(Configuration configuration) throws IOException, URISyntaxException {
    JobConf conf = new JobConf(configuration);
    conf.setJobName("Single End Alignment");
    conf.setJarByClass(Cloudbreak.class);
    FileInputFormat.addInputPath(conf, new Path(hdfsDataDir));
    Path outputDir = new Path(hdfsAlignmentsDir);
    FileSystem.get(conf).delete(outputDir);
    FileOutputFormat.setOutputPath(conf, outputDir);

    addDistributedCacheFile(conf, reference, "novoalign.reference");
    addDistributedCacheFile(conf, pathToNovoalign, "novoalign.executable");
    if (pathToNovoalignLicense != null) {
        addDistributedCacheFile(conf, pathToNovoalignLicense, "novoalign.license");
    }
    DistributedCache.createSymlink(conf);

    conf.set("mapred.task.timeout", "3600000");
    conf.set("novoalign.threshold", threshold);
    conf.set("novoalign.quality.format", qualityFormat);

    conf.setInputFormat(SequenceFileInputFormat.class);

    conf.setMapperClass(NovoalignSingleEndMapper.class);
    conf.setMapOutputKeyClass(Text.class);
    conf.setMapOutputValueClass(Text.class);
    conf.setCompressMapOutput(true);

    conf.setReducerClass(SingleEndAlignmentsToPairsReducer.class);

    conf.setOutputFormat(SequenceFileOutputFormat.class);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.set("mapred.output.compress", "true");
    conf.set("mapred.output.compression", "org.apache.hadoop.io.compress.SnappyCodec");

    JobClient.runJob(conf);
}
From source file: edu.ub.ahstfg.indexer.Indexer.java
License: Open Source License

@Override
public int run(String[] arg0) throws Exception {
    LOG.info("Creating Hadoop job for Indexer.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(Indexer.class);

    LOG.info("Setting input path to '" + INPUT_PATH + "'");
    FileInputFormat.setInputPaths(job, new Path(INPUT_PATH));
    // Set filters if it's necessary.

    LOG.info("Clearing the output path at '" + OUTPUT_PATH + "'");
    // Change URI to Path if it's necessary.
    FileSystem fs = FileSystem.get(new URI(OUTPUT_PATH), job);
    if (fs.exists(new Path(OUTPUT_PATH))) {
        fs.delete(new Path(OUTPUT_PATH), true);
    }

    LOG.info("Setting output path to '" + OUTPUT_PATH + "'");
    FileOutputFormat.setOutputPath(job, new Path(OUTPUT_PATH));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Setting input format.");
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(IndexOutputFormat.class);

    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(IndexRecord.class);

    LOG.info("Setting mapper and reducer.");
    job.setMapperClass(IndexerMapper.class);
    job.setMapOutputValueClass(ParsedDocument.class);
    job.setReducerClass(IndexerReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}
From source file: edu.ub.ahstfg.indexer.wordcount.WordCount.java
License: Open Source License

@Override
public int run(String[] args) throws Exception {
    LOG.info("Creating Hadoop job for ARC input files word count.");
    JobConf job = new JobConf(getConf());
    job.setJarByClass(WordCount.class);

    LOG.info("Setting input path to '" + inputPath + "'");
    FileInputFormat.setInputPaths(job, new Path(inputPath));
    // Set filters if it's necessary.

    LOG.info("Clearing the output path at '" + outputPath + "'");
    // Change URI to Path if it's necessary.
    FileSystem fs = FileSystem.get(new URI(outputPath), job);
    if (fs.exists(new Path(outputPath))) {
        fs.delete(new Path(outputPath), true);
    }

    LOG.info("Setting output path to '" + outputPath + "'");
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    FileOutputFormat.setCompressOutput(job, false);

    LOG.info("Setting input format.");
    // job.setInputFormat(TextInputFormat.class);
    job.setInputFormat(ArcInputFormat.class);
    LOG.info("Setting output format.");
    job.setOutputFormat(TextOutputFormat.class);

    LOG.info("Setting output data types.");
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(LongWritable.class);

    LOG.info("Setting mapper and reducer.");
    // job.setMapperClass(WordCountTextInputMapper.class);
    job.setMapperClass(WordCountArcInputMapper.class);
    job.setReducerClass(LongSumReducer.class);

    if (JobClient.runJob(job).isSuccessful()) {
        return 0;
    } else {
        return 1;
    }
}
From source file: edu.uci.ics.fuzzyjoin.hadoop.FuzzyJoinDriver.java
License: Apache License

public static void run(JobConf job) throws IOException {
    job.setJarByClass(FuzzyJoinDriver.class);
    //
    // print info
    //
    String ret = "FuzzyJoinDriver(" + job.getJobName() + ")\n" + " Input Path: {";
    Path inputs[] = FileInputFormat.getInputPaths(job);
    for (int ctr = 0; ctr < inputs.length; ctr++) {
        if (ctr > 0) {
            ret += "\n ";
        }
        ret += inputs[ctr].toString();
    }
    ret += "}\n";
    ret += " Output Path: " + FileOutputFormat.getOutputPath(job) + "\n" + " Map Jobs: "
            + job.getNumMapTasks() + "\n" + " Reduce Jobs: " + job.getNumReduceTasks() + "\n"
            + " Properties: {";
    String[][] properties = new String[][] {
            new String[] { FuzzyJoinConfig.SIMILARITY_NAME_PROPERTY, FuzzyJoinConfig.SIMILARITY_NAME_VALUE },
            new String[] { FuzzyJoinConfig.SIMILARITY_THRESHOLD_PROPERTY,
                    "" + FuzzyJoinConfig.SIMILARITY_THRESHOLD_VALUE },
            new String[] { FuzzyJoinConfig.TOKENIZER_PROPERTY, FuzzyJoinConfig.TOKENIZER_VALUE },
            new String[] { TOKENS_PACKAGE_PROPERTY, TOKENS_PACKAGE_VALUE },
            new String[] { TOKENS_LENGTHSTATS_PROPERTY, "" + TOKENS_LENGTHSTATS_VALUE },
            new String[] { RIDPAIRS_GROUP_CLASS_PROPERTY, RIDPAIRS_GROUP_CLASS_VALUE },
            new String[] { RIDPAIRS_GROUP_FACTOR_PROPERTY, "" + RIDPAIRS_GROUP_FACTOR_VALUE },
            new String[] { FuzzyJoinConfig.DATA_TOKENS_PROPERTY, "" },
            new String[] { DATA_JOININDEX_PROPERTY, "" }, };
    for (int crt = 0; crt < properties.length; crt++) {
        if (crt > 0) {
            ret += "\n ";
        }
        ret += properties[crt][0] + "=" + job.get(properties[crt][0], properties[crt][1]);
    }
    ret += "}";
    System.out.println(ret);
    //
    // run job
    //
    Date startTime = new Date();
    System.out.println("Job started: " + startTime);
    JobClient.runJob(job);
    Date end_time = new Date();
    System.out.println("Job ended: " + end_time);
    System.out.println(
            "The job took " + (end_time.getTime() - startTime.getTime()) / (float) 1000.0 + " seconds.");
}