List of usage examples for org.apache.hadoop.mapreduce.Job.isComplete()
public boolean isComplete() throws IOException
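isComplete() returns true once the job has finished in any terminal state (succeeded, failed, or killed), so it is typically paired with a non-blocking submit() and a follow-up isSuccessful() check, as in most of the examples below. A minimal, self-contained polling sketch (the class name, paths, and identity-mapper setup are illustrative assumptions, not taken from any of the source files):
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class IsCompletePollExample { // hypothetical driver, for illustration only
    public static void main(String[] args) throws Exception {
        Job job = Job.getInstance(new Configuration(), "isComplete poll example");
        job.setJarByClass(IsCompletePollExample.class);
        job.setMapperClass(Mapper.class); // identity mapper, map-only job
        job.setNumReduceTasks(0);
        job.setOutputKeyClass(LongWritable.class);
        job.setOutputValueClass(Text.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        job.submit(); // non-blocking, unlike waitForCompletion(true)
        while (!job.isComplete()) { // true once the job finishes, successfully or not
            Thread.sleep(1000);
        }
        System.out.println(job.isSuccessful() ? "succeeded" : "failed");
    }
}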
From source file:BU.MET.CS755.SpeciesIterDriver2.java
static boolean MRSpeciesRank(String args[], int iterCnt) {
    long newCounterVal = 0;
    long totalLinks = 1; // Initialize to 1 to prevent divide by zero
    long totalIterations = 0;
    Job theJob = null;
    conf = new JobConf(SpeciesIterDriver2.class);
    conf.setJobName("Species Iter");
    conf.setNumReduceTasks(5);
    conf.setOutputKeyClass(Text.class);
    conf.setOutputValueClass(Text.class);
    conf.setMapperClass(SpeciesIterMapper2.class);
    conf.setReducerClass(SpeciesIterReducer2.class);
    boolean nextIterationNeeded = true;
    while (nextIterationNeeded || numExtraIterations != 0) {
        long iterationNumber = 0;
        if ((iterCnt == 0) || (iterCnt == 1)) {
            inputpath = args[1] + "0";
        } else {
            inputpath = args[1] + iterCnt;
        }
        iterCnt++;
        conf.set("iterationNumber", Integer.toString(iterCnt));
        conf.set("totalLinks", Long.toString(totalLinks));
        outputpath = args[1] + iterCnt;
        FileInputFormat.setInputPaths(conf, new Path(inputpath));
        FileOutputFormat.setOutputPath(conf, new Path(outputpath));
        try {
            theJob = new Job(conf, "SpeciesIter");
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            if (theJob != null) {
                theJob.waitForCompletion(true);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        try {
            if (theJob.isComplete()) {
                Counters jobCtrs = theJob.getCounters();
                if (jobCtrs != null) {
                    newCounterVal = jobCtrs.findCounter(ITERATION_COUNTER.ITERATIONS_NEEDED).getValue();
                }
                // If reducer recorded change in species rank, repeat iteration.
                if ((newCounterVal > 0) || (iterCnt == 1)) {
                    nextIterationNeeded = true;
                } else {
                    nextIterationNeeded = false;
                    numExtraIterations--; // Do one extra iteration
                }
                totalLinks = jobCtrs.findCounter(BU.MET.CS755.SpeciesIterDriver2.ITERATION_COUNTER.TOTAL_LINKS)
                        .getValue();
            }
            totalIterations += 1;
            if (totalIterations > 200) {
                System.out.println("too many iterations!!");
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
    System.out.println("Total iterations = " + totalIterations);
    return true;
}
From source file:co.nubetech.hiho.dedup.DedupJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Configuration conf = getConf();
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Job job = new Job(conf);
    job.setJobName("Dedup job");
    job.setJarByClass(DedupJob.class);
    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);
    if (dedupBy.equals("key")) {
        job.setMapperClass(DedupKeyMapper.class);
        job.setReducerClass(DedupKeyReducer.class);
        job.setMapOutputValueClass(inputValueClass);
    } else if (dedupBy.equals("value")) {
        job.setMapperClass(DedupValueMapper.class);
        job.setReducerClass(DedupValueReducer.class);
        job.setMapOutputValueClass(inputKeyClass);
    }
    job.setInputFormatClass(inputFormatClass);
    if (inputFormat.equals("co.nubetech.hiho.dedup.DelimitedTextInputFormat")) {
        DelimitedTextInputFormat.setProperties(job, delimiter, column);
    }
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    job.setPartitionerClass(HihoHashPartitioner.class);
    FileInputFormat.setInputPaths(job, inputPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsRead = counters.findCounter(DedupRecordCounter.TOTAL_RECORDS_READ).getValue();
            badRecords = counters.findCounter(DedupRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(DedupRecordCounter.OUTPUT).getValue();
            duplicateRecords = totalRecordsRead - output;
            logger.info("Total records read are: " + totalRecordsRead);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
            logger.info("Duplicate records are: " + duplicateRecords);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:co.nubetech.hiho.merge.MergeJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    populateConfiguration(args);
    try {
        checkMandatoryConfs();
    } catch (HIHOException e1) {
        e1.printStackTrace();
        throw new Exception(e1);
    }
    Class inputFormatClass = Class.forName(inputFormat);
    Class outputFormatClass = Class.forName(outputFormat);
    Class inputKeyClass = Class.forName(inputKeyClassName);
    Class inputValueClass = Class.forName(inputValueClassName);
    Configuration conf = getConf();
    conf.set(HIHOConf.MERGE_OLD_PATH, oldPath);
    conf.set(HIHOConf.MERGE_NEW_PATH, newPath);
    Job job = new Job(conf);
    job.setJobName("Merge job");
    job.setJarByClass(MergeJob.class);
    if (mergeBy.equals("key")) {
        job.setMapperClass(MergeKeyMapper.class);
        job.setReducerClass(MergeKeyReducer.class);
    } else if (mergeBy.equals("value")) {
        job.setMapperClass(MergeValueMapper.class);
        job.setReducerClass(MergeValueReducer.class);
    }
    job.setInputFormatClass(inputFormatClass);
    DelimitedTextInputFormat.setProperties(job, delimiter, column);
    job.setMapOutputKeyClass(HihoTuple.class);
    job.setMapOutputValueClass(HihoValue.class);
    job.setOutputKeyClass(inputKeyClass);
    job.setOutputValueClass(inputValueClass);
    FileInputFormat.setInputPaths(job, oldPath + "," + newPath);
    job.setOutputFormatClass(outputFormatClass);
    FileOutputFormat.setOutputPath(job, new Path(outputPath));
    try {
        logger.debug("Output format class is " + job.getOutputFormatClass());
        logger.debug("Class is " + ReflectionUtils
                .newInstance(job.getOutputFormatClass(), job.getConfiguration()).getClass().getName());
        job.waitForCompletion(false);
        if (job.isComplete()) {
            Counters counters = job.getCounters();
            totalRecordsOld = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_OLD).getValue();
            totalRecordsNew = counters.findCounter(MergeRecordCounter.TOTAL_RECORDS_NEW).getValue();
            badRecords = counters.findCounter(MergeRecordCounter.BAD_RECORD).getValue();
            output = counters.findCounter(MergeRecordCounter.OUTPUT).getValue();
            logger.info("Total old records read are: " + totalRecordsOld);
            logger.info("Total new records read are: " + totalRecordsNew);
            logger.info("Bad Records are: " + badRecords);
            logger.info("Output records are: " + output);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    return 0;
}
From source file:com.ikanow.aleph2.analytics.hadoop.assets.VerySimpleLocalExample.java
License:Apache License
@SuppressWarnings({ "deprecation", "unchecked", "rawtypes" })
@Test
public void test_localHadoopLaunch()
throws IOException, IllegalStateException, ClassNotFoundException, InterruptedException {
// 0) Setup the temp dir
final String temp_dir = System.getProperty("java.io.tmpdir") + File.separator;
//final Path tmp_path = FileContext.getLocalFSFileContext().makeQualified(new Path(temp_dir));
final Path tmp_path2 = FileContext.getLocalFSFileContext()
.makeQualified(new Path(temp_dir + "/tmp_output"));
try {
FileContext.getLocalFSFileContext().delete(tmp_path2, true);
} catch (Exception e) {
} // (just doesn't exist yet)
// 1) Setup config with local mode
final Configuration config = new Configuration();
config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)
config.set("fs.file.impl", "org.apache.hadoop.fs.LocalFileSystem");
config.set("mapred.job.tracker", "local");
config.set("fs.defaultFS", "local");
config.unset("mapreduce.framework.name");
// If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
// 2) Build job and do more setup using the Job API
//TODO: not sure why this is deprecated, it doesn't seem to be in v1? We do need to move to JobConf at some point, but I ran into some
// issues when trying to do everything I needed to for V1, so seems expedient to start here and migrate away
final Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)
// Input format:
//TODO: fails because of guava issue, looks like we'll need to move to 2.7 and check it works with 2.5.x server?
//TextInputFormat.addInputPath(hj, tmp_path);
//hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName ("org.apache.hadoop.mapreduce.lib.input.TextInputFormat"));
hj.setInputFormatClass(TestInputFormat.class);
// Output format:
hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
.forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat"));
TextOutputFormat.setOutputPath(hj, tmp_path2);
// Mapper etc (combiner/reducer are similar)
hj.setMapperClass(TestMapper.class);
hj.setOutputKeyClass(Text.class);
hj.setOutputValueClass(Text.class);
hj.setNumReduceTasks(0); // (disable reducer for now)
hj.setJar("test");
try {
hj.submit();
} catch (UnsatisfiedLinkError e) {
throw new RuntimeException(
"This is a windows/hadoop compatibility problem - adding the hadoop-commons in the misc_test_assets subdirectory to the top of the classpath should resolve it (and does in V1), though I haven't yet made that work with Aleph2",
e);
}
//hj.getJobID().toString();
while (!hj.isComplete()) {
Thread.sleep(1000);
}
assertTrue("Finished successfully", hj.isSuccessful());
}
From source file:com.ikanow.infinit.e.processing.custom.launcher.CustomHadoopTaskLauncher.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes" })
public String runHadoopJob(CustomMapReduceJobPojo job, String tempJarLocation)
throws IOException, SAXException, ParserConfigurationException {
StringWriter xml = new StringWriter();
String outputCollection = job.outputCollectionTemp;// (non-append mode)
if ((null != job.appendResults) && job.appendResults)
outputCollection = job.outputCollection; // (append mode, write directly in....)
else if (null != job.incrementalMode)
job.incrementalMode = false; // (not allowed to be in incremental mode and not update mode)
createConfigXML(xml, job.jobtitle, job.inputCollection,
InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.INPUTFIELDS),
job.isCustomTable, job.getOutputDatabase(), job._id.toString(), outputCollection, job.mapper,
job.reducer, job.combiner,
InfiniteHadoopUtils.getQueryOrProcessing(job.query, InfiniteHadoopUtils.QuerySpec.QUERY),
job.communityIds, job.outputKey, job.outputValue, job.arguments, job.incrementalMode,
job.submitterID, job.selfMerge, job.outputCollection, job.appendResults);
ClassLoader savedClassLoader = Thread.currentThread().getContextClassLoader();
URLClassLoader child = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
savedClassLoader);
Thread.currentThread().setContextClassLoader(child);
// Check version: for now, any infinit.e.data_model with an VersionTest class is acceptable
boolean dataModelLoaded = true;
try {
URLClassLoader versionTest = new URLClassLoader(new URL[] { new File(tempJarLocation).toURI().toURL() },
null);
try {
Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, versionTest);
} catch (ClassNotFoundException e2) {
//(this is fine, will use the cached version)
dataModelLoaded = false;
}
if (dataModelLoaded)
Class.forName("com.ikanow.infinit.e.data_model.custom.InfiniteMongoVersionTest", true, versionTest);
} catch (ClassNotFoundException e1) {
throw new RuntimeException(
"This JAR is compiled with too old a version of the data-model, please recompile with Jan 2014 (rc2) onwards");
}
// Now load the XML into a configuration object:
Configuration config = new Configuration();
// Add the client configuration overrides:
if (!bLocalMode) {
String hadoopConfigPath = props_custom.getHadoopConfigPath() + "/hadoop/";
config.addResource(new Path(hadoopConfigPath + "core-site.xml"));
config.addResource(new Path(hadoopConfigPath + "mapred-site.xml"));
config.addResource(new Path(hadoopConfigPath + "hadoop-site.xml"));
} //TESTED
try {
DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
Document doc = dBuilder.parse(new ByteArrayInputStream(xml.toString().getBytes()));
NodeList nList = doc.getElementsByTagName("property");
for (int temp = 0; temp < nList.getLength(); temp++) {
Node nNode = nList.item(temp);
if (nNode.getNodeType() == Node.ELEMENT_NODE) {
Element eElement = (Element) nNode;
String name = getTagValue("name", eElement);
String value = getTagValue("value", eElement);
if ((null != name) && (null != value)) {
config.set(name, value);
}
}
}
} catch (Exception e) {
throw new IOException(e.getMessage());
}
// Some other config defaults:
// (not sure if these are actually applied, or derived from the defaults - for some reason they don't appear in CDH's client config)
config.set("mapred.map.tasks.speculative.execution", "false");
config.set("mapred.reduce.tasks.speculative.execution", "false");
// (default security is ignored here, have it set via HADOOP_TASKTRACKER_CONF in cloudera)
// Now run the JAR file
try {
BasicDBObject advancedConfigurationDbo = null;
try {
advancedConfigurationDbo = (null != job.query)
? ((BasicDBObject) com.mongodb.util.JSON.parse(job.query))
: (new BasicDBObject());
} catch (Exception e) {
advancedConfigurationDbo = new BasicDBObject();
}
boolean esMode = advancedConfigurationDbo.containsField("qt") && !job.isCustomTable;
if (esMode && !job.inputCollection.equals("doc_metadata.metadata")) {
throw new RuntimeException(
"Infinit.e Queries are only supported on doc_metadata - use MongoDB queries instead.");
}
config.setBoolean("mapred.used.genericoptionsparser", true); // (just stops an annoying warning from appearing)
if (bLocalMode) { // local job tracker and FS mode
config.set("mapred.job.tracker", "local");
config.set("fs.default.name", "local");
} else {
if (bTestMode) { // run job tracker locally but FS mode remotely
config.set("mapred.job.tracker", "local");
} else { // normal job tracker
String trackerUrl = HadoopUtils.getXMLProperty(
props_custom.getHadoopConfigPath() + "/hadoop/mapred-site.xml", "mapred.job.tracker");
config.set("mapred.job.tracker", trackerUrl);
}
String fsUrl = HadoopUtils.getXMLProperty(
props_custom.getHadoopConfigPath() + "/hadoop/core-site.xml", "fs.default.name");
config.set("fs.default.name", fsUrl);
}
if (!dataModelLoaded && !(bTestMode || bLocalMode)) { // If running distributed and no data model loaded then add ourselves
Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/",
"infinit.e.data_model.jar", config);
DistributedCache.addFileToClassPath(jarToCache, config);
jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/",
"infinit.e.processing.custom.library.jar", config);
DistributedCache.addFileToClassPath(jarToCache, config);
} //TESTED
// Debug scripts (only if they exist), and only in non local/test mode
if (!bLocalMode && !bTestMode) {
try {
Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/",
"custom_map_error_handler.sh", config);
config.set("mapred.map.task.debug.script", "custom_map_error_handler.sh " + job.jobtitle);
config.set("mapreduce.map.debug.script", "custom_map_error_handler.sh " + job.jobtitle);
DistributedCache.createSymlink(config);
DistributedCache.addCacheFile(scriptToCache.toUri(), config);
} catch (Exception e) {
} // just carry on
try {
Path scriptToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/scripts/",
"custom_reduce_error_handler.sh", config);
config.set("mapred.reduce.task.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle);
config.set("mapreduce.reduce.debug.script", "custom_reduce_error_handler.sh " + job.jobtitle);
DistributedCache.createSymlink(config);
DistributedCache.addCacheFile(scriptToCache.toUri(), config);
} catch (Exception e) {
} // just carry on
} //TODO (???): TOTEST
// (need to do these 2 things here before the job is created, at which point the config class has been copied across)
//1)
Class<?> mapperClazz = Class.forName(job.mapper, true, child);
if (ICustomInfiniteInternalEngine.class.isAssignableFrom(mapperClazz)) { // Special case: internal custom engine, so gets an additional integration hook
ICustomInfiniteInternalEngine preActivities = (ICustomInfiniteInternalEngine) mapperClazz
.newInstance();
preActivities.preTaskActivities(job._id, job.communityIds, config, !(bTestMode || bLocalMode));
} //TESTED
//2)
if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) {
// Need to download the GridFSZip file
try {
Path jarToCache = InfiniteHadoopUtils.cacheLocalFile("/opt/infinite-home/lib/unbundled/",
"GridFSZipFile.jar", config);
DistributedCache.addFileToClassPath(jarToCache, config);
} catch (Throwable t) {
} // (this is fine, will already be on the classpath .. otherwise lots of other stuff will be failing all over the place!)
}
if (job.inputCollection.equals("records")) {
InfiniteElasticsearchHadoopUtils.handleElasticsearchInput(job, config, advancedConfigurationDbo);
//(won't run under 0.19 so running with "records" should cause all sorts of exceptions)
} //TESTED (by hand)
if (bTestMode || bLocalMode) { // If running locally, turn "snappy" off - tomcat isn't pointing its native library path in the right place
config.set("mapred.map.output.compression.codec", "org.apache.hadoop.io.compress.DefaultCodec");
}
// Manually specified caches
List<URL> localJarCaches = InfiniteHadoopUtils.handleCacheList(advancedConfigurationDbo.get("$caches"),
job, config, props_custom);
Job hj = new Job(config); // (NOTE: from here, changes to config are ignored)
try {
if (null != localJarCaches) {
if (bLocalMode || bTestMode) {
Method method = URLClassLoader.class.getDeclaredMethod("addURL", new Class[] { URL.class });
method.setAccessible(true);
method.invoke(child, localJarCaches.toArray());
} //TOTEST (tested logically)
}
Class<?> classToLoad = Class.forName(job.mapper, true, child);
hj.setJarByClass(classToLoad);
if (job.inputCollection.equalsIgnoreCase("filesystem")) {
String inputPath = null;
try {
inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url");
if (!inputPath.endsWith("/")) {
inputPath = inputPath + "/";
}
} catch (Exception e) {
}
if (null == inputPath) {
throw new RuntimeException("Must specify 'file.url' if reading from filesystem.");
}
inputPath = InfiniteHadoopUtils.authenticateInputDirectory(job, inputPath);
InfiniteFileInputFormat.addInputPath(hj, new Path(inputPath + "*/*")); // (that extra bit makes it recursive)
InfiniteFileInputFormat.setMaxInputSplitSize(hj, 33554432); // (32MB)
InfiniteFileInputFormat.setInfiniteInputPathFilter(hj, config);
hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
"com.ikanow.infinit.e.data_model.custom.InfiniteFileInputFormat", true, child));
} else if (job.inputCollection.equalsIgnoreCase("file.binary_shares")) {
String[] oidStrs = null;
try {
String inputPath = MongoDbUtil.getProperty(advancedConfigurationDbo, "file.url");
Pattern oidExtractor = Pattern.compile("inf://share/([^/]+)");
Matcher m = oidExtractor.matcher(inputPath);
if (m.find()) {
oidStrs = m.group(1).split("\\s*,\\s*");
} else {
throw new RuntimeException(
"file.url must be in format inf://share/<oid-list>/<string>: " + inputPath);
}
InfiniteHadoopUtils.authenticateShareList(job, oidStrs);
} catch (Exception e) {
throw new RuntimeException(
"Authentication error: " + e.getMessage() + ": " + advancedConfigurationDbo, e);
}
hj.getConfiguration().setStrings("mapred.input.dir", oidStrs);
hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
"com.ikanow.infinit.e.data_model.custom.InfiniteShareInputFormat", true, child));
} else if (job.inputCollection.equals("records")) {
hj.setInputFormatClass((Class<? extends InputFormat>) Class
.forName("com.ikanow.infinit.e.data_model.custom.InfiniteEsInputFormat", true, child));
} else {
if (esMode) {
hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
"com.ikanow.infinit.e.processing.custom.utils.InfiniteElasticsearchMongoInputFormat",
true, child));
} else {
hj.setInputFormatClass((Class<? extends InputFormat>) Class.forName(
"com.ikanow.infinit.e.data_model.custom.InfiniteMongoInputFormat", true, child));
}
}
if ((null != job.exportToHdfs) && job.exportToHdfs) {
//TODO (INF-2469): Also, if the output key is BSON then also run as text (but output as JSON?)
Path outPath = InfiniteHadoopUtils.ensureOutputDirectory(job, props_custom);
if ((null != job.outputKey) && (null != job.outputValue)
&& job.outputKey.equalsIgnoreCase("org.apache.hadoop.io.text")
&& job.outputValue.equalsIgnoreCase("org.apache.hadoop.io.text")) {
// (slight hack before I sort out the horrendous job class - if key/val both text and exporting to HDFS then output as Text)
hj.setOutputFormatClass((Class<? extends OutputFormat>) Class
.forName("org.apache.hadoop.mapreduce.lib.output.TextOutputFormat", true, child));
TextOutputFormat.setOutputPath(hj, outPath);
} //TESTED
else {
hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName(
"org.apache.hadoop.mapreduce.lib.output.SequenceFileOutputFormat", true, child));
SequenceFileOutputFormat.setOutputPath(hj, outPath);
} //TESTED
} else { // normal case, stays in MongoDB
hj.setOutputFormatClass((Class<? extends OutputFormat>) Class.forName(
"com.ikanow.infinit.e.data_model.custom.InfiniteMongoOutputFormat", true, child));
}
hj.setMapperClass((Class<? extends Mapper>) mapperClazz);
String mapperOutputKeyOverride = advancedConfigurationDbo.getString("$mapper_key_class", null);
if (null != mapperOutputKeyOverride) {
hj.setMapOutputKeyClass(Class.forName(mapperOutputKeyOverride));
} //TESTED
String mapperOutputValueOverride = advancedConfigurationDbo.getString("$mapper_value_class", null);
if (null != mapperOutputValueOverride) {
hj.setMapOutputValueClass(Class.forName(mapperOutputValueOverride));
} //TESTED
if ((null != job.reducer) && !job.reducer.startsWith("#") && !job.reducer.equalsIgnoreCase("null")
&& !job.reducer.equalsIgnoreCase("none")) {
hj.setReducerClass((Class<? extends Reducer>) Class.forName(job.reducer, true, child));
// Variable reducers:
if (null != job.query) {
try {
hj.setNumReduceTasks(advancedConfigurationDbo.getInt("$reducers", 1));
} catch (Exception e) {
try {
// (just check it's not a string that is a valid int)
hj.setNumReduceTasks(
Integer.parseInt(advancedConfigurationDbo.getString("$reducers", "1")));
} catch (Exception e2) {
}
}
} //TESTED
} else {
hj.setNumReduceTasks(0);
}
if ((null != job.combiner) && !job.combiner.startsWith("#")
&& !job.combiner.equalsIgnoreCase("null") && !job.combiner.equalsIgnoreCase("none")) {
hj.setCombinerClass((Class<? extends Reducer>) Class.forName(job.combiner, true, child));
}
hj.setOutputKeyClass(Class.forName(job.outputKey, true, child));
hj.setOutputValueClass(Class.forName(job.outputValue, true, child));
hj.setJobName(job.jobtitle);
currJobName = job.jobtitle;
} catch (Error e) { // (messing about with class loaders = lots of chances for errors!)
throw new RuntimeException(e.getMessage(), e);
}
if (bTestMode || bLocalMode) {
hj.submit();
currThreadId = null;
Logger.getRootLogger().addAppender(this);
currLocalJobId = hj.getJobID().toString();
currLocalJobErrs.setLength(0);
while (!hj.isComplete()) {
Thread.sleep(1000);
}
Logger.getRootLogger().removeAppender(this);
if (hj.isSuccessful()) {
if (this.currLocalJobErrs.length() > 0) {
return "local_done: " + this.currLocalJobErrs.toString();
} else {
return "local_done";
}
} else {
return "Error: " + this.currLocalJobErrs.toString();
}
} else {
hj.submit();
String jobId = hj.getJobID().toString();
return jobId;
}
} catch (Exception e) {
e.printStackTrace();
Thread.currentThread().setContextClassLoader(savedClassLoader);
return "Error: " + InfiniteHadoopUtils.createExceptionMessage(e);
} finally {
Thread.currentThread().setContextClassLoader(savedClassLoader);
}
}
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
public void testUniformSizeDistCp() throws Exception {
    try {
        clearState();
        Path sourcePath = new Path(SOURCE_PATH).makeQualified(cluster.getFileSystem());
        List<Path> sources = new ArrayList<Path>();
        sources.add(sourcePath);
        Path targetPath = new Path(TARGET_PATH).makeQualified(cluster.getFileSystem());
        DistCpOptions options = new DistCpOptions(sources, targetPath);
        options.setOutPutDirectory(counterOutputPath);
        options.setAtomicCommit(true);
        options.setBlocking(false);
        Job job = new DistCp(configuration, options).execute();
        Path workDir = CopyOutputFormat.getWorkingDirectory(job);
        Path finalDir = CopyOutputFormat.getCommitDirectory(job);
        while (!job.isComplete()) {
            if (cluster.getFileSystem().exists(workDir)) {
                break;
            }
        }
        job.waitForCompletion(true);
        Assert.assertFalse(cluster.getFileSystem().exists(workDir));
        Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
        Assert.assertFalse(cluster.getFileSystem()
                .exists(new Path(job.getConfiguration().get(DistCpConstants.CONF_LABEL_META_FOLDER))));
        verifyResults();
    } catch (Exception e) {
        LOG.error("Exception encountered", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}
From source file:com.inmobi.conduit.distcp.tools.TestDistCp.java
License:Apache License
@Test
public void testDynamicDistCp() throws Exception {
    try {
        clearState();
        Path sourcePath = new Path(SOURCE_PATH).makeQualified(cluster.getFileSystem());
        List<Path> sources = new ArrayList<Path>();
        sources.add(sourcePath);
        Path targetPath = new Path(TARGET_PATH).makeQualified(cluster.getFileSystem());
        DistCpOptions options = new DistCpOptions(sources, targetPath);
        options.setCopyStrategy("dynamic");
        options.setOutPutDirectory(counterOutputPath);
        options.setAtomicCommit(true);
        options.setAtomicWorkPath(new Path("/work"));
        options.setBlocking(false);
        Job job = new DistCp(configuration, options).execute();
        Path workDir = CopyOutputFormat.getWorkingDirectory(job);
        Path finalDir = CopyOutputFormat.getCommitDirectory(job);
        while (!job.isComplete()) {
            if (cluster.getFileSystem().exists(workDir)) {
                break;
            }
        }
        job.waitForCompletion(true);
        Assert.assertFalse(cluster.getFileSystem().exists(workDir));
        Assert.assertTrue(cluster.getFileSystem().exists(finalDir));
        verifyResults();
    } catch (Exception e) {
        LOG.error("Exception encountered", e);
        Assert.fail("Unexpected exception: " + e.getMessage());
    }
}
From source file:crunch.MaxTemperature.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
if (args.length != 1) {
JobBuilder.printUsage(this, "<job ID>");
return -1;
}
String jobID = args[0];
// vv NewMissingTemperatureFields
Cluster cluster = new Cluster(getConf());
Job job = cluster.getJob(JobID.forName(jobID));
// ^^ NewMissingTemperatureFields
if (job == null) {
System.err.printf("No job with ID %s found.\n", jobID);
return -1;
}
if (!job.isComplete()) {
System.err.printf("Job %s is not complete.\n", jobID);
return -1;
}
// vv NewMissingTemperatureFields
Counters counters = job.getCounters();
long missing = counters.findCounter(MaxTemperatureWithCounters.Temperature.MISSING).getValue();
long total = counters.findCounter(TaskCounter.MAP_INPUT_RECORDS).getValue();
// ^^ NewMissingTemperatureFields
System.out.printf("Records with missing temperature fields: %.2f%%\n", 100.0 * missing / total);
return 0;
}
From source file:gobblin.compaction.mapreduce.MRCompactor.java
License:Apache License
@Override
public void cancel() throws IOException {
    try {
        for (Map.Entry<Dataset, Job> entry : MRCompactor.RUNNING_MR_JOBS.entrySet()) {
            Job hadoopJob = entry.getValue();
            if (!hadoopJob.isComplete()) {
                LOG.info(String.format("Killing hadoop job %s for dataset %s", hadoopJob.getJobID(),
                        entry.getKey()));
                hadoopJob.killJob();
            }
        }
    } finally {
        try {
            ExecutorsUtils.shutdownExecutorService(this.jobExecutor, Optional.of(LOG), 0, TimeUnit.NANOSECONDS);
        } finally {
            if (this.verifier.isPresent()) {
                this.verifier.get().closeNow();
            }
        }
    }
}
From source file:gobblin.compaction.mapreduce.MRCompactorJobRunner.java
License:Apache License
private void submitAndWait(Job job) throws ClassNotFoundException, IOException, InterruptedException {
    job.submit();
    MRCompactor.addRunningHadoopJob(this.dataset, job);
    LOG.info(String.format("MR job submitted for dataset %s, input %s, url: %s", this.dataset, getInputPaths(),
            job.getTrackingURL()));
    while (!job.isComplete()) {
        if (this.policy == Policy.ABORT_ASAP) {
            LOG.info(String.format("MR job for dataset %s, input %s killed due to input data incompleteness."
                    + " Will try again later", this.dataset, getInputPaths()));
            job.killJob();
            return;
        }
        Thread.sleep(MR_JOB_CHECK_COMPLETE_INTERVAL_MS);
    }
    if (!job.isSuccessful()) {
        throw new RuntimeException(String.format("MR job failed for topic %s, input %s, url: %s", this.dataset,
                getInputPaths(), job.getTrackingURL()));
    }
}