List of usage examples for org.apache.hadoop.mapreduce Job addFileToClassPath
public void addFileToClassPath(Path file) throws IOException
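Before the project examples below, here is a minimal usage sketch. It assumes the jar must first be staged on a filesystem visible to the cluster (typically HDFS) before being added to the job classpath; the class name, jar name, and paths are illustrative placeholders, not taken from any project listed here.

// Minimal sketch (assumed/hypothetical paths and jar name).
// addFileToClassPath() expects a path the cluster can reach, so the local jar
// is copied to HDFS first and the staged path is added to the task classpaths.
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class AddFileToClassPathExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);

        // Hypothetical locations: a jar on local disk and a staging directory on HDFS.
        Path localJar = new Path("file:///tmp/app-lib.jar");
        Path stagedJar = new Path("/user/example/libs/app-lib.jar");

        // Stage the jar so every task can download it via the distributed cache.
        fs.copyFromLocalFile(localJar, stagedJar);

        Job job = Job.getInstance(conf, "classpath-example");
        job.addFileToClassPath(stagedJar); // adds the staged jar to the task classpaths
        // ... set mapper/reducer, input/output formats, then submit the job ...
    }
}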
From source file:com.ikanow.aleph2.analytics.hadoop.services.BeJobLauncher.java
License:Open Source License
/** Cache the system and user classpaths
 * @param job
 * @param context
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName()))).map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { // cache doesn't exist
                    // Local version
                    Path srcPath = FileContext.getLocalFSFileContext()
                            .makeQualified(new Path(f_p._1().toString()));
                    fc.util().copy(srcPath, f_p._2());
                }
                return f_p._2();
            })).forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}
From source file:com.ikanow.aleph2.analytics.r.services.BeJobLauncher.java
License:Apache License
/** Cache the system and user classpaths
 * @param job
 * @param context
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName()))).map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    //TODO (ALEPH-12): need to clear out the cache intermittently
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { // cache doesn't exist
                    // Local version
                    try (FSDataOutputStream outer = fc.create(f_p._2(), EnumSet.of(CreateFlag.CREATE),
                            // ie should fail if the destination file already exists
                            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
                        Files.copy(f_p._1(), outer.getWrappedStream());
                    } catch (FileAlreadyExistsException e) {
                        // (carry on - the file is versioned so it can't be out of date)
                    }
                }
                return f_p._2();
            })).forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}
From source file:com.msd.gin.halyard.tools.HalyardParallelExport.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(newOption("h", null, "Prints this help"));
    options.addOption(newOption("v", null, "Prints version"));
    options.addOption(newOption("s", "source_htable", "Source HBase table with Halyard RDF store"));
    options.addOption(newOption("q", "sparql_query",
            "SPARQL tuple or graph query with use of '" + PARALLEL_SPLIT_FUNCTION_URI + "' function"));
    options.addOption(newOption("t", "target_url",
            "file://<path>/<file_name>{0}.<ext> or hdfs://<path>/<file_name>{0}.<ext> or jdbc:<jdbc_connection>/<table_name>"));
    options.addOption(newOption("p", "property=value", "JDBC connection properties"));
    options.addOption(newOption("l", "driver_classpath", "JDBC driver classpath delimited by ':'"));
    options.addOption(newOption("c", "driver_class", "JDBC driver class name"));
    try {
        CommandLine cmd = new PosixParser().parse(options, args);
        if (args.length == 0 || cmd.hasOption('h')) {
            printHelp(options);
            return -1;
        }
        if (cmd.hasOption('v')) {
            Properties p = new Properties();
            try (InputStream in = HalyardExport.class
                    .getResourceAsStream("/META-INF/maven/com.msd.gin.halyard/hbasesail/pom.properties")) {
                if (in != null)
                    p.load(in);
            }
            System.out.println("Halyard Parallel Export version " + p.getProperty("version", "unknown"));
            return 0;
        }
        if (!cmd.getArgList().isEmpty())
            throw new ExportException("Unknown arguments: " + cmd.getArgList().toString());
        for (char c : "sqt".toCharArray()) {
            if (!cmd.hasOption(c))
                throw new ExportException("Missing mandatory option: " + c);
        }
        for (char c : "sqtlc".toCharArray()) {
            String s[] = cmd.getOptionValues(c);
            if (s != null && s.length > 1)
                throw new ExportException("Multiple values for option: " + c);
        }
        String source = cmd.getOptionValue('s');
        String query = cmd.getOptionValue('q');
        if (!query.contains(PARALLEL_SPLIT_FUNCTION_NAME)) {
            throw new ExportException("Parallel export SPARQL query must contain '"
                    + PARALLEL_SPLIT_FUNCTION_URI + "' function.");
        }
        String target = cmd.getOptionValue('t');
        if ((target.startsWith("file:") || target.startsWith("hdfs:")) && !target.contains("{0}")) {
            throw new ExportException(
                    "Parallel export file target must contain '{0}' counter in the file path or name.");
        }
        getConf().set(SOURCE, source);
        getConf().set(QUERY, query);
        getConf().set(TARGET, target);
        String driver = cmd.getOptionValue('c');
        if (driver != null) {
            getConf().set(JDBC_DRIVER, driver);
        }
        String props[] = cmd.getOptionValues('p');
        if (props != null) {
            for (int i = 0; i < props.length; i++) {
                props[i] = Base64.encodeBase64String(props[i].getBytes(UTF8));
            }
            getConf().setStrings(JDBC_PROPERTIES, props);
        }
        TableMapReduceUtil.addDependencyJars(getConf(), HalyardExport.class, NTriplesUtil.class, Rio.class,
                AbstractRDFHandler.class, RDFFormat.class, RDFParser.class, HTable.class,
                HBaseConfiguration.class, AuthenticationProtos.class, Trace.class);
        HBaseConfiguration.addHbaseResources(getConf());
        Job job = Job.getInstance(getConf(), "HalyardParallelExport " + source + " -> " + target);
        String cp = cmd.getOptionValue('l');
        if (cp != null) {
            String jars[] = cp.split(":");
            for (int i = 0; i < jars.length; i++) {
                File f = new File(jars[i]);
                if (!f.isFile())
                    throw new ExportException("Invalid JDBC driver classpath element: " + jars[i]);
                job.addFileToClassPath(new Path(f.toURI()));
                jars[i] = f.getName();
            }
            job.getConfiguration().setStrings(JDBC_CLASSPATH, jars);
        }
        job.setJarByClass(HalyardParallelExport.class);
        job.setMaxMapAttempts(1);
        job.setMapperClass(ParallelExportMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Void.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(IndexedInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            LOG.info("Parallel Export Completed..");
            return 0;
        }
        return -1;
    } catch (RuntimeException exp) {
        System.out.println(exp.getMessage());
        printHelp(options);
        throw exp;
    }
}
From source file:io.druid.indexer.JobHelper.java
License:Apache License
static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath,
        FileSystem fs, Job job) throws IOException {
    // Create distributed directory if it does not exist.
    // rename will always fail if destination does not exist.
    fs.mkdirs(distributedClassPath);

    // Non-snapshot jar files are uploaded to the shared classpath.
    final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
    if (!fs.exists(hdfsPath)) {
        // Multiple jobs can try to upload the jar here; to avoid them overwriting each other,
        // first upload to intermediateClassPath and then rename to the distributedClassPath.
        final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
        uploadJar(jarFile, intermediateHdfsPath, fs);
        IOException exception = null;
        try {
            log.info("Renaming jar to path[%s]", hdfsPath);
            fs.rename(intermediateHdfsPath, hdfsPath);
            if (!fs.exists(hdfsPath)) {
                throw new IOException(String.format("File does not exist even after moving from[%s] to [%s]",
                        intermediateHdfsPath, hdfsPath));
            }
        } catch (IOException e) {
            // rename failed, possibly due to race condition. check if some other job has uploaded the jar file.
            try {
                if (!fs.exists(hdfsPath)) {
                    log.error(e, "IOException while Renaming jar file");
                    exception = e;
                }
            } catch (IOException e1) {
                e.addSuppressed(e1);
                exception = e;
            }
        } finally {
            try {
                if (fs.exists(intermediateHdfsPath)) {
                    fs.delete(intermediateHdfsPath, false);
                }
            } catch (IOException e) {
                if (exception == null) {
                    exception = e;
                } else {
                    exception.addSuppressed(e);
                }
            }
            if (exception != null) {
                throw exception;
            }
        }
    }
    job.addFileToClassPath(hdfsPath);
}
From source file:io.druid.indexer.JobHelper.java
License:Apache License
static void addSnapshotJarToClassPath(File jarFile, Path intermediateClassPath, FileSystem fs, Job job)
        throws IOException {
    // Snapshot jars are uploaded to the non-shared intermediate directory.
    final Path hdfsPath = new Path(intermediateClassPath, jarFile.getName());
    // existing is used to prevent uploading the file multiple times in the same run.
    if (!existing.contains(hdfsPath)) {
        uploadJar(jarFile, hdfsPath, fs);
        existing.add(hdfsPath);
    }
    job.addFileToClassPath(hdfsPath);
}
From source file:org.apache.druid.indexer.JobHelper.java
License:Apache License
static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath,
        FileSystem fs, Job job) throws IOException {
    // Create distributed directory if it does not exist.
    // rename will always fail if destination does not exist.
    fs.mkdirs(distributedClassPath);

    // Non-snapshot jar files are uploaded to the shared classpath.
    final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
    if (shouldUploadOrReplace(jarFile, hdfsPath, fs)) {
        // Multiple jobs can try to upload the jar here; to avoid them overwriting each other,
        // first upload to intermediateClassPath and then rename to the distributedClassPath.
        final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
        uploadJar(jarFile, intermediateHdfsPath, fs);
        IOException exception = null;
        try {
            log.info("Renaming jar to path[%s]", hdfsPath);
            fs.rename(intermediateHdfsPath, hdfsPath);
            if (!fs.exists(hdfsPath)) {
                throw new IOE("File does not exist even after moving from[%s] to [%s]", intermediateHdfsPath,
                        hdfsPath);
            }
        } catch (IOException e) {
            // rename failed, possibly due to race condition. check if some other job has uploaded the jar file.
            try {
                if (!fs.exists(hdfsPath)) {
                    log.error(e, "IOException while Renaming jar file");
                    exception = e;
                }
            } catch (IOException e1) {
                e.addSuppressed(e1);
                exception = e;
            }
        } finally {
            try {
                if (fs.exists(intermediateHdfsPath)) {
                    fs.delete(intermediateHdfsPath, false);
                }
            } catch (IOException e) {
                if (exception == null) {
                    exception = e;
                } else {
                    exception.addSuppressed(e);
                }
            }
            if (exception != null) {
                throw exception;
            }
        }
    }
    job.addFileToClassPath(hdfsPath);
}
From source file:org.apache.druid.indexer.JobHelper.java
License:Apache License
static void addSnapshotJarToClassPath(File jarFile, Path intermediateClassPath, FileSystem fs, Job job)
        throws IOException {
    // Snapshot jars are uploaded to the non-shared intermediate directory.
    final Path hdfsPath = new Path(intermediateClassPath, jarFile.getName());
    // Prevent uploading the same file multiple times in the same run.
    if (!fs.exists(hdfsPath)) {
        uploadJar(jarFile, hdfsPath, fs);
    }
    job.addFileToClassPath(hdfsPath);
}
From source file:org.apache.mahout.cf.taste.hbase.item.RecommenderJob.java
License:Apache License
@Override
public int run(String[] args) throws Exception {
    addInputOption();
    addOutputOption();
    addOption("numRecommendations", "n", "Number of recommendations per user",
            String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
    addOption("usersFile", null, "File of users to recommend for", null);
    addOption("itemsFile", null, "File of items to recommend for", null);
    addOption("filterFile", "f",
            "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
                    + "the recommendations for that user (optional)", null);
    addOption("userItemFile", "uif",
            "File containing comma-separated userID,itemID pairs (optional). "
                    + "Used to include only these items into recommendations. "
                    + "Cannot be used together with usersFile or itemsFile", null);
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("maxPrefsPerUser", "mxp",
            "Maximum number of preferences considered per user in final recommendation phase",
            String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this in the similarity computation "
                    + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
    addOption("maxPrefsInItemSimilarity", "mpiis",
            "max number of preferences to consider per user or item in the "
                    + "item similarity computation phase, users or items with more preferences will be sampled down (default: "
                    + DEFAULT_MAX_PREFS + ')',
            String.valueOf(DEFAULT_MAX_PREFS));
    addOption("similarityClassname", "s",
            "Name of distributed similarity measures class to instantiate, "
                    + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')',
            true);
    addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
    addOption("outputPathForSimilarityMatrix", "opfsm",
            "write the item similarity matrix to this path (optional)", false);
    addOption("randomSeed", null, "use this seed for sampling", false);
    addFlag("sequencefileOutput", null, "write the output into a SequenceFile instead of a text file");

    Map<String, List<String>> parsedArgs = parseArguments(args, true, true);
    if (parsedArgs == null) {
        return -1;
    }

    //Create column family recommendations
    HBaseClient hb = new HBaseClient(getConf());
    String workingTable = getConf().get(PARAM_WORKING_TABLE);
    String cfRecommendations = getConf().get(PARAM_CF_RECOMMENDATIONS);
    if (!hb.hasColumn(workingTable, cfRecommendations))
        hb.addColumn(workingTable, cfRecommendations);

    int numRecommendations = Integer.parseInt(getOption("numRecommendations"));
    String usersFile = getOption("usersFile");
    String itemsFile = getOption("itemsFile");
    String filterFile = getOption("filterFile");
    String userItemFile = getOption("userItemFile");
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity"));
    int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
    String similarityClassname = getOption("similarityClassname");
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold"))
            : RowSimilarityJob.NO_THRESHOLD;
    long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed"))
            : RowSimilarityJob.NO_FIXED_RANDOM_SEED;

    Path prepPath = getTempPath(DEFAULT_PREPARE_PATH);
    Path similarityMatrixPath = getTempPath("similarityMatrix");
    Path explicitFilterPath = getTempPath("explicitFilterPath");
    Path partialMultiplyPath = getTempPath("partialMultiply");

    AtomicInteger currentPhase = new AtomicInteger();

    int numberOfUsers = -1;

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(),
                new String[] { "--input", getInputPath().toString(), "--output", prepPath.toString(),
                        "--minPrefsPerUser", String.valueOf(minPrefsPerUser), "--booleanData",
                        String.valueOf(booleanData), "--tempDir", getTempPath().toString(), });
        numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /* special behavior if phase 1 is skipped */
        if (numberOfUsers == -1) {
            numberOfUsers = (int) HadoopUtil.countRecords(
                    new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS), PathType.LIST, null, getConf());
        }

        //calculate the co-occurrence matrix
        ToolRunner.run(getConf(), new RowSimilarityJob(),
                new String[] { "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(),
                        "--output", similarityMatrixPath.toString(),
                        "--numberOfColumns", String.valueOf(numberOfUsers),
                        "--similarityClassname", similarityClassname,
                        "--maxObservationsPerRow", String.valueOf(maxPrefsInItemSimilarity),
                        "--maxObservationsPerColumn", String.valueOf(maxPrefsInItemSimilarity),
                        "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem),
                        "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
                        "--threshold", String.valueOf(threshold),
                        "--randomSeed", String.valueOf(randomSeed),
                        "--tempDir", getTempPath().toString(), });

        // write out the similarity matrix if the user specified that behavior
        if (hasOption("outputPathForSimilarityMatrix")) {
            Path outputPathForSimilarityMatrix = new Path(getOption("outputPathForSimilarityMatrix"));

            Job outputSimilarityMatrix = prepareJob(similarityMatrixPath, outputPathForSimilarityMatrix,
                    SequenceFileInputFormat.class, ItemSimilarityJob.MostSimilarItemPairsMapper.class,
                    EntityEntityWritable.class, DoubleWritable.class,
                    ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class,
                    DoubleWritable.class, TextOutputFormat.class);

            Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration();
            mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR,
                    new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
            mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
            outputSimilarityMatrix.waitForCompletion(true);
        }
    }

    //start the multiplication of the co-occurrence matrix by the user vectors
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job partialMultiply = Job.getInstance(getConf(), "partialMultiply");
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();

        MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath, SequenceFileInputFormat.class,
                SimilarityMatrixRowWrapperMapper.class);
        MultipleInputs.addInputPath(partialMultiply, new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS),
                SequenceFileInputFormat.class, UserVectorSplitterMapper.class);
        partialMultiply.setJarByClass(ToVectorAndPrefReducer.class);
        partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
        partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
        partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
        partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
        partialMultiply.setOutputKeyClass(VarIntWritable.class);
        partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
        partialMultiplyConf.setBoolean("mapreduce.compress.map.output", true);
        partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());

        if (usersFile != null) {
            partialMultiplyConf.set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        if (userItemFile != null) {
            partialMultiplyConf.set(IDReader.USER_ITEM_FILE, userItemFile);
        }
        partialMultiplyConf.setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED, maxPrefsPerUser);

        boolean succeeded = partialMultiply.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        //filter out any users we don't care about
        /* convert the user/item pairs to filter if a filterfile has been specified */
        if (filterFile != null) {
            Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                    ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                    ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
                    SequenceFileOutputFormat.class);
            boolean succeeded = itemFiltering.waitForCompletion(true);
            if (!succeeded) {
                return -1;
            }
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }

        //extract out the recommendations
        Configuration aggregateAndRecommendConf_hb = HBaseConfiguration.create(getConf());
        aggregateAndRecommendConf_hb.setBoolean("mapred.compress.map.output", true);

        Job aggregateAndRecommend_hb = Job.getInstance(aggregateAndRecommendConf_hb);
        aggregateAndRecommendConf_hb = aggregateAndRecommend_hb.getConfiguration();
        aggregateAndRecommend_hb.addFileToClassPath(new Path("lib/recommender.jar"));
        aggregateAndRecommend_hb.setJobName(HadoopUtil.getCustomJobName(getClass().getSimpleName(),
                aggregateAndRecommend_hb, PartialMultiplyMapper.class, AggregateAndRecommendReducer.class));
        aggregateAndRecommend_hb.setJarByClass(AggregateAndRecommendReducer.class); // class that contains mapper and reducer
        aggregateAndRecommend_hb.setInputFormatClass(SequenceFileInputFormat.class);
        aggregateAndRecommend_hb.setMapperClass(PartialMultiplyMapper.class);
        aggregateAndRecommend_hb.setMapOutputKeyClass(VarLongWritable.class);
        aggregateAndRecommend_hb.setMapOutputValueClass(PrefAndSimilarityColumnWritable.class);
        FileInputFormat.setInputPaths(aggregateAndRecommend_hb, new Path(aggregateAndRecommendInput));
        TableMapReduceUtil.initTableReducerJob(getConf().get(PARAM_WORKING_TABLE),
                AggregateAndRecommendReducer.class, aggregateAndRecommend_hb);
        aggregateAndRecommend_hb.setReducerClass(AggregateAndRecommendReducer.class);

        if (itemsFile != null) {
            aggregateAndRecommendConf_hb.set(AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }
        if (userItemFile != null) {
            aggregateAndRecommendConf_hb.set(IDReader.USER_ITEM_FILE, userItemFile);
        }
        if (filterFile != null) {
            setS3SafeCombinedInputPath(aggregateAndRecommend_hb, getTempPath(), partialMultiplyPath,
                    explicitFilterPath);
        }
        setIOSort(aggregateAndRecommend_hb);
        aggregateAndRecommendConf_hb.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        aggregateAndRecommendConf_hb.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, numRecommendations);
        aggregateAndRecommendConf_hb.setBoolean(BOOLEAN_DATA, booleanData);

        if (!aggregateAndRecommend_hb.waitForCompletion(true)) {
            return -1;
        }
    }
    return 0;
}
From source file:org.apache.solr.hadoop.hack.MiniMRClientClusterFactory.java
License:Apache License
public static MiniMRClientCluster create(Class<?> caller, String identifier, int noOfNMs, Configuration conf,
        File testWorkDir) throws IOException {
    if (conf == null) {
        conf = new Configuration();
    }
    FileSystem fs = FileSystem.get(conf);

    Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir").makeQualified(fs);
    Path appJar = new Path(testRootDir, "MRAppJar.jar");

    // Copy MRAppJar and make it private.
    Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR);
    fs.copyFromLocalFile(appMasterJar, appJar);
    fs.setPermission(appJar, new FsPermission("744"));

    Job job = Job.getInstance(conf);
    job.addFileToClassPath(appJar);

    Path callerJar = new Path(JarFinder.getJar(caller));
    Path remoteCallerJar = new Path(testRootDir, callerJar.getName());
    fs.copyFromLocalFile(callerJar, remoteCallerJar);
    fs.setPermission(remoteCallerJar, new FsPermission("744"));
    job.addFileToClassPath(remoteCallerJar);

    MiniMRYarnCluster miniMRYarnCluster;
    try {
        miniMRYarnCluster = new MiniMRYarnCluster(identifier, noOfNMs, testWorkDir);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    job.getConfiguration().set("minimrclientcluster.caller.name", identifier);
    job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number", noOfNMs);
    miniMRYarnCluster.init(job.getConfiguration());
    miniMRYarnCluster.start();

    return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir);
}
From source file:org.janusgraph.hadoop.compat.h2.DistCacheConfigurer.java
License:Apache License
@Override
public void configure(Job job) throws IOException {
    Configuration conf = job.getConfiguration();

    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem jobFS = FileSystem.get(conf);

    for (Path p : getLocalPaths()) {
        Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS);
        // Calling this method decompresses the archive and makes Hadoop
        // handle its class files individually. This leads to crippling
        // overhead times (10+ seconds) even with the LocalJobRunner,
        // courtesy of o.a.h.yarn.util.FSDownload.changePermissions
        // copying and changing the mode of each classfile copy individually.
        //job.addArchiveToClassPath(p);
        // Just add the compressed archive instead:
        job.addFileToClassPath(stagedPath);
    }

    // We don't really need to set a map reduce job jar here,
    // but doing so suppresses a warning
    String mj = getMapredJar();
    if (null != mj)
        job.setJar(mj);
}