Example usage for org.apache.hadoop.mapreduce Job addFileToClassPath

Introduction

On this page you can find example usage for org.apache.hadoop.mapreduce Job addFileToClassPath.

Prototype

public void addFileToClassPath(Path file) throws IOException 

Document

Adds a file path to the current set of classpath entries. It adds the file to the distributed cache as well.
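
Before the examples, here is a minimal, self-contained sketch of the typical call pattern (not taken from any of the sources below; the jar path "/libs/my-dependency.jar" is a hypothetical placeholder, and the jar is assumed to already exist on the job's default file system):

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapreduce.Job;

public class AddFileToClassPathSketch {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "addFileToClassPath-example");

        // Hypothetical jar already present on the job's default FileSystem (e.g. HDFS);
        // addFileToClassPath registers it as a task classpath entry and distributes it
        // to the tasks via the distributed cache.
        job.addFileToClassPath(new Path("/libs/my-dependency.jar"));

        // Remaining job setup (mapper, reducer, input/output paths, etc.)
        // would follow before the job is submitted.
    }
}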

Usage

From source file:com.ikanow.aleph2.analytics.hadoop.services.BeJobLauncher.java

License:Open Source License

/** Cache the system and user classpaths
 * @param job
 * @param bucket
 * @param context
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName()))).map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { //cache doesn't exist
                    // Local version
                    Path srcPath = FileContext.getLocalFSFileContext()
                            .makeQualified(new Path(f_p._1().toString()));
                    fc.util().copy(srcPath, f_p._2());
                }
                return f_p._2();
            })).forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}

From source file:com.ikanow.aleph2.analytics.r.services.BeJobLauncher.java

License:Apache License

/** Cache the system and user classpaths
 * @param job
 * @param bucket
 * @param context
 * @throws IOException
 * @throws ExecutionException
 * @throws InterruptedException
 * @throws IllegalArgumentException
 */
protected static void cacheJars(final Job job, final DataBucketBean bucket, final IAnalyticsContext context)
        throws IllegalArgumentException, InterruptedException, ExecutionException, IOException {
    final FileContext fc = context.getServiceContext().getStorageService()
            .getUnderlyingPlatformDriver(FileContext.class, Optional.empty()).get();
    final String rootPath = context.getServiceContext().getStorageService().getRootPath();

    // Aleph2 libraries: need to cache them
    context.getAnalyticsContextLibraries(Optional.empty()).stream().map(f -> new File(f))
            .map(f -> Tuples._2T(f, new Path(rootPath + "/" + f.getName()))).map(Lambdas.wrap_u(f_p -> {
                final FileStatus fs = Lambdas.get(() -> {
                    //TODO (ALEPH-12): need to clear out the cache intermittently
                    try {
                        return fc.getFileStatus(f_p._2());
                    } catch (Exception e) {
                        return null;
                    }
                });
                if (null == fs) { //cache doesn't exist
                    // Local version
                    try (FSDataOutputStream outer = fc.create(f_p._2(), EnumSet.of(CreateFlag.CREATE), // ie should fail if the destination file already exists 
                            org.apache.hadoop.fs.Options.CreateOpts.createParent())) {
                        Files.copy(f_p._1(), outer.getWrappedStream());
                    } catch (FileAlreadyExistsException e) {//(carry on - the file is versioned so it can't be out of date)
                    }
                }
                return f_p._2();
            })).forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(path)));

    // User libraries: this is slightly easier since one of the 2 keys
    // is the HDFS path (the other is the _id)
    context.getAnalyticsLibraries(Optional.of(bucket), bucket.analytic_thread().jobs()).get().entrySet()
            .stream().map(kv -> kv.getKey()).filter(path -> path.startsWith(rootPath))
            .forEach(Lambdas.wrap_consumer_u(path -> job.addFileToClassPath(new Path(path))));
}

From source file:com.msd.gin.halyard.tools.HalyardParallelExport.java

License:Apache License

@Override
public int run(String[] args) throws Exception {
    Options options = new Options();
    options.addOption(newOption("h", null, "Prints this help"));
    options.addOption(newOption("v", null, "Prints version"));
    options.addOption(newOption("s", "source_htable", "Source HBase table with Halyard RDF store"));
    options.addOption(newOption("q", "sparql_query",
            "SPARQL tuple or graph query with use of '" + PARALLEL_SPLIT_FUNCTION_URI + "' function"));
    options.addOption(newOption("t", "target_url",
            "file://<path>/<file_name>{0}.<ext> or hdfs://<path>/<file_name>{0}.<ext> or jdbc:<jdbc_connection>/<table_name>"));
    options.addOption(newOption("p", "property=value", "JDBC connection properties"));
    options.addOption(newOption("l", "driver_classpath", "JDBC driver classpath delimited by ':'"));
    options.addOption(newOption("c", "driver_class", "JDBC driver class name"));
    try {
        CommandLine cmd = new PosixParser().parse(options, args);
        if (args.length == 0 || cmd.hasOption('h')) {
            printHelp(options);
            return -1;
        }
        if (cmd.hasOption('v')) {
            Properties p = new Properties();
            try (InputStream in = HalyardExport.class
                    .getResourceAsStream("/META-INF/maven/com.msd.gin.halyard/hbasesail/pom.properties")) {
                if (in != null)
                    p.load(in);
            }
            System.out.println("Halyard Parallel Export version " + p.getProperty("version", "unknown"));
            return 0;
        }
        if (!cmd.getArgList().isEmpty())
            throw new ExportException("Unknown arguments: " + cmd.getArgList().toString());
        for (char c : "sqt".toCharArray()) {
            if (!cmd.hasOption(c))
                throw new ExportException("Missing mandatory option: " + c);
        }
        for (char c : "sqtlc".toCharArray()) {
            String s[] = cmd.getOptionValues(c);
            if (s != null && s.length > 1)
                throw new ExportException("Multiple values for option: " + c);
        }
        String source = cmd.getOptionValue('s');
        String query = cmd.getOptionValue('q');
        if (!query.contains(PARALLEL_SPLIT_FUNCTION_NAME)) {
            throw new ExportException("Parallel export SPARQL query must contain '"
                    + PARALLEL_SPLIT_FUNCTION_URI + "' function.");
        }
        String target = cmd.getOptionValue('t');
        if ((target.startsWith("file:") || target.startsWith("hdfs:")) && !target.contains("{0}")) {
            throw new ExportException(
                    "Parallel export file target must contain '{0}' counter in the file path or name.");
        }
        getConf().set(SOURCE, source);
        getConf().set(QUERY, query);
        getConf().set(TARGET, target);
        String driver = cmd.getOptionValue('c');
        if (driver != null) {
            getConf().set(JDBC_DRIVER, driver);
        }
        String props[] = cmd.getOptionValues('p');
        if (props != null) {
            for (int i = 0; i < props.length; i++) {
                props[i] = Base64.encodeBase64String(props[i].getBytes(UTF8));
            }
            getConf().setStrings(JDBC_PROPERTIES, props);
        }
        TableMapReduceUtil.addDependencyJars(getConf(), HalyardExport.class, NTriplesUtil.class, Rio.class,
                AbstractRDFHandler.class, RDFFormat.class, RDFParser.class, HTable.class,
                HBaseConfiguration.class, AuthenticationProtos.class, Trace.class);
        HBaseConfiguration.addHbaseResources(getConf());
        Job job = Job.getInstance(getConf(), "HalyardParallelExport " + source + " -> " + target);
        String cp = cmd.getOptionValue('l');
        if (cp != null) {
            String jars[] = cp.split(":");
            for (int i = 0; i < jars.length; i++) {
                File f = new File(jars[i]);
                if (!f.isFile())
                    throw new ExportException("Invalid JDBC driver classpath element: " + jars[i]);
                job.addFileToClassPath(new Path(f.toURI()));
                jars[i] = f.getName();
            }
            job.getConfiguration().setStrings(JDBC_CLASSPATH, jars);
        }
        job.setJarByClass(HalyardParallelExport.class);
        job.setMaxMapAttempts(1);
        job.setMapperClass(ParallelExportMapper.class);
        job.setMapOutputKeyClass(NullWritable.class);
        job.setMapOutputValueClass(Void.class);
        job.setNumReduceTasks(0);
        job.setInputFormatClass(IndexedInputFormat.class);
        job.setOutputFormatClass(NullOutputFormat.class);
        TableMapReduceUtil.initCredentials(job);
        if (job.waitForCompletion(true)) {
            LOG.info("Parallel Export Completed..");
            return 0;
        }
        return -1;
    } catch (RuntimeException exp) {
        System.out.println(exp.getMessage());
        printHelp(options);
        throw exp;
    }

}

From source file:io.druid.indexer.JobHelper.java

License:Apache License

static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath,
        FileSystem fs, Job job) throws IOException {
    // Create distributed directory if it does not exist.
    // rename will always fail if destination does not exist.
    fs.mkdirs(distributedClassPath);

    // Non-snapshot jar files are uploaded to the shared classpath.
    final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
    if (!fs.exists(hdfsPath)) {
        // Multiple jobs can try to upload the jar here; to avoid overwriting
        // each other's files, first upload to intermediateClassPath and then
        // rename to distributedClassPath.
        final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
        uploadJar(jarFile, intermediateHdfsPath, fs);
        IOException exception = null;
        try {
            log.info("Renaming jar to path[%s]", hdfsPath);
            fs.rename(intermediateHdfsPath, hdfsPath);
            if (!fs.exists(hdfsPath)) {
                throw new IOException(String.format("File does not exist even after moving from[%s] to [%s]",
                        intermediateHdfsPath, hdfsPath));
            }
        } catch (IOException e) {
            // rename failed, possibly due to race condition. check if some other job has uploaded the jar file.
            try {
                if (!fs.exists(hdfsPath)) {
                    log.error(e, "IOException while Renaming jar file");
                    exception = e;
                }
            } catch (IOException e1) {
                e.addSuppressed(e1);
                exception = e;
            }
        } finally {
            try {
                if (fs.exists(intermediateHdfsPath)) {
                    fs.delete(intermediateHdfsPath, false);
                }
            } catch (IOException e) {
                if (exception == null) {
                    exception = e;
                } else {
                    exception.addSuppressed(e);
                }
            }
            if (exception != null) {
                throw exception;
            }
        }
    }
    job.addFileToClassPath(hdfsPath);
}

From source file:io.druid.indexer.JobHelper.java

License:Apache License

static void addSnapshotJarToClassPath(File jarFile, Path intermediateClassPath, FileSystem fs, Job job)
        throws IOException {
    // Snapshot jars are uploaded to a non-shared intermediate directory.
    final Path hdfsPath = new Path(intermediateClassPath, jarFile.getName());

    // 'existing' is used to prevent uploading the same file multiple times in the same run.
    if (!existing.contains(hdfsPath)) {
        uploadJar(jarFile, hdfsPath, fs);
        existing.add(hdfsPath);
    }
    job.addFileToClassPath(hdfsPath);
}

From source file:org.apache.druid.indexer.JobHelper.java

License:Apache License

static void addJarToClassPath(File jarFile, Path distributedClassPath, Path intermediateClassPath,
        FileSystem fs, Job job) throws IOException {
    // Create distributed directory if it does not exist.
    // rename will always fail if destination does not exist.
    fs.mkdirs(distributedClassPath);

    // Non-snapshot jar files are uploaded to the shared classpath.
    final Path hdfsPath = new Path(distributedClassPath, jarFile.getName());
    if (shouldUploadOrReplace(jarFile, hdfsPath, fs)) {
        // Multiple jobs can try to upload the jar here; to avoid overwriting
        // each other's files, first upload to intermediateClassPath and then
        // rename to distributedClassPath.
        final Path intermediateHdfsPath = new Path(intermediateClassPath, jarFile.getName());
        uploadJar(jarFile, intermediateHdfsPath, fs);
        IOException exception = null;
        try {
            log.info("Renaming jar to path[%s]", hdfsPath);
            fs.rename(intermediateHdfsPath, hdfsPath);
            if (!fs.exists(hdfsPath)) {
                throw new IOE("File does not exist even after moving from[%s] to [%s]", intermediateHdfsPath,
                        hdfsPath);
            }
        } catch (IOException e) {
            // rename failed, possibly due to race condition. check if some other job has uploaded the jar file.
            try {
                if (!fs.exists(hdfsPath)) {
                    log.error(e, "IOException while Renaming jar file");
                    exception = e;
                }
            } catch (IOException e1) {
                e.addSuppressed(e1);
                exception = e;
            }
        } finally {
            try {
                if (fs.exists(intermediateHdfsPath)) {
                    fs.delete(intermediateHdfsPath, false);
                }
            } catch (IOException e) {
                if (exception == null) {
                    exception = e;
                } else {
                    exception.addSuppressed(e);
                }
            }
            if (exception != null) {
                throw exception;
            }
        }
    }
    job.addFileToClassPath(hdfsPath);
}

From source file:org.apache.druid.indexer.JobHelper.java

License:Apache License

static void addSnapshotJarToClassPath(File jarFile, Path intermediateClassPath, FileSystem fs, Job job)
        throws IOException {
    // Snapshot jars are uploaded to a non-shared intermediate directory.
    final Path hdfsPath = new Path(intermediateClassPath, jarFile.getName());
    // Prevent uploading the same file multiple times in the same run.
    if (!fs.exists(hdfsPath)) {
        uploadJar(jarFile, hdfsPath, fs);
    }
    job.addFileToClassPath(hdfsPath);
}

From source file:org.apache.mahout.cf.taste.hbase.item.RecommenderJob.java

License:Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("numRecommendations", "n", "Number of recommendations per user",
            String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
    addOption("usersFile", null, "File of users to recommend for", null);
    addOption("itemsFile", null, "File of items to recommend for", null);
    addOption("filterFile", "f",
            "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
                    + "the recommendations for that user (optional)",
            null);
    addOption("userItemFile", "uif",
            "File containing comma-separated userID,itemID pairs (optional). "
                    + "Used to include only these items into recommendations. "
                    + "Cannot be used together with usersFile or itemsFile",
            null);
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("maxPrefsPerUser", "mxp",
            "Maximum number of preferences considered per user in final recommendation phase",
            String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this in the similarity computation " + "(default: "
                    + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
    addOption("maxPrefsInItemSimilarity", "mpiis",
            "max number of preferences to consider per user or item in the "
                    + "item similarity computation phase, users or items with more preferences will be sampled down (default: "
                    + DEFAULT_MAX_PREFS + ')',
            String.valueOf(DEFAULT_MAX_PREFS));
    addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, "
            + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')',
            true);
    addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);
    addOption("outputPathForSimilarityMatrix", "opfsm",
            "write the item similarity matrix to this path (optional)", false);
    addOption("randomSeed", null, "use this seed for sampling", false);
    addFlag("sequencefileOutput", null, "write the output into a SequenceFile instead of a text file");

    Map<String, List<String>> parsedArgs = parseArguments(args, true, true);
    if (parsedArgs == null) {
        return -1;
    }

    //Create column family recommendations
    HBaseClient hb = new HBaseClient(getConf());
    String workingTable = getConf().get(PARAM_WORKING_TABLE);
    String cfRecommendations = getConf().get(PARAM_CF_RECOMMENDATIONS);
    if (!hb.hasColumn(workingTable, cfRecommendations))
        hb.addColumn(workingTable, cfRecommendations);

    int numRecommendations = Integer.parseInt(getOption("numRecommendations"));
    String usersFile = getOption("usersFile");
    String itemsFile = getOption("itemsFile");
    String filterFile = getOption("filterFile");
    String userItemFile = getOption("userItemFile");
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity"));
    int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
    String similarityClassname = getOption("similarityClassname");
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold"))
            : RowSimilarityJob.NO_THRESHOLD;
    long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed"))
            : RowSimilarityJob.NO_FIXED_RANDOM_SEED;

    Path prepPath = getTempPath(DEFAULT_PREPARE_PATH);
    Path similarityMatrixPath = getTempPath("similarityMatrix");
    Path explicitFilterPath = getTempPath("explicitFilterPath");
    Path partialMultiplyPath = getTempPath("partialMultiply");

    AtomicInteger currentPhase = new AtomicInteger();

    int numberOfUsers = -1;

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(),
                new String[] { "--input", getInputPath().toString(), "--output", prepPath.toString(),
                        "--minPrefsPerUser", String.valueOf(minPrefsPerUser), "--booleanData",
                        String.valueOf(booleanData), "--tempDir", getTempPath().toString(), });

        numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {

        /* special behavior if phase 1 is skipped */
        if (numberOfUsers == -1) {
            numberOfUsers = (int) HadoopUtil.countRecords(
                    new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS), PathType.LIST, null,
                    getConf());
        }

        //calculate the co-occurrence matrix
        ToolRunner.run(getConf(), new RowSimilarityJob(),
                new String[] { "--input",
                        new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output",
                        similarityMatrixPath.toString(), "--numberOfColumns", String.valueOf(numberOfUsers),
                        "--similarityClassname", similarityClassname, "--maxObservationsPerRow",
                        String.valueOf(maxPrefsInItemSimilarity), "--maxObservationsPerColumn",
                        String.valueOf(maxPrefsInItemSimilarity), "--maxSimilaritiesPerRow",
                        String.valueOf(maxSimilaritiesPerItem), "--excludeSelfSimilarity",
                        String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--randomSeed",
                        String.valueOf(randomSeed), "--tempDir", getTempPath().toString(), });

        // write out the similarity matrix if the user specified that behavior
        if (hasOption("outputPathForSimilarityMatrix")) {
            Path outputPathForSimilarityMatrix = new Path(getOption("outputPathForSimilarityMatrix"));

            Job outputSimilarityMatrix = prepareJob(similarityMatrixPath, outputPathForSimilarityMatrix,
                    SequenceFileInputFormat.class, ItemSimilarityJob.MostSimilarItemPairsMapper.class,
                    EntityEntityWritable.class, DoubleWritable.class,
                    ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class,
                    DoubleWritable.class, TextOutputFormat.class);

            Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration();
            mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR,
                    new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
            mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
            outputSimilarityMatrix.waitForCompletion(true);
        }
    }

    //start the multiplication of the co-occurrence matrix by the user vectors
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job partialMultiply = Job.getInstance(getConf(), "partialMultiply");
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();

        MultipleInputs.addInputPath(partialMultiply, similarityMatrixPath, SequenceFileInputFormat.class,
                SimilarityMatrixRowWrapperMapper.class);
        MultipleInputs.addInputPath(partialMultiply,
                new Path(prepPath, PreparePreferenceMatrixJob.USER_VECTORS), SequenceFileInputFormat.class,
                UserVectorSplitterMapper.class);
        partialMultiply.setJarByClass(ToVectorAndPrefReducer.class);
        partialMultiply.setMapOutputKeyClass(VarIntWritable.class);
        partialMultiply.setMapOutputValueClass(VectorOrPrefWritable.class);
        partialMultiply.setReducerClass(ToVectorAndPrefReducer.class);
        partialMultiply.setOutputFormatClass(SequenceFileOutputFormat.class);
        partialMultiply.setOutputKeyClass(VarIntWritable.class);
        partialMultiply.setOutputValueClass(VectorAndPrefsWritable.class);
        partialMultiplyConf.setBoolean("mapreduce.compress.map.output", true);
        partialMultiplyConf.set("mapred.output.dir", partialMultiplyPath.toString());

        if (usersFile != null) {
            partialMultiplyConf.set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }

        if (userItemFile != null) {
            partialMultiplyConf.set(IDReader.USER_ITEM_FILE, userItemFile);
        }

        partialMultiplyConf.setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED, maxPrefsPerUser);

        boolean succeeded = partialMultiply.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        //filter out any users we don't care about
        /* convert the user/item pairs to filter if a filterfile has been specified */
        if (filterFile != null) {
            Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                    ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                    ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
                    SequenceFileOutputFormat.class);
            boolean succeeded = itemFiltering.waitForCompletion(true);
            if (!succeeded) {
                return -1;
            }
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }

        //extract out the recommendations
        Configuration aggregateAndRecommendConf_hb = HBaseConfiguration.create(getConf());
        aggregateAndRecommendConf_hb.setBoolean("mapred.compress.map.output", true);

        Job aggregateAndRecommend_hb = Job.getInstance(aggregateAndRecommendConf_hb);
        aggregateAndRecommendConf_hb = aggregateAndRecommend_hb.getConfiguration();

        aggregateAndRecommend_hb.addFileToClassPath(new Path("lib/recommender.jar"));

        aggregateAndRecommend_hb.setJobName(HadoopUtil.getCustomJobName(getClass().getSimpleName(),
                aggregateAndRecommend_hb, PartialMultiplyMapper.class, AggregateAndRecommendReducer.class));
        aggregateAndRecommend_hb.setJarByClass(AggregateAndRecommendReducer.class); // class that contains mapper and reducer

        aggregateAndRecommend_hb.setInputFormatClass(SequenceFileInputFormat.class);
        aggregateAndRecommend_hb.setMapperClass(PartialMultiplyMapper.class);
        aggregateAndRecommend_hb.setMapOutputKeyClass(VarLongWritable.class);
        aggregateAndRecommend_hb.setMapOutputValueClass(PrefAndSimilarityColumnWritable.class);

        FileInputFormat.setInputPaths(aggregateAndRecommend_hb, new Path(aggregateAndRecommendInput));
        TableMapReduceUtil.initTableReducerJob(getConf().get(PARAM_WORKING_TABLE),
                AggregateAndRecommendReducer.class, aggregateAndRecommend_hb);

        aggregateAndRecommend_hb.setReducerClass(AggregateAndRecommendReducer.class);

        if (itemsFile != null) {
            aggregateAndRecommendConf_hb.set(AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }

        if (userItemFile != null) {
            aggregateAndRecommendConf_hb.set(IDReader.USER_ITEM_FILE, userItemFile);
        }

        if (filterFile != null) {
            setS3SafeCombinedInputPath(aggregateAndRecommend_hb, getTempPath(), partialMultiplyPath,
                    explicitFilterPath);
        }
        setIOSort(aggregateAndRecommend_hb);
        aggregateAndRecommendConf_hb.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());

        aggregateAndRecommendConf_hb.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS,
                numRecommendations);
        aggregateAndRecommendConf_hb.setBoolean(BOOLEAN_DATA, booleanData);

        if (!aggregateAndRecommend_hb.waitForCompletion(true)) {
            return -1;
        }
    }

    return 0;
}

From source file:org.apache.solr.hadoop.hack.MiniMRClientClusterFactory.java

License:Apache License

public static MiniMRClientCluster create(Class<?> caller, String identifier, int noOfNMs, Configuration conf,
        File testWorkDir) throws IOException {

    if (conf == null) {
        conf = new Configuration();
    }

    FileSystem fs = FileSystem.get(conf);

    Path testRootDir = new Path(testWorkDir.getPath(), identifier + "-tmpDir").makeQualified(fs);
    Path appJar = new Path(testRootDir, "MRAppJar.jar");

    // Copy MRAppJar and make it private.
    Path appMasterJar = new Path(MiniMRYarnCluster.APPJAR);

    fs.copyFromLocalFile(appMasterJar, appJar);
    fs.setPermission(appJar, new FsPermission("744"));

    Job job = Job.getInstance(conf);

    job.addFileToClassPath(appJar);

    Path callerJar = new Path(JarFinder.getJar(caller));
    Path remoteCallerJar = new Path(testRootDir, callerJar.getName());
    fs.copyFromLocalFile(callerJar, remoteCallerJar);
    fs.setPermission(remoteCallerJar, new FsPermission("744"));
    job.addFileToClassPath(remoteCallerJar);

    MiniMRYarnCluster miniMRYarnCluster;
    try {
        miniMRYarnCluster = new MiniMRYarnCluster(identifier, noOfNMs, testWorkDir);
    } catch (Exception e) {
        throw new RuntimeException(e);
    }
    job.getConfiguration().set("minimrclientcluster.caller.name", identifier);
    job.getConfiguration().setInt("minimrclientcluster.nodemanagers.number", noOfNMs);
    miniMRYarnCluster.init(job.getConfiguration());
    miniMRYarnCluster.start();

    return new MiniMRYarnClusterAdapter(miniMRYarnCluster, testWorkDir);
}

From source file:org.janusgraph.hadoop.compat.h2.DistCacheConfigurer.java

License:Apache License

@Override
public void configure(Job job) throws IOException {

    Configuration conf = job.getConfiguration();
    FileSystem localFS = FileSystem.getLocal(conf);
    FileSystem jobFS = FileSystem.get(conf);

    for (Path p : getLocalPaths()) {
        Path stagedPath = uploadFileIfNecessary(localFS, p, jobFS);
        // Calling this method decompresses the archive and makes Hadoop
        // handle its class files individually.  This leads to crippling
        // overhead (10+ seconds) even with the LocalJobRunner, courtesy of
        // o.a.h.yarn.util.FSDownload.changePermissions copying and changing
        // the mode of each class file individually.
        //job.addArchiveToClassPath(p);
        // Just add the compressed archive instead:
        job.addFileToClassPath(stagedPath);
    }

    // We don't really need to set a map reduce job jar here,
    // but doing so suppresses a warning
    String mj = getMapredJar();
    if (null != mj)
        job.setJar(mj);
}