Example usage for org.apache.hadoop.fs Path makeQualified

List of usage examples for org.apache.hadoop.fs Path makeQualified

Introduction

On this page you can find example usage of org.apache.hadoop.fs.Path.makeQualified.

Prototype

@Deprecated
public Path makeQualified(FileSystem fs) 

Document

Returns a qualified path object for the FileSystem's working directory.
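
Below is a minimal sketch of how the method behaves, assuming a default Configuration (the class name and file path are purely illustrative). Since this overload is deprecated, the sketch also shows the non-deprecated FileSystem.makeQualified(Path) equivalent.

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class MakeQualifiedExample {
    @SuppressWarnings("deprecation")
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // No scheme; resolved relative to the file system's working directory.
        Path relative = new Path("data/input.txt");

        // Deprecated form: qualify the path against a FileSystem's URI and working directory.
        FileSystem fs = relative.getFileSystem(conf);
        Path qualifiedOld = relative.makeQualified(fs);

        // Non-deprecated equivalent: let the FileSystem qualify the path.
        Path qualifiedNew = fs.makeQualified(relative);

        // Both print something like file:/home/user/data/input.txt; the scheme and
        // authority depend on fs.defaultFS and the current working directory.
        System.out.println(qualifiedOld);
        System.out.println(qualifiedNew);
    }
}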

Usage

From source file: it.crs4.seal.read_sort.ReadSort.java

License: Open Source License

public static Path getAnnotationPath(Configuration conf) throws IOException {
    String annotationName = conf.get(ReadSort.REF_ANN_PROP_NAME);
    if (annotationName == null)
        throw new RuntimeException("missing property " + REF_ANN_PROP_NAME);

    LOG.info("reading reference annotation from " + annotationName);

    Path annPath = new Path(annotationName);

    FileSystem srcFs;
    if (conf.get("mapred.cache.archives") != null) {
        // we're using the distributed cache for the reference,
        // so it's on the local file system
        srcFs = FileSystem.getLocal(conf);
    } else
        srcFs = annPath.getFileSystem(conf);

    return annPath.makeQualified(srcFs);
}

From source file: it.crs4.seal.read_sort.ReadSortOptionParser.java

License: Open Source License

@Override
protected CommandLine parseOptions(Configuration conf, String[] args) throws IOException, ParseException {
    CommandLine line = super.parseOptions(conf, args);

    /********* distributed reference and annotations *********/
    if (line.hasOption(distReference.getOpt())) {
        // Distribute the reference archive, and create a symlink "reference" to the directory
        Path optPath = new Path(line.getOptionValue(distReference.getOpt()));
        optPath = optPath.makeQualified(optPath.getFileSystem(conf));
        Path cachePath = new Path(optPath.toString() + "#reference");
        conf.set("mapred.cache.archives", cachePath.toString());
        conf.set("mapred.create.symlink", "yes");

        if (line.hasOption(ann.getOpt()))
            conf.set(ReadSort.REF_ANN_PROP_NAME, "reference/" + line.getOptionValue(ann.getOpt()));
        else
            throw new ParseException(
                    "You must specify the name of the annotation file within the distributed reference archive with -"
                            + ann.getOpt());
    } else if (line.hasOption(ann.getOpt())) {
        // direct access to the reference annotation
        conf.set(ReadSort.REF_ANN_PROP_NAME, line.getOptionValue(ann.getOpt()));
    } else
        throw new ParseException("You must provide the path the reference annotation file (<ref>.ann)");

    conf.set(ClusterUtils.NUM_RED_TASKS_PROPERTY, String.valueOf(getNReduceTasks()));
    return line;
}

From source file: it.crs4.seal.tsv_sort.TsvSort.java

License: Apache License

public int run(String[] args) throws Exception {
    LOG.info("starting");

    TsvSortOptionParser parser = new TsvSortOptionParser();
    parser.parse(getConf(), args);

    LOG.info("Using " + parser.getNReduceTasks() + " reduce tasks");

    Job job = new Job(getConf());

    job.setJobName("TsvSort " + parser.getInputPaths().get(0));
    job.setJarByClass(TsvSort.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    job.setInputFormatClass(TsvInputFormat.class);
    job.setOutputFormatClass(TextValueOutputFormat.class);
    job.setPartitionerClass(TotalOrderPartitioner.class);

    // output path
    FileOutputFormat.setOutputPath(job, parser.getOutputPath());

    FileSystem fs = parser.getOutputPath().getFileSystem(job.getConfiguration());
    /*
     *
     * Pick a random name for the partition file in the same directory as the
     * output path.  So, TsvSort /user/me/input /user/me/output
     * results in the partition file being placed in /user/me/_partition.lst.12340921387402174
     *
     * Why not place it directly in the input path?
     *
     *   We wouldn't be able to run two sorts on the same data at the same time.
     *   We've received complaints about this in the past, so it has been a
     *   limit in practice.
     *
     * Why not place it directly in the output path?
     *
     *   We'd have to create the output path before the output format did.
     *   For this to work we'd have to disable the FileOutputFormat's default check
     *   that verifies that the output directory doesn't exist.  This means that we'd
     *   need some other way to ensure that we're not writing to the same path where
     *   some other job wrote.
     */
    Path partitionFile;
    Random rnd = new Random();
    do {
        partitionFile = new Path(parser.getOutputPath().getParent(),
                String.format("_partition.lst.%012d", Math.abs(rnd.nextLong())));
    } while (fs.exists(partitionFile)); // this is still subject to a race condition between it and another instance of this program
    partitionFile = partitionFile.makeQualified(fs);
    LOG.info("partition file path: " + partitionFile);

    URI partitionUri = new URI(partitionFile.toString() + "#" + PARTITION_SYMLINK);
    LOG.debug("partitionUri for distributed cache: " + partitionUri);

    // input paths
    for (Path p : parser.getInputPaths())
        TsvInputFormat.addInputPath(job, p);

    LOG.info("sampling input");
    TextSampler.writePartitionFile(new TsvInputFormat(), job, partitionFile);
    LOG.info("created partitions");
    try {
        DistributedCache.addCacheFile(partitionUri, job.getConfiguration());
        DistributedCache.createSymlink(job.getConfiguration());

        int retcode = job.waitForCompletion(true) ? 0 : 1;
        LOG.info("done");
        return retcode;
    } finally {
        LOG.debug("deleting partition file " + partitionFile);
        fs.delete(partitionFile, false);
    }
}

From source file: ml.shifu.guagua.hadoop.io.GuaguaOptionsParser.java

License: Apache License

/**
 * Takes input as a comma-separated list of files and verifies that they exist. It defaults to file:/// if the
 * files specified do not have a scheme. It returns the paths converted to URIs, defaulting to file:///. So an
 * input of /home/user/file1,/home/user/file2 would return file:///home/user/file1,file:///home/user/file2
 */
@SuppressWarnings("deprecation")
private String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null)
        return null;
    String[] fileArr = files.split(FILE_SEPERATOR);
    String[] finalArr = new String[fileArr.length];
    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;
        URI pathURI;
        try {
            pathURI = new URI(tmp);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
        Path path = new Path(pathURI);
        FileSystem localFs = FileSystem.getLocal(conf);
        if (pathURI.getScheme() == null) {
            // default to the local file system
            // check if the file exists or not first
            if (!localFs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(localFs).toString();
        } else {
            // check if the file exists in this file system
            // we need to recreate this filesystem object to copy
            // these files to the file system jobtracker is running
            // on.
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(fs).toString();
        }
        finalArr[i] = finalPath;
    }
    return StringUtils.arrayToString(finalArr);
}

From source file: ml.shifu.guagua.mapreduce.GuaguaOptionsParser.java

License: Apache License

/**
 * Takes input as a comma-separated list of files and verifies that they exist. It defaults to file:/// if the
 * files specified do not have a scheme. It returns the paths converted to URIs, defaulting to file:///. So an
 * input of /home/user/file1,/home/user/file2 would return file:///home/user/file1,file:///home/user/file2
 */
private String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null)
        return null;
    String[] fileArr = files.split(FILE_SEPERATOR);
    String[] finalArr = new String[fileArr.length];
    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;
        URI pathURI;
        try {
            pathURI = new URI(tmp);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
        Path path = new Path(pathURI.toString());
        FileSystem localFs = FileSystem.getLocal(conf);
        if (pathURI.getScheme() == null) {
            // default to the local file system
            // check if the file exists or not first
            if (!localFs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(localFs).toString();
        } else {
            // check if the file exists in this file system
            // we need to recreate this filesystem object to copy
            // these files to the file system jobtracker is running
            // on.
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(fs).toString();
        }
        finalArr[i] = finalPath;
    }
    return StringUtils.arrayToString(finalArr);
}

From source file: ml.shifu.guagua.yarn.GuaguaOptionsParser.java

License: Apache License

/**
 * Takes input as a comma-separated list of files and verifies that they exist. It defaults to file:/// if the
 * files specified do not have a scheme. It returns the paths converted to URIs, defaulting to file:///. So an
 * input of /home/user/file1,/home/user/file2 would return file:///home/user/file1,file:///home/user/file2
 */
private String validateFiles(String files, Configuration conf) throws IOException {
    if (files == null)
        return null;
    String[] fileArr = files.split(FILE_SEPERATOR);
    String[] finalArr = new String[fileArr.length];
    for (int i = 0; i < fileArr.length; i++) {
        String tmp = fileArr[i];
        String finalPath;
        URI pathURI;
        try {
            pathURI = new URI(tmp);
        } catch (URISyntaxException e) {
            throw new IllegalArgumentException(e);
        }
        Path path = new Path(pathURI);
        FileSystem localFs = FileSystem.getLocal(conf);
        if (pathURI.getScheme() == null) {
            // default to the local file system
            // check if the file exists or not first
            if (!localFs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(localFs).toString();
        } else {
            // check if the file exists in this file system
            // we need to recreate this filesystem object to copy
            // these files to the file system jobtracker is running
            // on.
            FileSystem fs = path.getFileSystem(conf);
            if (!fs.exists(path)) {
                throw new FileNotFoundException("File " + tmp + " does not exist.");
            }
            finalPath = path.makeQualified(fs).toString();
        }
        finalArr[i] = finalPath;
    }
    return StringUtils.arrayToString(finalArr);
}

From source file: my.mahout.AbstractJob.java

License: Apache License

/**
 * Necessary to make this job (having a combined input path) work on Amazon S3; hopefully this is
 * obsolete once MultipleInputs is available again.
 */
public static void setS3SafeCombinedInputPath(Job job, Path referencePath, Path inputPathOne, Path inputPathTwo)
        throws IOException {
    FileSystem fs = FileSystem.get(referencePath.toUri(), job.getConfiguration());
    FileInputFormat.setInputPaths(job, inputPathOne.makeQualified(fs), inputPathTwo.makeQualified(fs));
}

From source file: nl.gridline.zieook.inx.movielens.AggregateAndRecommendReducer.java

License: Apache License

@Override
protected void setup(Context context) throws IOException {
    Configuration jobConf = context.getConfiguration();
    recommendationsPerUser = jobConf.getInt(NUM_RECOMMENDATIONS, DEFAULT_NUM_RECOMMENDATIONS);
    booleanData = jobConf.getBoolean(RecommenderJob.BOOLEAN_DATA, false);
    indexItemIDMap = TasteHadoopUtils.readItemIDIndexMap(jobConf.get(ITEMID_INDEX_PATH), jobConf);

    FSDataInputStream in = null;
    try {
        String itemFilePathString = jobConf.get(ITEMS_FILE);
        if (itemFilePathString == null) {
            itemsToRecommendFor = null;
        } else {
            Path unqualifiedItemsFilePath = new Path(itemFilePathString);
            FileSystem fs = FileSystem.get(unqualifiedItemsFilePath.toUri(), jobConf);
            itemsToRecommendFor = new FastIDSet();
            Path itemsFilePath = unqualifiedItemsFilePath.makeQualified(fs);
            in = fs.open(itemsFilePath);
            for (String line : new FileLineIterable(in)) {
                itemsToRecommendFor.add(Long.parseLong(line));
            }
        }
    } finally {
        IOUtils.closeStream(in);
    }
}

From source file: nl.gridline.zieook.inx.movielens.UserVectorSplitterMapper.java

License: Apache License

@Override
protected void setup(Context context) throws IOException {
    Configuration jobConf = context.getConfiguration();
    maxPrefsPerUserConsidered = jobConf.getInt(MAX_PREFS_PER_USER_CONSIDERED,
            DEFAULT_MAX_PREFS_PER_USER_CONSIDERED);
    String usersFilePathString = jobConf.get(USERS_FILE);
    if (usersFilePathString != null) {
        FSDataInputStream in = null;
        try {
            Path unqualifiedUsersFilePath = new Path(usersFilePathString);
            FileSystem fs = FileSystem.get(unqualifiedUsersFilePath.toUri(), jobConf);
            usersToRecommendFor = new FastIDSet();
            Path usersFilePath = unqualifiedUsersFilePath.makeQualified(fs);
            in = fs.open(usersFilePath);
            for (String line : new FileLineIterable(in)) {
                usersToRecommendFor.add(Long.parseLong(line));
            }
        } finally {
            IOUtils.closeStream(in);
        }
    }
}

From source file: nl.gridline.zieook.runners.cf.RecommenderJobZieOok.java

License: Apache License

@Override
public int run(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    addInputOption();
    addOutputOption();
    addOption("numRecommendations", "n", "Number of recommendations per user",
            String.valueOf(AggregateAndRecommendReducer.DEFAULT_NUM_RECOMMENDATIONS));
    addOption("usersFile", "u", "File of users to recommend for", null);
    addOption("itemsFile", "i", "File of items to recommend for", null);
    addOption("filterFile", "f",
            "File containing comma-separated userID,itemID pairs. Used to exclude the item from "
                    + "the recommendations for that user (optional)",
            null);
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("maxPrefsPerUser", "mp",
            "Maximum number of preferences considered per user in final recommendation phase",
            String.valueOf(UserVectorSplitterMapper.DEFAULT_MAX_PREFS_PER_USER_CONSIDERED));
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this in the similarity computation " + "(default: "
                    + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("maxSimilaritiesPerItem", "m", "Maximum number of similarities considered per item ",
            String.valueOf(DEFAULT_MAX_SIMILARITIES_PER_ITEM));
    addOption("maxCooccurrencesPerItem", "mo",
            "try to cap the number of cooccurrences per item to this " + "number (default: "
                    + DEFAULT_MAX_COOCCURRENCES_PER_ITEM + ')',
            String.valueOf(DEFAULT_MAX_COOCCURRENCES_PER_ITEM));
    addOption("similarityClassname", "s",
            "Name of distributed similarity class to instantiate, alternatively use "
                    + "one of the predefined similarities (" + SimilarityType.listEnumNames() + ')',
            String.valueOf(SimilarityType.SIMILARITY_COOCCURRENCE));

    Map<String, String> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    Path inputPath = getInputPath();
    Path outputPath = getOutputPath();
    Path tempDirPath = new Path(parsedArgs.get("--tempDir"));
    int numRecommendations = Integer.parseInt(parsedArgs.get("--numRecommendations"));
    String usersFile = parsedArgs.get("--usersFile");
    String itemsFile = parsedArgs.get("--itemsFile");
    String filterFile = parsedArgs.get("--filterFile");
    boolean booleanData = Boolean.valueOf(parsedArgs.get("--booleanData"));
    int maxPrefsPerUser = Integer.parseInt(parsedArgs.get("--maxPrefsPerUser"));
    int minPrefsPerUser = Integer.parseInt(parsedArgs.get("--minPrefsPerUser"));
    int maxSimilaritiesPerItem = Integer.parseInt(parsedArgs.get("--maxSimilaritiesPerItem"));
    int maxCooccurrencesPerItem = Integer.parseInt(parsedArgs.get("--maxCooccurrencesPerItem"));
    String similarityClassname = parsedArgs.get("--similarityClassname");

    Path userVectorPath = new Path(tempDirPath, "userVectors");
    Path itemIDIndexPath = new Path(tempDirPath, "itemIDIndex");
    Path countUsersPath = new Path(tempDirPath, "countUsers");
    Path itemUserMatrixPath = new Path(tempDirPath, "itemUserMatrix");
    Path similarityMatrixPath = new Path(tempDirPath, "similarityMatrix");
    Path prePartialMultiplyPath1 = new Path(tempDirPath, "prePartialMultiply1");
    Path prePartialMultiplyPath2 = new Path(tempDirPath, "prePartialMultiply2");
    Path explicitFilterPath = new Path(tempDirPath, "explicitFilterPath");
    Path partialMultiplyPath = new Path(tempDirPath, "partialMultiply");

    AtomicInteger currentPhase = new AtomicInteger();

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job itemIDIndex = prepareJob(inputPath, itemIDIndexPath, TextInputFormat.class, ItemIDIndexMapper.class,
                VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class, VarIntWritable.class,
                VarLongWritable.class, SequenceFileOutputFormat.class);
        itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
        task.setCurrentJob(itemIDIndex).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job toUserVector = prepareJob(inputPath, userVectorPath, TextInputFormat.class, ToItemPrefsMapper.class,
                VarLongWritable.class, booleanData ? VarLongWritable.class : EntityPrefWritable.class,
                ToUserVectorReducer.class, VarLongWritable.class, VectorWritable.class,
                SequenceFileOutputFormat.class);
        toUserVector.getConfiguration().setBoolean(BOOLEAN_DATA, booleanData);
        toUserVector.getConfiguration().setInt(ToUserVectorReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
        task.setCurrentJob(toUserVector).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job countUsers = prepareJob(userVectorPath, countUsersPath, SequenceFileInputFormat.class,
                CountUsersMapper.class, CountUsersKeyWritable.class, VarLongWritable.class,
                CountUsersReducer.class, VarIntWritable.class, NullWritable.class, TextOutputFormat.class);
        countUsers.setPartitionerClass(CountUsersKeyWritable.CountUsersPartitioner.class);
        countUsers.setGroupingComparatorClass(CountUsersKeyWritable.CountUsersGroupComparator.class);
        task.setCurrentJob(countUsers).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job maybePruneAndTransponse = prepareJob(userVectorPath, itemUserMatrixPath,
                SequenceFileInputFormat.class, MaybePruneRowsMapper.class, IntWritable.class,
                DistributedRowMatrix.MatrixEntryWritable.class, ToItemVectorsReducer.class, IntWritable.class,
                VectorWritable.class, SequenceFileOutputFormat.class);
        maybePruneAndTransponse.getConfiguration().setInt(MaybePruneRowsMapper.MAX_COOCCURRENCES,
                maxCooccurrencesPerItem);
        task.setCurrentJob(maybePruneAndTransponse).waitForCompletion(true);
    }

    int numberOfUsers = TasteHadoopUtils.readIntFromFile(getConf(), countUsersPath);

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        /*
         * Once DistributedRowMatrix uses the hadoop 0.20 API, we should refactor this call to something like
         * new DistributedRowMatrix(...).rowSimilarity(...)
         */
        try {
            ToolRunner.run(getConf(), new RowSimilarityZieOok(), new String[] { //
                    "--input", itemUserMatrixPath.toString(), //
                    "--output", similarityMatrixPath.toString(), //
                    "--numberOfColumns", String.valueOf(numberOfUsers), //
                    "--similarityClassname", similarityClassname, //
                    "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem + 1), //
                    "--tempDir", tempDirPath.toString() });
        } catch (Exception e) {
            throw new IllegalStateException("item-item-similarity computation failed", e);
        }
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job prePartialMultiply1 = prepareJob(similarityMatrixPath, prePartialMultiplyPath1,
                SequenceFileInputFormat.class, SimilarityMatrixRowWrapperMapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
                SequenceFileOutputFormat.class);
        task.setCurrentJob(prePartialMultiply1).waitForCompletion(true);

        Job prePartialMultiply2 = prepareJob(userVectorPath, prePartialMultiplyPath2,
                SequenceFileInputFormat.class, UserVectorSplitterMapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, Reducer.class, VarIntWritable.class, VectorOrPrefWritable.class,
                SequenceFileOutputFormat.class);
        if (usersFile != null) {
            prePartialMultiply2.getConfiguration().set(UserVectorSplitterMapper.USERS_FILE, usersFile);
        }
        prePartialMultiply2.getConfiguration().setInt(UserVectorSplitterMapper.MAX_PREFS_PER_USER_CONSIDERED,
                maxPrefsPerUser);
        task.setCurrentJob(prePartialMultiply2).waitForCompletion(true);

        Job partialMultiply = prepareJob(new Path(prePartialMultiplyPath1 + "," + prePartialMultiplyPath2),
                partialMultiplyPath, SequenceFileInputFormat.class, Mapper.class, VarIntWritable.class,
                VectorOrPrefWritable.class, ToVectorAndPrefReducer.class, VarIntWritable.class,
                VectorAndPrefsWritable.class, SequenceFileOutputFormat.class);

        /* necessary to make this job (having a combined input path) work on Amazon S3 */
        Configuration partialMultiplyConf = partialMultiply.getConfiguration();
        FileSystem fs = FileSystem.get(tempDirPath.toUri(), partialMultiplyConf);
        prePartialMultiplyPath1 = prePartialMultiplyPath1.makeQualified(fs);
        prePartialMultiplyPath2 = prePartialMultiplyPath2.makeQualified(fs);
        FileInputFormat.setInputPaths(partialMultiply, prePartialMultiplyPath1, prePartialMultiplyPath2);
        task.setCurrentJob(partialMultiply).waitForCompletion(true);
    }

    if (shouldRunNextPhase(parsedArgs, currentPhase)) {

        /* convert the user/item pairs to filter if a filterfile has been specified */
        if (filterFile != null) {
            Job itemFiltering = prepareJob(new Path(filterFile), explicitFilterPath, TextInputFormat.class,
                    ItemFilterMapper.class, VarLongWritable.class, VarLongWritable.class,
                    ItemFilterAsVectorAndPrefsReducer.class, VarIntWritable.class, VectorAndPrefsWritable.class,
                    SequenceFileOutputFormat.class);
            task.setCurrentJob(itemFiltering).waitForCompletion(true);
        }

        String aggregateAndRecommendInput = partialMultiplyPath.toString();
        if (filterFile != null) {
            aggregateAndRecommendInput += "," + explicitFilterPath;
        }

        Job aggregateAndRecommend = prepareJob(new Path(aggregateAndRecommendInput), outputPath,
                SequenceFileInputFormat.class, PartialMultiplyMapper.class, VarLongWritable.class,
                PrefAndSimilarityColumnWritable.class, AggregateAndRecommendReducer.class,
                VarLongWritable.class, RecommendedItemsWritable.class, SequenceFileOutputFormat.class);
        Configuration aggregateAndRecommendConf = aggregateAndRecommend.getConfiguration();
        if (itemsFile != null) {
            aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMS_FILE, itemsFile);
        }

        if (filterFile != null) {
            /* necessary to make this job (having a combined input path) work on Amazon S3 */
            FileSystem fs = FileSystem.get(tempDirPath.toUri(), aggregateAndRecommendConf);
            partialMultiplyPath = partialMultiplyPath.makeQualified(fs);
            explicitFilterPath = explicitFilterPath.makeQualified(fs);
            FileInputFormat.setInputPaths(aggregateAndRecommend, partialMultiplyPath, explicitFilterPath);
        }
        setIOSort(aggregateAndRecommend);
        aggregateAndRecommendConf.set(AggregateAndRecommendReducer.ITEMID_INDEX_PATH,
                itemIDIndexPath.toString());
        aggregateAndRecommendConf.setInt(AggregateAndRecommendReducer.NUM_RECOMMENDATIONS, numRecommendations);
        aggregateAndRecommendConf.setBoolean(BOOLEAN_DATA, booleanData);
        task.setCurrentJob(aggregateAndRecommend).waitForCompletion(true);
    }

    return 0;
}