Example usage for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob USER_VECTORS

List of usage examples for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob USER_VECTORS

Introduction

In this page you can find the example usage for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob USER_VECTORS.

Prototype

String USER_VECTORS

To view the source code for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob USER_VECTORS, click the Source Link.

Usage

From source file:finderbots.recommenders.hadoop.RecommenderUpdateJob.java

License:Apache License

/**
 * Runs the recommender update pipeline: parses the command line, splits the input
 * into per-action preference files (building the user/item indexes), runs the primary
 * recommender job, optionally runs the cross-recommender job, hands the resulting
 * matrices to {@code WriteToSolrJob}, and finally moves the matrices out of the temp
 * directories via {@code moveMatrices()}.
 *
 * <p>Each sub-job is launched through {@code ToolRunner}; if any of them returns a
 * non-zero exit code the pipeline stops and that code is returned, so later stages
 * never run against missing or partial output.
 *
 * @param args command-line arguments, parsed into {@code Options}
 * @return 0 on success, -1 if the arguments cannot be parsed, otherwise the non-zero
 *         exit code of the first sub-job that failed
 * @throws Exception if any sub-job or file-system operation throws
 */
@Override
public int run(String[] args) throws Exception {
    options = new Options();
    CmdLineParser parser = new CmdLineParser(options);

    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    cleanOutputDirs(options);
    Path prefFilesRootDir = new Path(options.getOutputDir());
    Path indexesPath = new Path(prefFilesRootDir, options.getIndexesDir());
    Path prefsPath = new Path(prefFilesRootDir, options.getPrefsDir());
    // From here on the prefs dir stored in options is the path resolved under the output root.
    options.setPrefsDir(prefsPath.toString());

    // split into actions and store in subdirs
    // create an index/dictionary for users and items
    // this job cleans out the output dir first
    ActionSplitterJob aj = new ActionSplitterJob();
    int status = ToolRunner.run(getConf(), aj,
            new String[] { "--input", options.getInputDir(), "--output", prefsPath.toString(), "--indexDir",
                    indexesPath.toString(), "--inputFilePattern", options.getFileNamePatternString(),
                    "--action1", options.getAction1(), "--action2", options.getAction2(), "--inputDelim",
                    options.getInputDelim(), "--outputDelim", options.getOutputDelim(), "--actionIDCol",
                    Integer.toString(options.getActionColumn()), "--itemIDCol",
                    Integer.toString(options.getItemIDColumn()), "--userIDCol",
                    Integer.toString(options.getUserIDColumn()), });
    if (status != 0) {
        System.err.println("ActionSplitterJob failed with exit code " + status);
        return status;
    }

    // need to get the number of users and items from the splitter, which also creates indexes
    // these are single value binary files written with
    // HadoopUtil.writeInt(this.numberOfUsers, getOutputPath(NUM_USERS), getConf());
    this.numberOfUsers = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumUsersFile()),
            getConf());
    this.numberOfItems = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumItemsFile()),
            getConf());

    options.setInputDir(prefFilesRootDir.toString());

    String action1PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction1Dir())
            .toString();
    String action2PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction2Dir())
            .toString();

    status = ToolRunner.run(getConf(), new RecommenderJob(),
            new String[] { "--input", action1PrefsPath, "--output", options.getPrimaryRecsPath(),
                    "--similarityClassname", options.getSimilairtyType(),
                    //need the seqfile for the similarity matrix even if output to Solr, this job puts it in the temp dir.
                    "--tempDir", options.getPrimaryTempDir(), "--sequencefileOutput" });
    if (status != 0) {
        System.err.println("RecommenderJob failed with exit code " + status);
        return status;
    }

    boolean doXRecommender = options.getDoXRecommender();
    if (doXRecommender) {
        //note: similarity class is not used, cooccurrence only for now
        status = ToolRunner.run(getConf(), new XRecommenderJob(),
                new String[] { "--input", options.getAllActionsDir(), "--output",
                        options.getSecondaryOutputDir(), "--similarityClassname", "SIMILARITY_LOGLIKELIHOOD",
                        "--outputPathForSimilarityMatrix", options.getSecondarySimilarityMatrixPath(),
                        "--tempDir", options.getSecondaryTempDir(), "--numUsers",
                        Integer.toString(this.numberOfUsers), "--numItems",
                        Integer.toString(this.numberOfItems), "--primaryPrefs", action1PrefsPath,
                        "--secondaryPrefs", action2PrefsPath, });
        if (status != 0) {
            System.err.println("XRecommenderJob failed with exit code " + status);
            return status;
        }
    }

    // The matrices are still in the temp dirs at this point; they are moved afterwards.
    Path bBSimilarityMatrixDRM = new Path(options.getPrimarySimilarityMatrixPath());
    Path primaryActionDRM = new Path(new Path(options.getPrimaryTempDir(), RecommenderJob.DEFAULT_PREPARE_PATH),
            PreparePreferenceMatrixJob.USER_VECTORS);

    //Next step is to take the history and similarity matrices, join them by id and write to solr docs
    String[] writeToSolrArgs;
    if (doXRecommender) {
        // Secondary paths are only resolved when the cross-recommender actually ran.
        Path bASimilarityMatrixDRM = new Path(options.getSecondarySimilarityMatrixPath());
        Path secondaryActionDRM = new Path(
                new Path(options.getSecondaryTempDir(), XRecommenderJob.DEFAULT_PREPARE_DIR),
                PrepareActionMatricesJob.USER_VECTORS_A);

        LOGGER.info("\n===========\n\n\n" + "  About to call WriteToSolr with cross-recommendations:\n"
                + "    B matrix path: " + primaryActionDRM.toString() + "\n" + "    A matrix path: "
                + secondaryActionDRM.toString() + "\n" + "    [B'B] matrix path: "
                + bBSimilarityMatrixDRM.toString() + "\n" + "    [B'A] matrix path: "
                + bASimilarityMatrixDRM.toString() + "\n" + "    Output path: " + options.getOutputDir() + "\n"
                + "\n\n===========\n");
        writeToSolrArgs = new String[] { "--itemCrossSimilarityMatrixDir", bASimilarityMatrixDRM.toString(),
                "--indexDir", indexesPath.toString(), "--itemSimilarityMatrixDir",
                bBSimilarityMatrixDRM.toString(), "--usersPrimaryHistoryDir", primaryActionDRM.toString(),
                "--usersSecondaryHistoryDir", secondaryActionDRM.toString(), "--output",
                options.getOutputDir(), };
    } else {
        LOGGER.info("\n===========\n\n\n" + "  About to call WriteToSolr with single actions recommendations:\n"
                + "    B matrix path: " + primaryActionDRM.toString() + "\n" + "    [B'B] matrix path: "
                + bBSimilarityMatrixDRM.toString() + "\n" + "    Output path: " + options.getOutputDir() + "\n"
                + "\n\n===========\n");
        writeToSolrArgs = new String[] { "--indexDir", indexesPath.toString(), "--itemSimilarityMatrixDir",
                bBSimilarityMatrixDRM.toString(), "--usersPrimaryHistoryDir", primaryActionDRM.toString(),
                "--output", options.getOutputDir(), };
    }

    status = ToolRunner.run(getConf(), new WriteToSolrJob(), writeToSolrArgs);
    if (status != 0) {
        System.err.println("WriteToSolrJob failed with exit code " + status);
        return status;
    }

    //move user history and similarity matrices
    //move stuff out of temp for now, may not need all these
    moveMatrices();

    return 0;
}

From source file:finderbots.recommenders.hadoop.RecommenderUpdateJob.java

License:Apache License

/**
 * Moves the matrices produced by the recommender jobs out of the temp directories
 * into their output locations:
 * <ul>
 *   <li>the primary similarity matrix into the primary output dir, under
 *       {@code XRecommenderJob.SIMS_MATRIX_DIR};</li>
 *   <li>the primary user action matrix into the primary action history dir;</li>
 *   <li>when the cross-recommender is enabled, the secondary user action matrix
 *       into the secondary action history dir.</li>
 * </ul>
 *
 * @throws IOException if a file system cannot be obtained or any rename reports failure
 */
private void moveMatrices() throws IOException {
    //so it can output to Solr if options specify
    moveOrThrow(new Path(options.getPrimarySimilarityMatrixPath()),
            new Path(options.getPrimaryOutputDir(), XRecommenderJob.SIMS_MATRIX_DIR));//steal the dir name from Xrec

    //move the primary user action matrix to output
    moveOrThrow(
            new Path(new Path(options.getPrimaryTempDir(), RecommenderJob.DEFAULT_PREPARE_PATH),
                    PreparePreferenceMatrixJob.USER_VECTORS),
            new Path(options.getOutputDir(), options.getPrimaryActionHistoryDir()));

    //if it was created move the secondary user action matrix to output
    if (options.getDoXRecommender()) {
        moveOrThrow(
                new Path(new Path(options.getSecondaryTempDir(), XRecommenderJob.DEFAULT_PREPARE_DIR),
                        PrepareActionMatricesJob.USER_VECTORS_A),
                new Path(options.getOutputDir(), options.getSecondaryActionHistoryDir()));
    }
}

/**
 * Renames {@code from} to {@code to} on the file system that owns {@code from},
 * failing loudly instead of ignoring the boolean result of {@code FileSystem.rename}.
 *
 * @param from existing source path
 * @param to   destination path
 * @throws IOException if the file system cannot be obtained or the rename returns {@code false}
 */
private void moveOrThrow(Path from, Path to) throws IOException {
    // Resolve the file system from the path so non-default schemes work as well.
    FileSystem fs = from.getFileSystem(getConf());
    if (!fs.rename(from, to)) {
        throw new IOException("Failed to move " + from + " to " + to);
    }
}