Example usage for org.apache.mahout.cf.taste.hadoop.item RecommenderJob RecommenderJob

List of usage examples for org.apache.mahout.cf.taste.hadoop.item RecommenderJob RecommenderJob

Introduction

In this page you can find the example usage for org.apache.mahout.cf.taste.hadoop.item RecommenderJob RecommenderJob.

Prototype

RecommenderJob

Source Link

Usage

From source file:finderbots.recommenders.hadoop.RecommenderUpdateJob.java

License:Apache License

// NOTE(review): orchestrates the full recommender-update pipeline:
// split raw action logs -> build user/item ID indexes -> Mahout item-based
// recommendations -> optional cross-action recommender -> write result docs
// for Solr. Steps are strictly order-dependent (each reads the previous
// step's HDFS output).
@Override
public int run(String[] args) throws Exception {
    // Parse command-line flags into the job's Options bean (args4j-style).
    options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    String s = options.toString();

    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        // Bad arguments: report usage and signal failure to ToolRunner.
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    // Wipe previous outputs, then lay out the working directories under the
    // configured output root: one subdir for indexes, one for preference data.
    cleanOutputDirs(options);
    Path prefFilesRootDir = new Path(options.getOutputDir());
    FileSystem fs = prefFilesRootDir.getFileSystem(getConf());
    Path indexesPath = new Path(prefFilesRootDir, options.getIndexesDir());
    Path prefsPath = new Path(prefFilesRootDir, options.getPrefsDir());
    options.setPrefsDir(prefsPath.toString());

    // split into actions and store in subdirs
    // create an index/dictionary for users and items
    // this job cleans out the output dir first
    ActionSplitterJob aj = new ActionSplitterJob();
    ToolRunner.run(getConf(), aj,
            new String[] { "--input", options.getInputDir(), "--output", prefsPath.toString(), "--indexDir",
                    indexesPath.toString(), "--inputFilePattern", options.getFileNamePatternString(),
                    "--action1", options.getAction1(), "--action2", options.getAction2(), "--inputDelim",
                    options.getInputDelim(), "--outputDelim", options.getOutputDelim(), "--actionIDCol",
                    Integer.toString(options.getActionColumn()), "--itemIDCol",
                    Integer.toString(options.getItemIDColumn()), "--userIDCol",
                    Integer.toString(options.getUserIDColumn()), });

    // need to get the number of users and items from the splitter, which also creates indexes
    this.numberOfUsers = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumUsersFile()),
            getConf());
    this.numberOfItems = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumItemsFile()),
            getConf());
    // these are single value binary files written with
    // HadoopUtil.writeInt(this.numberOfUsers, getOutputPath(NUM_USERS), getConf());

    // Downstream jobs read from the split/indexed data, not the raw input.
    options.setInputDir(prefFilesRootDir.toString());

    // Per-action preference directories produced by the splitter above.
    String action1PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction1Dir())
            .toString();
    String action2PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction2Dir())
            .toString();

    //LOGGER.info("prefFilesRootDir.toString() = "+prefFilesRootDir.toString());
    //LOGGER.info("options.getPrefsDir() = "+options.getPrefsDir());
    //LOGGER.info("aj.getOptions().getAction1Dir() = "+aj.getOptions().getAction1Dir());
    //LOGGER.info("action1PrefsPath = "+action1PrefsPath.toString());
    //LOGGER.info("action2PrefsPath = "+action2PrefsPath.toString());
    // Primary recommender: Mahout item-based CF over action-1 preferences.
    ToolRunner.run(getConf(), new RecommenderJob(),
            new String[] { "--input", action1PrefsPath, "--output", options.getPrimaryRecsPath(),
                    "--similarityClassname", options.getSimilairtyType(),
                    //need the seqfile for the similarity matrix even if output to Solr, this job puts it in the temp dir.
                    "--tempDir", options.getPrimaryTempDir(), "--sequencefileOutput" });
    //Now move the similarity matrix to the p-recs/sims location rather than leaving is in the tmp dir
    //this will be written to Solr if specified in the options.

    if (options.getDoXRecommender()) {
        //note: similairty class is not used, cooccurrence only for now
        // Cross-recommender: combines action-1 and action-2 histories.
        ToolRunner.run(getConf(), new XRecommenderJob(),
                new String[] { "--input", options.getAllActionsDir(), "--output",
                        options.getSecondaryOutputDir(), "--similarityClassname", "SIMILARITY_LOGLIKELIHOOD",
                        "--outputPathForSimilarityMatrix", options.getSecondarySimilarityMatrixPath(),
                        "--tempDir", options.getSecondaryTempDir(), "--numUsers",
                        Integer.toString(this.numberOfUsers), "--numItems",
                        Integer.toString(this.numberOfItems), "--primaryPrefs", action1PrefsPath,
                        "--secondaryPrefs", action2PrefsPath, });
    }

    // Locate the DRM outputs of the jobs above: similarity matrices ([B'B],
    // [B'A]) and the user-history matrices left in each job's prepare dir.
    Path bBSimilarityMatrixDRM = new Path(options.getPrimarySimilarityMatrixPath());
    Path bASimilarityMatrixDRM = new Path(options.getSecondarySimilarityMatrixPath());
    Path primaryActionDRM = new Path(new Path(options.getPrimaryTempDir(), RecommenderJob.DEFAULT_PREPARE_PATH),
            PreparePreferenceMatrixJob.USER_VECTORS);
    Path secondaryActionDRM = new Path(
            new Path(options.getSecondaryTempDir(), XRecommenderJob.DEFAULT_PREPARE_DIR),
            PrepareActionMatricesJob.USER_VECTORS_A);

    if (options.getDoXRecommender()) {
        //Next step is to take the history and similarity matrices, join them by id and write to solr docs
        LOGGER.info("\n===========\n\n\n" + "  About to call WriteToSolr with cross-recommendations:\n"
                + "    B matrix path: " + primaryActionDRM.toString() + "\n" + "    A matrix path: "
                + secondaryActionDRM.toString() + "\n" + "    [B'B] matrix path: "
                + bBSimilarityMatrixDRM.toString() + "\n" + "    [B'A] matrix path: "
                + bASimilarityMatrixDRM.toString() + "\n" + "    Output path: " + options.getOutputDir() + "\n"
                + "\n\n===========\n");
        ToolRunner.run(getConf(), new WriteToSolrJob(),
                new String[] { "--itemCrossSimilarityMatrixDir", bASimilarityMatrixDRM.toString(), "--indexDir",
                        indexesPath.toString(), "--itemSimilarityMatrixDir", bBSimilarityMatrixDRM.toString(),
                        "--usersPrimaryHistoryDir", primaryActionDRM.toString(), "--usersSecondaryHistoryDir",
                        secondaryActionDRM.toString(), "--output", options.getOutputDir(), });
    } else {
        // Single-action variant: only the primary history and [B'B] matrix.
        LOGGER.info("\n===========\n\n\n" + "  About to call WriteToSolr with single actions recommendations:\n"
                + "    B matrix path: " + primaryActionDRM.toString() + "\n" + "    [B'B] matrix path: "
                + bBSimilarityMatrixDRM.toString() + "\n" + "    Output path: " + options.getOutputDir() + "\n"
                + "\n\n===========\n");
        ToolRunner.run(getConf(), new WriteToSolrJob(),
                new String[] { "--indexDir", indexesPath.toString(), "--itemSimilarityMatrixDir",
                        bBSimilarityMatrixDRM.toString(), "--usersPrimaryHistoryDir",
                        primaryActionDRM.toString(), "--output", options.getOutputDir(), });
    }

    /*
    ToolRunner.run(getConf(), new WriteToSolrJob(), new String[]{
    "--itemCrossSimilarityMatrixDir", "../out/s-recs/sims",
    "--indexDir", "../out/id-indexes",
    "--itemSimilarityMatrixDir", "../out/p-recs/sims",
    "--usersPrimaryHistoryDir", "../out/actions/p-action",
    "--usersSecondaryHistoryDir", "../out/actions/s-action",
    "--output", "../out",
    });
    */

    //move user history and similarity matrices
    //move stuff out of temp for now, may not need all these
    moveMatrices();

    return 0;
}

From source file:net.aprendizajengrande.gitrecommender.Recommend.java

License:Open Source License

/**
 * Computes per-author file recommendations from a local git-commit database.
 *
 * <p>Pipeline: (1) write a Mahout ratings file to HDFS where the preference
 * value is the fraction of an author's commits touching a file, scaled to
 * 0..10000; (2) run Mahout's item-based {@code RecommenderJob} with cosine
 * similarity; (3) parse the reducer output and write human-readable
 * author/file/score lines to a local file.
 *
 * @param args {@code <db dir> <hdfs input folder> <hdfs output folder> <output file>}
 * @throws Exception if any HDFS I/O or the Hadoop job fails
 */
public static void main(String[] args) throws Exception {

    if (args.length != 4) {
        System.err.println("Usage: <db dir> <hdfs folder for input> <hdfs folder for output> <output file>");
        System.exit(1);
    }

    Configuration conf = new Configuration();

    File dbDir = new File(args[0]);
    DB db = new DB(dbDir);

    File outputFile = new File(args[3]);

    String inputName = args[1] + "/ratings";
    String outputName = args[2] + "/recos";

    Path input = new Path(inputName);
    Path output = new Path(outputName);
    // Single-reducer job, so the whole result lands in part-r-00000.
    Path actualOutput = new Path(outputName + "/part-r-00000");

    // Affinity for a file = % of the author's commits that touch it.
    int[] authorCommitCounts = db.commitsPerAuthor();
    Map<Integer, Integer> counts[] = db.counts();

    // Populate the ratings file.
    // FIX: try-with-resources so the HDFS stream is closed even on failure,
    // and an explicit UTF-8 charset instead of the platform default.
    try (PrintWriter pw = new PrintWriter(new OutputStreamWriter(
            input.getFileSystem(conf).create(input), java.nio.charset.StandardCharsets.UTF_8))) {
        for (int author = 0; author < authorCommitCounts.length; author++) {
            for (Map.Entry<Integer, Integer> c : counts[author].entrySet()) {
                pw.println(author + "\t" + c.getKey() + "\t"
                        + ((c.getValue() / (authorCommitCounts[author] * 1.0)) * 10000.0) + "\t"
                        + c.getValue().intValue());
            }
        }
    }

    // compute recommendation in Hadoop
    ToolRunner.run(new Configuration(), new RecommenderJob(), new String[] { "--input", inputName, "--output",
            outputName, "--similarityClassname", "SIMILARITY_COSINE" });

    List<String> files = db.files();
    List<String> authors = db.authors();

    // Read recommendations back and translate IDs to names.
    // FIX: the original never closed the BufferedReader and leaked the output
    // PrintWriter if parsing threw; both are now managed by try-with-resources.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(
                    output.getFileSystem(conf).open(actualOutput), java.nio.charset.StandardCharsets.UTF_8));
            PrintWriter pw = new PrintWriter(new FileWriter(outputFile))) {
        String line = br.readLine();
        while (line != null) {
            // Each line: "<userId>\t[itemId:score,itemId:score,...]"
            String[] parts = line.split("\\s+");
            String author = authors.get(Integer.parseInt(parts[0]));
            // Strip the surrounding brackets, then split the item:score pairs.
            parts = parts[1].substring(1, parts[1].length() - 1).split(",");
            for (String pair : parts) {
                String[] pairsPart = pair.split(":");
                pw.println(author + "\t" + files.get(Integer.parseInt(pairsPart[0])) + "\t" + pairsPart[1]);
            }
            line = br.readLine();
        }
    }
}

From source file:nl.gridline.zieook.runners.cf.RecommenderJobZieOok.java

License:Apache License

/**
 * Command-line entry point: delegates argument handling and execution of the
 * Mahout item-based {@code RecommenderJob} to Hadoop's {@code ToolRunner}.
 *
 * @param args forwarded verbatim to the recommender job
 * @throws Exception if the underlying Hadoop job fails
 */
public static void main(String[] args) throws Exception {
    Configuration configuration = new Configuration();
    RecommenderJob recommenderJob = new RecommenderJob();
    ToolRunner.run(configuration, recommenderJob, args);
}

From source file:tv.icntv.grade.film.recommend.CFRecommendJob.java

License:Apache License

/**
 * Runs a two-stage pipeline: a preprocessing MapReduce job that aggregates
 * film viewing-time data, followed by Mahout's item-based recommender.
 *
 * @param strings positional args: [0] comma-separated input paths,
 *                [1] preprocessing output path, [2] space-separated
 *                RecommenderJob arguments, [3]/[4] paths cleared before
 *                the recommender runs
 * @return 0 on success, non-zero on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    // Raise the minimum split size to 512 MB to limit the number of map tasks.
    configuration.setLong("mapred.min.split.size", 512 * 1024 * 1024L);
    HadoopUtils.deleteIfExist(strings[1]);
    Job timeJob = new Job(configuration, "calculate film time middle job");
    MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob,
            getPaths(strings[0].split(",")));
    timeJob.setCombinerClass(TimeCombiner.class);
    MapReduceUtils.initReducerJob(new Path(strings[1]), TimeReducer.class, timeJob);
    // FIX: the original ignored waitForCompletion's result and ran the
    // recommender even when preprocessing failed; abort early instead.
    if (!timeJob.waitForCompletion(true)) {
        return 1;
    }

    HadoopUtils.deleteIfExist(strings[3]);
    HadoopUtils.deleteIfExist(strings[4]);
    return ToolRunner.run(configuration, new RecommenderJob(), strings[2].split(" "));
}

From source file:tv.icntv.recommend.algorithm.CFRecommendJob.java

License:Apache License

/**
 * Builds yesterday's CF input from raw viewing data, then runs Mahout's
 * item-based recommender over it.
 *
 * <p>Equivalent argument layout of the former positional interface:
 * input dir (from {@code hdfs.directory.input}), CF data dir, the assembled
 * RecommenderJob argument string, output dir, temp dir — all now derived
 * from configuration properties formatted with yesterday's date.
 *
 * @param strings unused; all paths come from the Configuration
 * @return 0 on success, non-zero on failure
 * @throws Exception if job setup or execution fails
 */
@Override
public int run(String[] strings) throws Exception {
    Configuration configuration = getConf();
    //        configuration.setLong("mapred.min.split.size",512*1024*1024L);
    // All paths are parameterized by yesterday's date.
    Date date = getDateAdd(-1);

    String baseCfData = String.format(configuration.get(cfInputProperty), date);
    String output = String.format(configuration.get(cfOutProperty), date);
    String temp = String.format(configuration.get(cfTempPropery), date);

    // Assemble the RecommenderJob command line; split(" ") below requires
    // exactly one space between tokens.
    StringBuilder sb = new StringBuilder();
    sb.append("--input ").append(baseCfData);
    sb.append(" --output ").append(output);
    sb.append(" --numRecommendations ").append(configuration.get("icntv.cf.recommend.num.per.user"));
    sb.append(" --similarityClassname ").append(configuration.get("icntv.cf.recommend.similarityClassname"));
    sb.append(" --tempDir ").append(temp);

    // Preprocessing job: aggregate viewing times into the CF input matrix.
    HadoopUtils.deleteIfExist(baseCfData);
    Job timeJob = Job.getInstance(configuration, "?");
    //        MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob, new Path(String.format(configuration.get("hdfs.directory.input"),date)));
    //        timeJob.setCombinerClass(TimeCombiner.class);
    //        MapReduceUtils.initReducerJob(new Path(baseCfData),TimeReducer.class,timeJob);
    MapReduceUtils.initMapperJob(ViewTimeMapper.class, Text.class, Text.class, this.getClass(), timeJob,
            getInput(configuration)); //new Path(String.format(configuration.get("hdfs.directory.input"),date))
    MapReduceUtils.initReducerJob(new Path(baseCfData), ViewTimeReducer.class, timeJob);
    // FIX: the original ignored waitForCompletion's result and ran the
    // recommender even when preprocessing failed; abort early instead.
    if (!timeJob.waitForCompletion(true)) {
        return 1;
    }

    HadoopUtils.deleteIfExist(output);
    HadoopUtils.deleteIfExist(temp);
    return ToolRunner.run(configuration, new RecommenderJob(), sb.toString().split(" "));
}