List of usage examples for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob USER_VECTORS
String USER_VECTORS
To view the source code for org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob.USER_VECTORS, click the Source link.
From source file:finderbots.recommenders.hadoop.RecommenderUpdateJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { options = new Options(); CmdLineParser parser = new CmdLineParser(options); String s = options.toString(); try {/* w w w .j a v a 2 s .com*/ parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); parser.printUsage(System.err); return -1; } cleanOutputDirs(options); Path prefFilesRootDir = new Path(options.getOutputDir()); FileSystem fs = prefFilesRootDir.getFileSystem(getConf()); Path indexesPath = new Path(prefFilesRootDir, options.getIndexesDir()); Path prefsPath = new Path(prefFilesRootDir, options.getPrefsDir()); options.setPrefsDir(prefsPath.toString()); // split into actions and store in subdirs // create an index/dictionary for users and items // this job cleans out the output dir first ActionSplitterJob aj = new ActionSplitterJob(); ToolRunner.run(getConf(), aj, new String[] { "--input", options.getInputDir(), "--output", prefsPath.toString(), "--indexDir", indexesPath.toString(), "--inputFilePattern", options.getFileNamePatternString(), "--action1", options.getAction1(), "--action2", options.getAction2(), "--inputDelim", options.getInputDelim(), "--outputDelim", options.getOutputDelim(), "--actionIDCol", Integer.toString(options.getActionColumn()), "--itemIDCol", Integer.toString(options.getItemIDColumn()), "--userIDCol", Integer.toString(options.getUserIDColumn()), }); // need to get the number of users and items from the splitter, which also creates indexes this.numberOfUsers = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumUsersFile()), getConf()); this.numberOfItems = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumItemsFile()), getConf()); // these are single value binary files written with // HadoopUtil.writeInt(this.numberOfUsers, getOutputPath(NUM_USERS), getConf()); options.setInputDir(prefFilesRootDir.toString()); String action1PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction1Dir()) 
.toString(); String action2PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction2Dir()) .toString(); //LOGGER.info("prefFilesRootDir.toString() = "+prefFilesRootDir.toString()); //LOGGER.info("options.getPrefsDir() = "+options.getPrefsDir()); //LOGGER.info("aj.getOptions().getAction1Dir() = "+aj.getOptions().getAction1Dir()); //LOGGER.info("action1PrefsPath = "+action1PrefsPath.toString()); //LOGGER.info("action2PrefsPath = "+action2PrefsPath.toString()); ToolRunner.run(getConf(), new RecommenderJob(), new String[] { "--input", action1PrefsPath, "--output", options.getPrimaryRecsPath(), "--similarityClassname", options.getSimilairtyType(), //need the seqfile for the similarity matrix even if output to Solr, this job puts it in the temp dir. "--tempDir", options.getPrimaryTempDir(), "--sequencefileOutput" }); //Now move the similarity matrix to the p-recs/sims location rather than leaving is in the tmp dir //this will be written to Solr if specified in the options. 
if (options.getDoXRecommender()) { //note: similairty class is not used, cooccurrence only for now ToolRunner.run(getConf(), new XRecommenderJob(), new String[] { "--input", options.getAllActionsDir(), "--output", options.getSecondaryOutputDir(), "--similarityClassname", "SIMILARITY_LOGLIKELIHOOD", "--outputPathForSimilarityMatrix", options.getSecondarySimilarityMatrixPath(), "--tempDir", options.getSecondaryTempDir(), "--numUsers", Integer.toString(this.numberOfUsers), "--numItems", Integer.toString(this.numberOfItems), "--primaryPrefs", action1PrefsPath, "--secondaryPrefs", action2PrefsPath, }); } Path bBSimilarityMatrixDRM = new Path(options.getPrimarySimilarityMatrixPath()); Path bASimilarityMatrixDRM = new Path(options.getSecondarySimilarityMatrixPath()); Path primaryActionDRM = new Path(new Path(options.getPrimaryTempDir(), RecommenderJob.DEFAULT_PREPARE_PATH), PreparePreferenceMatrixJob.USER_VECTORS); Path secondaryActionDRM = new Path( new Path(options.getSecondaryTempDir(), XRecommenderJob.DEFAULT_PREPARE_DIR), PrepareActionMatricesJob.USER_VECTORS_A); if (options.getDoXRecommender()) { //Next step is to take the history and similarity matrices, join them by id and write to solr docs LOGGER.info("\n===========\n\n\n" + " About to call WriteToSolr with cross-recommendations:\n" + " B matrix path: " + primaryActionDRM.toString() + "\n" + " A matrix path: " + secondaryActionDRM.toString() + "\n" + " [B'B] matrix path: " + bBSimilarityMatrixDRM.toString() + "\n" + " [B'A] matrix path: " + bASimilarityMatrixDRM.toString() + "\n" + " Output path: " + options.getOutputDir() + "\n" + "\n\n===========\n"); ToolRunner.run(getConf(), new WriteToSolrJob(), new String[] { "--itemCrossSimilarityMatrixDir", bASimilarityMatrixDRM.toString(), "--indexDir", indexesPath.toString(), "--itemSimilarityMatrixDir", bBSimilarityMatrixDRM.toString(), "--usersPrimaryHistoryDir", primaryActionDRM.toString(), "--usersSecondaryHistoryDir", secondaryActionDRM.toString(), "--output", 
options.getOutputDir(), }); } else { LOGGER.info("\n===========\n\n\n" + " About to call WriteToSolr with single actions recommendations:\n" + " B matrix path: " + primaryActionDRM.toString() + "\n" + " [B'B] matrix path: " + bBSimilarityMatrixDRM.toString() + "\n" + " Output path: " + options.getOutputDir() + "\n" + "\n\n===========\n"); ToolRunner.run(getConf(), new WriteToSolrJob(), new String[] { "--indexDir", indexesPath.toString(), "--itemSimilarityMatrixDir", bBSimilarityMatrixDRM.toString(), "--usersPrimaryHistoryDir", primaryActionDRM.toString(), "--output", options.getOutputDir(), }); } /* ToolRunner.run(getConf(), new WriteToSolrJob(), new String[]{ "--itemCrossSimilarityMatrixDir", "../out/s-recs/sims", "--indexDir", "../out/id-indexes", "--itemSimilarityMatrixDir", "../out/p-recs/sims", "--usersPrimaryHistoryDir", "../out/actions/p-action", "--usersSecondaryHistoryDir", "../out/actions/s-action", "--output", "../out", }); */ //move user history and similarity matrices //move stuff out of temp for now, may not need all these moveMatrices(); return 0; }
From source file:finderbots.recommenders.hadoop.RecommenderUpdateJob.java
License:Apache License
private void moveMatrices() throws IOException { //so it can output to Solr if options specify FileSystem fs = FileSystem.get(getConf()); Path from = new Path(options.getPrimarySimilarityMatrixPath()); Path to = new Path(options.getPrimaryOutputDir(), XRecommenderJob.SIMS_MATRIX_DIR);//steal the dir name from Xrec fs.rename(from, to);/*from w w w . j a va 2 s . c o m*/ //move the primary user action matrix to output from = new Path(new Path(options.getPrimaryTempDir(), RecommenderJob.DEFAULT_PREPARE_PATH), PreparePreferenceMatrixJob.USER_VECTORS); to = new Path(options.getOutputDir(), options.getPrimaryActionHistoryDir()); fs.rename(from, to); //if it was created move the secondary user action matrix to output if (options.getDoXRecommender()) { from = new Path(new Path(options.getSecondaryTempDir(), XRecommenderJob.DEFAULT_PREPARE_DIR), PrepareActionMatricesJob.USER_VECTORS_A); to = new Path(options.getOutputDir(), options.getSecondaryActionHistoryDir()); fs.rename(from, to); } }