List of usage examples for org.apache.mahout.cf.taste.hadoop.item.RecommenderJob
From source file:finderbots.recommenders.hadoop.RecommenderUpdateJob.java
License:Apache License
/**
 * Drives the full recommender-update pipeline end to end: splits raw action
 * logs into per-action preference files (building user/item id indexes),
 * runs Mahout's item-based RecommenderJob — and optionally the cross-action
 * XRecommenderJob — then joins the similarity matrices with user history and
 * writes the result as Solr documents.
 *
 * @param args command line, parsed into {@code options} by args4j
 * @return 0 on success, -1 if the command line could not be parsed
 * @throws Exception if any of the chained Hadoop jobs fails
 */
@Override
public int run(String[] args) throws Exception {
    options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    String s = options.toString(); // NOTE(review): unused — looks like a debug leftover
    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }
    cleanOutputDirs(options);
    Path prefFilesRootDir = new Path(options.getOutputDir());
    FileSystem fs = prefFilesRootDir.getFileSystem(getConf()); // NOTE(review): unused
    Path indexesPath = new Path(prefFilesRootDir, options.getIndexesDir());
    Path prefsPath = new Path(prefFilesRootDir, options.getPrefsDir());
    options.setPrefsDir(prefsPath.toString());
    // Split input into per-action subdirs and create an index/dictionary for
    // users and items. This job cleans out its output dir first.
    ActionSplitterJob aj = new ActionSplitterJob();
    ToolRunner.run(getConf(), aj, new String[] {
            "--input", options.getInputDir(),
            "--output", prefsPath.toString(),
            "--indexDir", indexesPath.toString(),
            "--inputFilePattern", options.getFileNamePatternString(),
            "--action1", options.getAction1(),
            "--action2", options.getAction2(),
            "--inputDelim", options.getInputDelim(),
            "--outputDelim", options.getOutputDelim(),
            "--actionIDCol", Integer.toString(options.getActionColumn()),
            "--itemIDCol", Integer.toString(options.getItemIDColumn()),
            "--userIDCol", Integer.toString(options.getUserIDColumn()), });
    // The splitter records the user/item counts as single-value binary files
    // (written with HadoopUtil.writeInt); read them back for the jobs below.
    this.numberOfUsers = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumUsersFile()), getConf());
    this.numberOfItems = HadoopUtil.readInt(new Path(indexesPath, aj.getOptions().getNumItemsFile()), getConf());
    options.setInputDir(prefFilesRootDir.toString());
    String action1PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction1Dir())
            .toString();
    String action2PrefsPath = new Path(new Path(options.getPrefsDir()), aj.getOptions().getAction2Dir())
            .toString();
    // Item-based recommendations from the primary action. The seqfile of the
    // similarity matrix is needed even when output goes to Solr; this job
    // leaves it in the temp dir (it is moved later by moveMatrices()).
    ToolRunner.run(getConf(), new RecommenderJob(), new String[] {
            "--input", action1PrefsPath,
            "--output", options.getPrimaryRecsPath(),
            "--similarityClassname", options.getSimilairtyType(),
            "--tempDir", options.getPrimaryTempDir(),
            "--sequencefileOutput" });
    if (options.getDoXRecommender()) {
        // note: the similarity class is not used here — cooccurrence
        // (loglikelihood) only for now.
        ToolRunner.run(getConf(), new XRecommenderJob(), new String[] {
                "--input", options.getAllActionsDir(),
                "--output", options.getSecondaryOutputDir(),
                "--similarityClassname", "SIMILARITY_LOGLIKELIHOOD",
                "--outputPathForSimilarityMatrix", options.getSecondarySimilarityMatrixPath(),
                "--tempDir", options.getSecondaryTempDir(),
                "--numUsers", Integer.toString(this.numberOfUsers),
                "--numItems", Integer.toString(this.numberOfItems),
                "--primaryPrefs", action1PrefsPath,
                "--secondaryPrefs", action2PrefsPath, });
    }
    // Locate the similarity matrices ([B'B], [B'A]) and the user-history
    // DRMs produced by the jobs above.
    Path bBSimilarityMatrixDRM = new Path(options.getPrimarySimilarityMatrixPath());
    Path bASimilarityMatrixDRM = new Path(options.getSecondarySimilarityMatrixPath());
    Path primaryActionDRM = new Path(new Path(options.getPrimaryTempDir(), RecommenderJob.DEFAULT_PREPARE_PATH),
            PreparePreferenceMatrixJob.USER_VECTORS);
    Path secondaryActionDRM = new Path(
            new Path(options.getSecondaryTempDir(), XRecommenderJob.DEFAULT_PREPARE_DIR),
            PrepareActionMatricesJob.USER_VECTORS_A);
    if (options.getDoXRecommender()) {
        // Join the history and similarity matrices by id and write Solr docs
        // including the cross-action recommendations.
        LOGGER.info("\n===========\n\n\n"
                + " About to call WriteToSolr with cross-recommendations:\n"
                + " B matrix path: " + primaryActionDRM.toString() + "\n"
                + " A matrix path: " + secondaryActionDRM.toString() + "\n"
                + " [B'B] matrix path: " + bBSimilarityMatrixDRM.toString() + "\n"
                + " [B'A] matrix path: " + bASimilarityMatrixDRM.toString() + "\n"
                + " Output path: " + options.getOutputDir() + "\n"
                + "\n\n===========\n");
        ToolRunner.run(getConf(), new WriteToSolrJob(), new String[] {
                "--itemCrossSimilarityMatrixDir", bASimilarityMatrixDRM.toString(),
                "--indexDir", indexesPath.toString(),
                "--itemSimilarityMatrixDir", bBSimilarityMatrixDRM.toString(),
                "--usersPrimaryHistoryDir", primaryActionDRM.toString(),
                "--usersSecondaryHistoryDir", secondaryActionDRM.toString(),
                "--output", options.getOutputDir(), });
    } else {
        // Single-action variant: no cross-similarity or secondary history.
        LOGGER.info("\n===========\n\n\n"
                + " About to call WriteToSolr with single actions recommendations:\n"
                + " B matrix path: " + primaryActionDRM.toString() + "\n"
                + " [B'B] matrix path: " + bBSimilarityMatrixDRM.toString() + "\n"
                + " Output path: " + options.getOutputDir() + "\n"
                + "\n\n===========\n");
        ToolRunner.run(getConf(), new WriteToSolrJob(), new String[] {
                "--indexDir", indexesPath.toString(),
                "--itemSimilarityMatrixDir", bBSimilarityMatrixDRM.toString(),
                "--usersPrimaryHistoryDir", primaryActionDRM.toString(),
                "--output", options.getOutputDir(), });
    }
    // Move user history and similarity matrices out of the temp dirs.
    // NOTE(review): "may not need all these" per original author — candidates
    // for pruning once the Solr write path is confirmed.
    moveMatrices();
    return 0;
}
From source file:net.aprendizajengrande.gitrecommender.Recommend.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length != 4) { System.err.println("Usage: <db dir> <hdfs folder for input> <hdfs folder for output> <output file>"); System.exit(1);//www.j av a 2 s. co m } Configuration conf = new Configuration(); File dbDir = new File(args[0]); DB db = new DB(dbDir); File outputFile = new File(args[3]); String inputName = args[1] + "/ratings"; String outputName = args[2] + "/recos"; Path input = new Path(inputName); Path output = new Path(outputName); Path actualOutput = new Path(outputName + "/part-r-00000"); // populate ratings file FSDataOutputStream fsdos = input.getFileSystem(conf).create(input); PrintWriter pw = new PrintWriter(new OutputStreamWriter(fsdos)); // compute affinity for files as % of commits that touch that file int[] authorCommitCounts = db.commitsPerAuthor(); Map<Integer, Integer> counts[] = db.counts(); for (int author = 0; author < authorCommitCounts.length; author++) { for (Map.Entry<Integer, Integer> c : counts[author].entrySet()) { pw.println(author + "\t" + c.getKey() + "\t" + ((c.getValue() / (authorCommitCounts[author] * 1.0)) * 10000.0) + "\t" + c.getValue().intValue()); } } pw.close(); // compute recommendation in Hadoop ToolRunner.run(new Configuration(), new RecommenderJob(), new String[] { "--input", inputName, "--output", outputName, "--similarityClassname", "SIMILARITY_COSINE" }); // read recommendations FSDataInputStream fsdis = output.getFileSystem(conf).open(actualOutput); BufferedReader br = new BufferedReader(new InputStreamReader(fsdis)); String line = br.readLine(); pw = new PrintWriter(new FileWriter(outputFile)); List<String> files = db.files(); List<String> authors = db.authors(); while (line != null) { String[] parts = line.split("\\s+"); String author = authors.get(Integer.parseInt(parts[0])); parts = parts[1].substring(1, parts[1].length() - 1).split(","); for (String pair : parts) { String[] pairsPart = pair.split(":"); pw.println(author + "\t" + 
files.get(Integer.parseInt(pairsPart[0])) + "\t" + pairsPart[1]); } line = br.readLine(); } pw.close(); }
From source file:nl.gridline.zieook.runners.cf.RecommenderJobZieOok.java
License:Apache License
public static void main(String[] args) throws Exception { ToolRunner.run(new Configuration(), new RecommenderJob(), args); }
From source file:tv.icntv.grade.film.recommend.CFRecommendJob.java
License:Apache License
@Override public int run(String[] strings) throws Exception { Configuration configuration = getConf(); configuration.setLong("mapred.min.split.size", 512 * 1024 * 1024L); HadoopUtils.deleteIfExist(strings[1]); Job timeJob = new Job(configuration, "calculate film time middle job"); MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob, getPaths(strings[0].split(","))); timeJob.setCombinerClass(TimeCombiner.class); MapReduceUtils.initReducerJob(new Path(strings[1]), TimeReducer.class, timeJob); timeJob.waitForCompletion(true);/* w ww . j ava 2 s .c om*/ HadoopUtils.deleteIfExist(strings[3]); HadoopUtils.deleteIfExist(strings[4]); return ToolRunner.run(configuration, new RecommenderJob(), strings[2].split(" ")); }
From source file:tv.icntv.recommend.algorithm.CFRecommendJob.java
License:Apache License
/** * new String[]{// w w w . j av a 2s . c om String.format(configuration.get("hdfs.directory.input"),date), baseCfData, sb.toString(), output , temp } * @param strings * @return * @throws Exception */ @Override public int run(String[] strings) throws Exception { Configuration configuration = getConf(); // configuration.setLong("mapred.min.split.size",512*1024*1024L); Date date = getDateAdd(-1); String baseCfData = String.format(configuration.get(cfInputProperty), date); String output = String.format(configuration.get(cfOutProperty), date); String temp = String.format(configuration.get(cfTempPropery), date); StringBuilder sb = new StringBuilder(); sb.append("--input ").append(baseCfData); sb.append(" --output ").append(output); sb.append(" --numRecommendations ").append(configuration.get("icntv.cf.recommend.num.per.user")); sb.append(" --similarityClassname ").append(configuration.get("icntv.cf.recommend.similarityClassname")); sb.append(" --tempDir ").append(temp); HadoopUtils.deleteIfExist(baseCfData); Job timeJob = Job.getInstance(configuration, "?"); // MapReduceUtils.initMapperJob(TimeMaper.class, Text.class, Text.class, this.getClass(), timeJob, new Path(String.format(configuration.get("hdfs.directory.input"),date))); // timeJob.setCombinerClass(TimeCombiner.class); // MapReduceUtils.initReducerJob(new Path(baseCfData),TimeReducer.class,timeJob); MapReduceUtils.initMapperJob(ViewTimeMapper.class, Text.class, Text.class, this.getClass(), timeJob, getInput(configuration)); //new Path(String.format(configuration.get("hdfs.directory.input"),date)) MapReduceUtils.initReducerJob(new Path(baseCfData), ViewTimeReducer.class, timeJob); timeJob.waitForCompletion(true); HadoopUtils.deleteIfExist(output); HadoopUtils.deleteIfExist(temp); return ToolRunner.run(configuration, new RecommenderJob(), sb.toString().split(" ")); }