List of usage examples for org.apache.mahout.cf.taste.hadoop.similarity.item ItemSimilarityJob MAX_SIMILARITIES_PER_ITEM
String MAX_SIMILARITIES_PER_ITEM
To view the source code for org.apache.mahout.cf.taste.hadoop.similarity.item ItemSimilarityJob MAX_SIMILARITIES_PER_ITEM, click Source Link.
From source file:hadoop.api.RecommenderJob.java
License:Apache License
/** * Calculate the co-occurrence matrix/* w w w.j a v a 2 s .c o m*/ * * @param args Information about the input path, numberOfColumns, similarityClassname, maxObservationsPerRow * @param numberOfUsers Number of Users * @return Similarities Per Item */ public int rowSimilarity(String[] args, int numberOfUsers) { try { prepareRecommender(args); } catch (IOException e) { e.printStackTrace(); } try { numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf()); } catch (IOException e) { e.printStackTrace(); } int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity")); int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem")); String similarityClassname = getOption("similarityClassname"); double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : RowSimilarityJob.NO_THRESHOLD; long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed")) : RowSimilarityJob.NO_FIXED_RANDOM_SEED; try { ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] { "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output", new Path(prepPath, "similarityMatrix").toUri().toString(), "--numberOfColumns", String.valueOf(numberOfUsers), "--similarityClassname", similarityClassname, "--maxObservationsPerRow", String.valueOf(maxPrefsInItemSimilarity), "--maxObservationsPerColumn", String.valueOf(maxPrefsInItemSimilarity), "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem), "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--randomSeed", String.valueOf(randomSeed), "--tempDir", prepPath.toString() }); } catch (Exception e) { e.printStackTrace(); } // write out the similarity matrix if the user specified that behavior if (hasOption("outputPathForSimilarityMatrix")) { Path outputPathForSimilarityMatrix = new 
Path(getOption("outputPathForSimilarityMatrix")); Job outputSimilarityMatrix = null; try { outputSimilarityMatrix = prepareJob(getTempPath("similarityMatrix"), outputPathForSimilarityMatrix, SequenceFileInputFormat.class, ItemSimilarityJob.MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class, ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class, TextOutputFormat.class); } catch (IOException e) { e.printStackTrace(); } Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration(); mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR, new Path(getTempPath(DEFAULT_PREPARE_PATH), PreparePreferenceMatrixJob.ITEMID_INDEX) .toString()); mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem); try { outputSimilarityMatrix.waitForCompletion(true); } catch (IOException e) { e.printStackTrace(); } catch (InterruptedException e) { e.printStackTrace(); } catch (ClassNotFoundException e) { e.printStackTrace(); } } return maxSimilaritiesPerItem; }