Example usage for org.apache.mahout.cf.taste.hadoop.similarity.item ItemSimilarityJob MAX_SIMILARITIES_PER_ITEM

List of usage examples for org.apache.mahout.cf.taste.hadoop.similarity.item ItemSimilarityJob MAX_SIMILARITIES_PER_ITEM

Introduction

On this page you can find example usage for org.apache.mahout.cf.taste.hadoop.similarity.item ItemSimilarityJob MAX_SIMILARITIES_PER_ITEM.

Prototype

String MAX_SIMILARITIES_PER_ITEM

To view the source code for org.apache.mahout.cf.taste.hadoop.similarity.item ItemSimilarityJob MAX_SIMILARITIES_PER_ITEM, click Source Link.

Usage

From source file: hadoop.api.RecommenderJob.java

License: Apache License

/**
 * Calculate the co-occurrence matrix/*  w w w.j a  v  a 2  s .c  o  m*/
 *
 * @param args          Information about the input path, numberOfColumns, similarityClassname, maxObservationsPerRow
 * @param numberOfUsers Number of Users
 * @return Similarities Per Item
 */
public int rowSimilarity(String[] args, int numberOfUsers) {
    try {
        prepareRecommender(args);
    } catch (IOException e) {
        e.printStackTrace();
    }

    try {
        numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf());
    } catch (IOException e) {
        e.printStackTrace();
    }

    int maxPrefsInItemSimilarity = Integer.parseInt(getOption("maxPrefsInItemSimilarity"));
    int maxSimilaritiesPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
    String similarityClassname = getOption("similarityClassname");
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold"))
            : RowSimilarityJob.NO_THRESHOLD;
    long randomSeed = hasOption("randomSeed") ? Long.parseLong(getOption("randomSeed"))
            : RowSimilarityJob.NO_FIXED_RANDOM_SEED;

    try {
        ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] { "--input",
                new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output",
                new Path(prepPath, "similarityMatrix").toUri().toString(), "--numberOfColumns",
                String.valueOf(numberOfUsers), "--similarityClassname", similarityClassname,
                "--maxObservationsPerRow", String.valueOf(maxPrefsInItemSimilarity),
                "--maxObservationsPerColumn", String.valueOf(maxPrefsInItemSimilarity),
                "--maxSimilaritiesPerRow", String.valueOf(maxSimilaritiesPerItem), "--excludeSelfSimilarity",
                String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--randomSeed",
                String.valueOf(randomSeed), "--tempDir", prepPath.toString() });
    } catch (Exception e) {
        e.printStackTrace();
    }

    // write out the similarity matrix if the user specified that behavior
    if (hasOption("outputPathForSimilarityMatrix")) {
        Path outputPathForSimilarityMatrix = new Path(getOption("outputPathForSimilarityMatrix"));

        Job outputSimilarityMatrix = null;
        try {
            outputSimilarityMatrix = prepareJob(getTempPath("similarityMatrix"), outputPathForSimilarityMatrix,
                    SequenceFileInputFormat.class, ItemSimilarityJob.MostSimilarItemPairsMapper.class,
                    EntityEntityWritable.class, DoubleWritable.class,
                    ItemSimilarityJob.MostSimilarItemPairsReducer.class, EntityEntityWritable.class,
                    DoubleWritable.class, TextOutputFormat.class);
        } catch (IOException e) {
            e.printStackTrace();
        }

        Configuration mostSimilarItemsConf = outputSimilarityMatrix.getConfiguration();
        mostSimilarItemsConf.set(ItemSimilarityJob.ITEM_ID_INDEX_PATH_STR,
                new Path(getTempPath(DEFAULT_PREPARE_PATH), PreparePreferenceMatrixJob.ITEMID_INDEX)
                        .toString());
        mostSimilarItemsConf.setInt(ItemSimilarityJob.MAX_SIMILARITIES_PER_ITEM, maxSimilaritiesPerItem);
        try {
            outputSimilarityMatrix.waitForCompletion(true);
        } catch (IOException e) {
            e.printStackTrace();
        } catch (InterruptedException e) {
            e.printStackTrace();
        } catch (ClassNotFoundException e) {
            e.printStackTrace();
        }
    }
    return maxSimilaritiesPerItem;
}