Example usage for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob NUM_USERS

List of usage examples for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob NUM_USERS

Introduction

On this page you can find example usage for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob NUM_USERS.

Prototype

String NUM_USERS

To view the source code for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob NUM_USERS, click the Source Link.

Usage

From source file: org.hf.mls.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob.java

License: Apache License

/**
 * Parses the command-line options and runs the item-similarity pipeline in up to three
 * phases, each gated by {@code shouldRunNextPhase}:
 * <ol>
 *   <li>{@link PreparePreferenceMatrixJob} on the input path, writing to a temp directory;</li>
 *   <li>{@link RowSimilarityJob} on the prepared rating matrix, writing a similarity matrix
 *       to a temp directory;</li>
 *   <li>a map/reduce job ({@code MostSimilarItemPairsMapper} /
 *       {@code MostSimilarItemPairsReducer}) that reads the similarity matrix and writes text
 *       output to the output path.</li>
 * </ol>
 *
 * @param args command-line arguments, handed to {@code parseArguments}
 * @return {@code 0} on success; {@code -1} if argument parsing yields {@code null} or any
 *         phase reports failure
 * @throws Exception if option parsing or any of the launched jobs throws
 */
@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, "
            + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')');
    addOption("maxSimilaritiesPerItem", "m",
            "try to cap the number of similar items per item to this number " + "(default: "
                    + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')',
            String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM));
    addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, "
            + "users with more preferences will be sampled down (default: " + DEFAULT_MAX_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MAX_PREFS_PER_USER));
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("booleanData", "b", "Treat input as without pref values", String.valueOf(Boolean.FALSE));
    addOption("threshold", "tr", "discard item pairs with a similarity value below this", false);

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    String similarityClassName = getOption("similarityClassname");
    int maxSimilarItemsPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem"));
    int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser"));
    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    // parseBoolean yields the primitive directly (no boxing round-trip).
    boolean booleanData = Boolean.parseBoolean(getOption("booleanData"));

    // When no threshold option is present, fall back to RowSimilarityJob's NO_THRESHOLD constant.
    double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold"))
            : RowSimilarityJob.NO_THRESHOLD;

    Path similarityMatrixPath = getTempPath("similarityMatrix");
    Path prepPath = getTempPath("prepareRatingMatrix");

    AtomicInteger currentPhase = new AtomicInteger();

    // Phase 1: prepare the preference matrix under prepPath.
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        int prepareExitCode = ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(),
                new String[] { "--input", getInputPath().toString(), "--output", prepPath.toString(),
                        "--maxPrefsPerUser", String.valueOf(maxPrefsPerUser), "--minPrefsPerUser",
                        String.valueOf(minPrefsPerUser), "--booleanData", String.valueOf(booleanData),
                        "--tempDir", getTempPath().toString(), });
        // Do not continue onto later phases when the preparation job reports failure.
        if (prepareExitCode != 0) {
            return -1;
        }
    }

    // Phase 2: compute row similarities over the prepared rating matrix.
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        // The user count is read from the NUM_USERS file below prepPath and passed on as
        // the number of columns.
        int numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS),
                getConf());

        int similarityExitCode = ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] { "--input",
                new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output",
                similarityMatrixPath.toString(), "--numberOfColumns", String.valueOf(numberOfUsers),
                "--similarityClassname", similarityClassName, "--maxSimilaritiesPerRow",
                String.valueOf(maxSimilarItemsPerItem), "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE),
                "--threshold", String.valueOf(threshold), "--tempDir", getTempPath().toString(), });
        // Do not continue onto the final phase when the similarity job reports failure.
        if (similarityExitCode != 0) {
            return -1;
        }
    }

    // Phase 3: read the similarity matrix and write the most similar item pairs as text.
    if (shouldRunNextPhase(parsedArgs, currentPhase)) {
        Job mostSimilarItems = prepareJob(similarityMatrixPath, getOutputPath(), SequenceFileInputFormat.class,
                MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class,
                MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class,
                TextOutputFormat.class);
        Configuration mostSimilarItemsConf = mostSimilarItems.getConfiguration();
        // Hand the item-ID index location and the per-item cap to the job via its configuration.
        mostSimilarItemsConf.set(ITEM_ID_INDEX_PATH_STR,
                new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString());
        mostSimilarItemsConf.setInt(MAX_SIMILARITIES_PER_ITEM, maxSimilarItemsPerItem);
        boolean succeeded = mostSimilarItems.waitForCompletion(true);
        if (!succeeded) {
            return -1;
        }
    }

    return 0;
}