List of usage examples for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob NUM_USERS
String NUM_USERS
To view the source code for org.apache.mahout.cf.taste.hadoop.preparation PreparePreferenceMatrixJob NUM_USERS.
Click Source Link
From source file:org.hf.mls.mahout.cf.taste.hadoop.similarity.item.ItemSimilarityJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/*from www .j av a2s . com*/ addOutputOption(); addOption("similarityClassname", "s", "Name of distributed similarity measures class to instantiate, " + "alternatively use one of the predefined similarities (" + VectorSimilarityMeasures.list() + ')'); addOption("maxSimilaritiesPerItem", "m", "try to cap the number of similar items per item to this number " + "(default: " + DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM + ')', String.valueOf(DEFAULT_MAX_SIMILAR_ITEMS_PER_ITEM)); addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, " + "users with more preferences will be sampled down (default: " + DEFAULT_MAX_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MAX_PREFS_PER_USER)); addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER)); addOption("booleanData", "b", "Treat input as without pref values", String.valueOf(Boolean.FALSE)); addOption("threshold", "tr", "discard item pairs with a similarity value below this", false); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } String similarityClassName = getOption("similarityClassname"); int maxSimilarItemsPerItem = Integer.parseInt(getOption("maxSimilaritiesPerItem")); int maxPrefsPerUser = Integer.parseInt(getOption("maxPrefsPerUser")); int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser")); boolean booleanData = Boolean.valueOf(getOption("booleanData")); double threshold = hasOption("threshold") ? Double.parseDouble(getOption("threshold")) : RowSimilarityJob.NO_THRESHOLD; Path similarityMatrixPath = getTempPath("similarityMatrix"); Path prepPath = getTempPath("prepareRatingMatrix"); AtomicInteger currentPhase = new AtomicInteger(); if (shouldRunNextPhase(parsedArgs, currentPhase)) { ToolRunner.run(getConf(), new PreparePreferenceMatrixJob(), new String[] { "--input", getInputPath().toString(), "--output", prepPath.toString(), "--maxPrefsPerUser", String.valueOf(maxPrefsPerUser), "--minPrefsPerUser", String.valueOf(minPrefsPerUser), "--booleanData", String.valueOf(booleanData), "--tempDir", getTempPath().toString(), }); } if (shouldRunNextPhase(parsedArgs, currentPhase)) { int numberOfUsers = HadoopUtil.readInt(new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), getConf()); ToolRunner.run(getConf(), new RowSimilarityJob(), new String[] { "--input", new Path(prepPath, PreparePreferenceMatrixJob.RATING_MATRIX).toString(), "--output", similarityMatrixPath.toString(), "--numberOfColumns", String.valueOf(numberOfUsers), "--similarityClassname", similarityClassName, "--maxSimilaritiesPerRow", String.valueOf(maxSimilarItemsPerItem), "--excludeSelfSimilarity", String.valueOf(Boolean.TRUE), "--threshold", String.valueOf(threshold), "--tempDir", getTempPath().toString(), }); } if (shouldRunNextPhase(parsedArgs, currentPhase)) { Job mostSimilarItems = prepareJob(similarityMatrixPath, getOutputPath(), SequenceFileInputFormat.class, MostSimilarItemPairsMapper.class, EntityEntityWritable.class, DoubleWritable.class, MostSimilarItemPairsReducer.class, EntityEntityWritable.class, DoubleWritable.class, TextOutputFormat.class); Configuration mostSimilarItemsConf = mostSimilarItems.getConfiguration(); mostSimilarItemsConf.set(ITEM_ID_INDEX_PATH_STR, new Path(prepPath, PreparePreferenceMatrixJob.ITEMID_INDEX).toString()); mostSimilarItemsConf.setInt(MAX_SIMILARITIES_PER_ITEM, maxSimilarItemsPerItem); boolean succeeded = mostSimilarItems.waitForCompletion(true); if (!succeeded) { return -1; } } return 0; }