List of usage examples for org.apache.mahout.cf.taste.hadoop.item ToUserVectorsReducer MIN_PREFERENCES_PER_USER
String MIN_PREFERENCES_PER_USER
To view the source code for org.apache.mahout.cf.taste.hadoop.item ToUserVectorsReducer MIN_PREFERENCES_PER_USER, click the Source Link below.
From source file:com.pocketx.gravity.recommender.cf.similarity.job.PreparePreferenceMatrixJob.java
License:Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/* w w w. j av a 2 s . c o m*/ addOutputOption(); addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, " + "users with more preferences will be sampled down"); addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER)); addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString()); addOption("ratingShift", "rs", "shift ratings by this value", "0.0"); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser")); boolean booleanData = Boolean.valueOf(getOption("booleanData")); float ratingShift = Float.parseFloat(getOption("ratingShift")); //convert items to an internal index Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class, ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); itemIDIndex.setCombinerClass(ItemIDIndexReducer.class); boolean succeeded = itemIDIndex.waitForCompletion(true); if (!succeeded) { return -1; } //convert user preferences into a vector per user Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? 
VarLongWritable.class : EntityPrefWritable.class, ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData); toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser); toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift)); succeeded = toUserVectors.waitForCompletion(true); if (!succeeded) { return -1; } //we need the number of users later int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS) .getValue(); HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf()); //build the rating matrix Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX), ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class, IntWritable.class, VectorWritable.class); toItemVectors.setCombinerClass(ToItemVectorsReducer.class); /* configure sampling regarding the uservectors */ if (hasOption("maxPrefsPerUser")) { int samplingSize = Integer.parseInt(getOption("maxPrefsPerUser")); toItemVectors.getConfiguration().setInt(ToItemVectorsMapper.SAMPLE_SIZE, samplingSize); } succeeded = toItemVectors.waitForCompletion(true); if (!succeeded) { return -1; } return 0; }