List of usage examples for org.apache.mahout.common HadoopUtil writeInt
public static void writeInt(int value, Path path, Configuration configuration) throws IOException
From source file: com.pocketx.gravity.recommender.cf.similarity.job.PreparePreferenceMatrixJob.java
License: Apache License
@Override public int run(String[] args) throws Exception { addInputOption();/* w w w.jav a 2s .co m*/ addOutputOption(); addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, " + "users with more preferences will be sampled down"); addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER)); addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString()); addOption("ratingShift", "rs", "shift ratings by this value", "0.0"); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser")); boolean booleanData = Boolean.valueOf(getOption("booleanData")); float ratingShift = Float.parseFloat(getOption("ratingShift")); //convert items to an internal index Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class, ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); itemIDIndex.setCombinerClass(ItemIDIndexReducer.class); boolean succeeded = itemIDIndex.waitForCompletion(true); if (!succeeded) { return -1; } //convert user preferences into a vector per user Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? 
VarLongWritable.class : EntityPrefWritable.class, ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData); toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser); toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift)); succeeded = toUserVectors.waitForCompletion(true); if (!succeeded) { return -1; } //we need the number of users later int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS) .getValue(); HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf()); //build the rating matrix Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX), ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class, IntWritable.class, VectorWritable.class); toItemVectors.setCombinerClass(ToItemVectorsReducer.class); /* configure sampling regarding the uservectors */ if (hasOption("maxPrefsPerUser")) { int samplingSize = Integer.parseInt(getOption("maxPrefsPerUser")); toItemVectors.getConfiguration().setInt(ToItemVectorsMapper.SAMPLE_SIZE, samplingSize); } succeeded = toItemVectors.waitForCompletion(true); if (!succeeded) { return -1; } return 0; }
From source file: finderbots.recommenders.hadoop.ActionSplitterJob.java
License: Apache License
@Override public int run(String[] args) throws Exception { options = new Options(); CmdLineParser parser = new CmdLineParser(options); String s = options.toString(); try {//from ww w . j a v a 2 s . c o m parser.parseArgument(args); } catch (CmdLineException e) { System.err.println(e.getMessage()); parser.printUsage(System.err); return -1; } this.userIndex = HashBiMap.create(); this.itemIndex = HashBiMap.create(); // split into actions and store in subdirs // create indexes for users and items Path inputPath = new Path(options.getInputDir()); FileSystem fs = inputPath.getFileSystem(new JobConf()); Path outputPath = new Path(options.getOutputDir()); // todo: can put this into m/r if it helps speed up split(inputPath, outputPath);// split into actions and store in subdirs Path indexesPath = new Path(options.getIndexDir()); Path userIndexPath = new Path(options.getIndexDir(), options.getUserIndexFile()); Path itemIndexPath = new Path(options.getIndexDir(), options.getItemIndexFile()); if (fs.exists(userIndexPath)) fs.delete(userIndexPath, false);//delete file only! if (fs.exists(itemIndexPath)) fs.delete(itemIndexPath, false);//delete file only! // get the size of the matrices and put them where the calling job // can find them HadoopUtil.writeInt(getNumberOfUsers(), new Path(indexesPath, options.getNumUsersFile()), getConf()); HadoopUtil.writeInt(getNumberOfItems(), new Path(indexesPath, options.getNumItemsFile()), getConf()); //write the indexes to tsv files saveIndexes(indexesPath); return 0; }
From source file: org.gpfvic.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob.java
License: Apache License
@Override public int run(String[] args) throws Exception { addInputOption();//from w ww .j a v a 2 s. c om addOutputOption(); addOption("minPrefsPerUser", "mp", "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')', String.valueOf(DEFAULT_MIN_PREFS_PER_USER)); addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString()); addOption("ratingShift", "rs", "shift ratings by this value", "0.0"); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser")); boolean booleanData = Boolean.valueOf(getOption("booleanData")); float ratingShift = Float.parseFloat(getOption("ratingShift")); //convert items to an internal index Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class, ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class, VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class); itemIDIndex.setCombinerClass(ItemIDIndexReducer.class); boolean succeeded = itemIDIndex.waitForCompletion(true); if (!succeeded) { return -1; } //convert user preferences into a vector per user Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class, ToItemPrefsMapper.class, VarLongWritable.class, booleanData ? 
VarLongWritable.class : EntityPrefWritable.class, ToUserVectorsReducer.class, VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class); toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData); toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser); toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift)); succeeded = toUserVectors.waitForCompletion(true); if (!succeeded) { return -1; } //we need the number of users later int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS) .getValue(); HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf()); //build the rating matrix Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX), ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class, IntWritable.class, VectorWritable.class); toItemVectors.setCombinerClass(ToItemVectorsReducer.class); succeeded = toItemVectors.waitForCompletion(true); if (!succeeded) { return -1; } return 0; }
From source file: org.hf.mls.mahout.cf.taste.hadoop.item.ToUserVectorsReducer.java
License: Apache License
@Override protected void cleanup(Context context) throws IOException, InterruptedException { super.cleanup(context); //we need the number of users later Configuration conf = context.getConfiguration(); int numberOfUsers = (int) context.getCounter(Counters.USERS).getValue(); HadoopUtil.writeInt(numberOfUsers, new Path(conf.get("NUM_USERS", "")), conf); }