Example usage for org.apache.mahout.common HadoopUtil writeInt

Introduction

On this page you can find example usage of org.apache.mahout.common HadoopUtil writeInt, which persists a single int value to a file at the given Path so that a later job can read it back.

Prototype

public static void writeInt(int value, Path path, Configuration configuration) throws IOException 
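
A minimal sketch of calling it directly (the path below is an assumption for illustration; HadoopUtil.readInt is the matching reader in the same class):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.HadoopUtil;

public class WriteIntExample {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // hypothetical location; any Path on the configured FileSystem works
        Path countPath = new Path("/tmp/mahout-example/numUsers.bin");
        HadoopUtil.writeInt(42, countPath, conf);
        // read the value back with the companion method
        int stored = HadoopUtil.readInt(countPath, conf);
        System.out.println("stored value: " + stored);
    }
}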

Usage

From source file: com.pocketx.gravity.recommender.cf.similarity.job.PreparePreferenceMatrixJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("maxPrefsPerUser", "mppu", "max number of preferences to consider per user, "
            + "users with more preferences will be sampled down");
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("ratingShift", "rs", "shift ratings by this value", "0.0");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    float ratingShift = Float.parseFloat(getOption("ratingShift"));
    //convert items to an internal index
    Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class,
            ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class,
            VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
    itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
    boolean succeeded = itemIDIndex.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }
    //convert user preferences into a vector per user
    Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class,
            ToItemPrefsMapper.class, VarLongWritable.class,
            booleanData ? VarLongWritable.class : EntityPrefWritable.class, ToUserVectorsReducer.class,
            VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData);
    toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
    toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift));
    succeeded = toUserVectors.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }
    //we need the number of users later
    int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS)
            .getValue();
    HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf());
    //build the rating matrix
    Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX),
            ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class,
            IntWritable.class, VectorWritable.class);
    toItemVectors.setCombinerClass(ToItemVectorsReducer.class);

    /* configure sampling regarding the uservectors */
    if (hasOption("maxPrefsPerUser")) {
        int samplingSize = Integer.parseInt(getOption("maxPrefsPerUser"));
        toItemVectors.getConfiguration().setInt(ToItemVectorsMapper.SAMPLE_SIZE, samplingSize);
    }

    succeeded = toItemVectors.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    return 0;
}
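
The value written to NUM_USERS above is consumed downstream: in Mahout's recommender pipeline, the follow-up job reads it back with HadoopUtil.readInt. A minimal sketch of that reading side, assuming prepPath is the output directory of the job above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob;
import org.apache.mahout.common.HadoopUtil;

public class ReadNumUsers {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // assumption: the directory the preparation job wrote its output to
        Path prepPath = new Path(args[0]);
        int numberOfUsers = HadoopUtil.readInt(
                new Path(prepPath, PreparePreferenceMatrixJob.NUM_USERS), conf);
        System.out.println("numberOfUsers = " + numberOfUsers);
    }
}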

From source file: finderbots.recommenders.hadoop.ActionSplitterJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {
    options = new Options();
    CmdLineParser parser = new CmdLineParser(options);
    String s = options.toString();

    try {
        parser.parseArgument(args);
    } catch (CmdLineException e) {
        System.err.println(e.getMessage());
        parser.printUsage(System.err);
        return -1;
    }

    this.userIndex = HashBiMap.create();
    this.itemIndex = HashBiMap.create();

    // split into actions and store in subdirs
    // create indexes for users and items
    Path inputPath = new Path(options.getInputDir());
    FileSystem fs = inputPath.getFileSystem(new JobConf());
    Path outputPath = new Path(options.getOutputDir());
    // todo: can put this into m/r if it helps speed up
    split(inputPath, outputPath);// split into actions and store in subdirs

    Path indexesPath = new Path(options.getIndexDir());
    Path userIndexPath = new Path(options.getIndexDir(), options.getUserIndexFile());
    Path itemIndexPath = new Path(options.getIndexDir(), options.getItemIndexFile());
    if (fs.exists(userIndexPath)) {
        fs.delete(userIndexPath, false); // delete the file only, not recursively
    }
    if (fs.exists(itemIndexPath)) {
        fs.delete(itemIndexPath, false); // delete the file only, not recursively
    }
    // get the size of the matrices and put them where the calling job
    // can find them
    HadoopUtil.writeInt(getNumberOfUsers(), new Path(indexesPath, options.getNumUsersFile()), getConf());
    HadoopUtil.writeInt(getNumberOfItems(), new Path(indexesPath, options.getNumItemsFile()), getConf());
    //write the indexes to tsv files
    saveIndexes(indexesPath);
    return 0;
}
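
A calling job can recover both matrix dimensions the same way. A sketch of that side, assuming the index directory and the num-users/num-items file names mirror the options configured above:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.mahout.common.HadoopUtil;

public class ReadMatrixDimensions {
    public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // assumptions: the index dir and the file names match what
        // options.getNumUsersFile() and options.getNumItemsFile() returned above
        Path indexesPath = new Path(args[0]);
        int numUsers = HadoopUtil.readInt(new Path(indexesPath, "num-users.bin"), conf);
        int numItems = HadoopUtil.readInt(new Path(indexesPath, "num-items.bin"), conf);
        System.out.println("matrix is " + numUsers + " x " + numItems);
    }
}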

From source file: org.gpfvic.mahout.cf.taste.hadoop.preparation.PreparePreferenceMatrixJob.java

License: Apache License

@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();
    addOption("minPrefsPerUser", "mp",
            "ignore users with less preferences than this " + "(default: " + DEFAULT_MIN_PREFS_PER_USER + ')',
            String.valueOf(DEFAULT_MIN_PREFS_PER_USER));
    addOption("booleanData", "b", "Treat input as without pref values", Boolean.FALSE.toString());
    addOption("ratingShift", "rs", "shift ratings by this value", "0.0");

    Map<String, List<String>> parsedArgs = parseArguments(args);
    if (parsedArgs == null) {
        return -1;
    }

    int minPrefsPerUser = Integer.parseInt(getOption("minPrefsPerUser"));
    boolean booleanData = Boolean.valueOf(getOption("booleanData"));
    float ratingShift = Float.parseFloat(getOption("ratingShift"));
    //convert items to an internal index
    Job itemIDIndex = prepareJob(getInputPath(), getOutputPath(ITEMID_INDEX), TextInputFormat.class,
            ItemIDIndexMapper.class, VarIntWritable.class, VarLongWritable.class, ItemIDIndexReducer.class,
            VarIntWritable.class, VarLongWritable.class, SequenceFileOutputFormat.class);
    itemIDIndex.setCombinerClass(ItemIDIndexReducer.class);
    boolean succeeded = itemIDIndex.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }
    //convert user preferences into a vector per user
    Job toUserVectors = prepareJob(getInputPath(), getOutputPath(USER_VECTORS), TextInputFormat.class,
            ToItemPrefsMapper.class, VarLongWritable.class,
            booleanData ? VarLongWritable.class : EntityPrefWritable.class, ToUserVectorsReducer.class,
            VarLongWritable.class, VectorWritable.class, SequenceFileOutputFormat.class);
    toUserVectors.getConfiguration().setBoolean(RecommenderJob.BOOLEAN_DATA, booleanData);
    toUserVectors.getConfiguration().setInt(ToUserVectorsReducer.MIN_PREFERENCES_PER_USER, minPrefsPerUser);
    toUserVectors.getConfiguration().set(ToEntityPrefsMapper.RATING_SHIFT, String.valueOf(ratingShift));
    succeeded = toUserVectors.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }
    //we need the number of users later
    int numberOfUsers = (int) toUserVectors.getCounters().findCounter(ToUserVectorsReducer.Counters.USERS)
            .getValue();
    HadoopUtil.writeInt(numberOfUsers, getOutputPath(NUM_USERS), getConf());
    //build the rating matrix
    Job toItemVectors = prepareJob(getOutputPath(USER_VECTORS), getOutputPath(RATING_MATRIX),
            ToItemVectorsMapper.class, IntWritable.class, VectorWritable.class, ToItemVectorsReducer.class,
            IntWritable.class, VectorWritable.class);
    toItemVectors.setCombinerClass(ToItemVectorsReducer.class);

    succeeded = toItemVectors.waitForCompletion(true);
    if (!succeeded) {
        return -1;
    }

    return 0;
}
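
For reference, writeInt itself does little more than open a stream on the target FileSystem and write the value. A rough sketch of an equivalent hand-rolled helper (not the library's exact implementation):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class WriteIntByHand {
    // roughly what HadoopUtil.writeInt saves you from writing yourself
    public static void writeInt(int value, Path path, Configuration conf) throws IOException {
        FileSystem fs = FileSystem.get(path.toUri(), conf);
        try (FSDataOutputStream out = fs.create(path)) {
            out.writeInt(value);
        }
    }
}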

From source file: org.hf.mls.mahout.cf.taste.hadoop.item.ToUserVectorsReducer.java

License: Apache License

@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
    super.cleanup(context);
    //we need the number of users later
    Configuration conf = context.getConfiguration();
    int numberOfUsers = (int) context.getCounter(Counters.USERS).getValue();
    HadoopUtil.writeInt(numberOfUsers, new Path(conf.get("NUM_USERS", "")), conf);
}