List of usage examples for org.apache.mahout.common.Parameters.getInt
public int getInt(String key, int defaultValue)
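getInt looks the key up in the Parameters object and parses the stored string as an int, returning defaultValue when the key has not been set. A minimal standalone sketch (the key names and values below are illustrative only, not drawn from the examples that follow):

Parameters params = new Parameters();
params.set("numGroups", "500");

int numGroups = params.getInt("numGroups", 1000);   // key present: parses the stored "500"
int maxHeapSize = params.getInt("maxHeapSize", 50); // key absent: the default 50 is returned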
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int i = 0;
    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
        fMap.put(e.getFirst(), i++);
    }
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString()));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}
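Each of these setup() methods assumes the driver has already placed a serialized Parameters object into the job configuration under PFPGrowth.PFP_PARAMETERS, since they rebuild it with the Parameters(String) constructor. A hedged sketch of that driver-side step, assuming params.toString() yields the serialized form the constructor expects (the MAX_PER_GROUP value is illustrative):

Configuration conf = new Configuration();
Parameters params = new Parameters();
params.set(PFPGrowth.MAX_PER_GROUP, "10");              // illustrative value
conf.set(PFPGrowth.PFP_PARAMETERS, params.toString());  // read back in setup() via new Parameters(conf.get(...))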
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
        featureReverseMap.add(e.getFirst());
        freqList.add(e.getSecond());
    }
    maxHeapSize = Integer.valueOf(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
    minSupport = Integer.valueOf(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
    useFP2 = "true".equals(params.get(PFPGrowth.USE_FPG2));
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/**
 * @param params input and output locations, plus additional parameters such as
 *               minSupport(3), maxHeapSize(50) and numGroups(1000)
 * @param conf   initial Hadoop configuration to use
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static void runPFPGrowth(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    startParallelCounting(params, conf);

    // save feature list to dcache
    List<Pair<String, Long>> fList = readFList(params);
    saveFList(fList, params, conf);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    startParallelFPGrowth(params, conf);
    //startAggregating(params, conf);
}
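The three lines that derive maxPerGroup above are simply a ceiling division of the feature-list size by the number of groups: with, say, 1,050 features and the default 1,000 groups, 1050 / 1000 = 1 with a remainder, so maxPerGroup becomes 2 (these counts are illustrative). A one-line equivalent, reusing the variable names from the snippet and valid for positive numGroups:

int maxPerGroup = (fList.size() + numGroups - 1) / numGroups;  // ceil(fList.size() / numGroups)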
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int i = 0;
    for (Pair<String, Long> e : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(e.getFirst(), e.getSecond().intValue()));
        fMap.put(e.getFirst(), i++);
    }
    Collections.sort(fList);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString()));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    minSupport = Integer.valueOf(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    for (Pair<String, Long> e : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(e.getFirst(), e.getSecond().intValue()));
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Serializes the fList, writes it to a SequenceFile and registers it in the DistributedCache.
 */
public static void saveFList(List<Pair<String, Long>> fList, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT) + "/oldlist", F_LIST);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, Pair.class);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    try {
        int group = 0;
        int count = 0;
        for (Pair<String, Long> pair : fList) {
            if (count == maxPerGroup) {
                group++;
                count = 0;
            }
            count++; // advance within the current group; without this the group index never changes
            writer.append(new Text(pair.getFirst()), new Pair<Integer, Long>(group, pair.getSecond()));
            //writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}
From source file:com.skp.experiment.fpm.pfpgrowth.PFPGrowth.java
License:Apache License
/**
 * @param params should contain input and output locations as string values; additional
 *               parameters include minSupport(3), maxHeapSize(50) and numGroups(1000)
 */
public static void runPFPGrowth(Configuration conf, Parameters params)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    startParallelCounting(params, conf);

    // save feature list to dcache
    List<Pair<String, Long>> fList = readFList(params);
    saveFList(fList, params, conf);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, PFPGrowth.NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
    fList = null; // allow the feature list to be garbage collected before the next jobs run

    startParallelFPGrowth(params, conf);
    startAggregating(params, conf);
}
From source file:it.polito.dbdmg.searum.ARM.java
License:Apache License
/**
 * Execute the chain of MapReduce jobs.
 *
 * @param params contains input and output locations as string values; additional
 *               parameters include the discretize flag, minSupport and minConfidence
 */
public static void runPFPGrowth(Parameters params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Integer enableDiscretization = new Integer(params.get(ENABLE_DISCRETIZATION));
    Integer enableRules = new Integer(params.get(ENABLE_RULES));
    if (enableDiscretization.compareTo(new Integer(1)) == 0) {
        startDiscretization(params, conf);
    }
    startParallelCounting(params, conf);

    List<Pair<String, Long>> headerTable = readFList(params);
    saveFList(headerTable, params, conf);

    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = headerTable.size() / numGroups;
    if (headerTable.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    startParallelFPGrowth(params, conf);
    startClosedSorting(params, conf);
    startExpandClosed(params, conf);
    startItemsetSorting(params, conf);
    if (enableRules.compareTo(new Integer(1)) == 0) {
        startRuleMining(params, conf);
        startRuleAggregating(params, conf);
    }
}
From source file:it.polito.dbdmg.searum.itemsets.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int i = 0;
    for (Pair<String, Long> e : ARM.readFList(context.getConfiguration())) {
        fMap.put(e.getFirst(), i++);
    }
    Parameters params = new Parameters(context.getConfiguration().get(ARM.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(params.get(ARM.SPLIT_PATTERN, ARM.SPLITTER.toString()));
    maxPerGroup = params.getInt(ARM.MAX_PER_GROUP, 0);
}
From source file:it.polito.dbdmg.searum.itemsets.ParallelFPGrowthReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(ARM.PFP_PARAMETERS, ""));
    for (Pair<String, Long> e : ARM.readFList(context.getConfiguration())) {
        if (!e.getFirst().equals("dataset")) { // skip the synthetic "dataset" entry in the header table
            featureReverseMap.add(e.getFirst());
            freqList.add(e.getSecond());
        }
    }
    maxHeapSize = Integer.valueOf(params.get(ARM.MAX_HEAPSIZE, "50"));
    minSupport = Integer.valueOf(params.get(ARM.MIN_SUPPORT, "5"));
    log.info("Support count: " + minSupport);
    maxPerGroup = params.getInt(ARM.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
}