List of usage examples for org.apache.mahout.common.Parameters.getInt
public int getInt(String key, int defaultValue)
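getInt looks the key up in the Parameters object and parses the stored string as an int, returning defaultValue when the key has not been set. A minimal standalone sketch (the key names and values below are illustrative only, not drawn from the examples that follow):

Parameters params = new Parameters();
params.set("numGroups", "500");

int numGroups = params.getInt("numGroups", 1000);   // key present: parses the stored "500"
int maxHeapSize = params.getInt("maxHeapSize", 50); // key absent: the default 50 is returned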
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int i = 0;
    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
        fMap.put(e.getFirst(), i++);
    }
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString()));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}
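Each of these setup() methods assumes the driver has already placed a serialized Parameters object into the job configuration under PFPGrowth.PFP_PARAMETERS, since they rebuild it with the Parameters(String) constructor. A hedged sketch of that driver-side step, assuming params.toString() yields the serialized form the constructor expects (the MAX_PER_GROUP value is illustrative):

Configuration conf = new Configuration();
Parameters params = new Parameters();
params.set(PFPGrowth.MAX_PER_GROUP, "10");              // illustrative value
conf.set(PFPGrowth.PFP_PARAMETERS, params.toString());  // read back in setup() via new Parameters(conf.get(...))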
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
        featureReverseMap.add(e.getFirst());
        freqList.add(e.getSecond());
    }
    maxHeapSize = Integer.valueOf(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
    minSupport = Integer.valueOf(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
    useFP2 = "true".equals(params.get(PFPGrowth.USE_FPG2));
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/**
 * @param params input and output locations, plus additional parameters such as
 *               minSupport(3), maxHeapSize(50) and numGroups(1000)
 * @param conf   initial Hadoop configuration to use
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static void runPFPGrowth(Parameters params, Configuration conf)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    startParallelCounting(params, conf);

    // save feature list to dcache
    List<Pair<String, Long>> fList = readFList(params);
    saveFList(fList, params, conf);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    startParallelFPGrowth(params, conf);
    //startAggregating(params, conf);
}
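The three lines that derive maxPerGroup above are simply a ceiling division of the feature-list size by the number of groups: with, say, 1,050 features and the default 1,000 groups, 1050 / 1000 = 1 with a remainder, so maxPerGroup becomes 2 (these counts are illustrative). A one-line equivalent, reusing the variable names from the snippet and valid for positive numGroups:

int maxPerGroup = (fList.size() + numGroups - 1) / numGroups;  // ceil(fList.size() / numGroups)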
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int i = 0;
    for (Pair<String, Long> e : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(e.getFirst(), e.getSecond().intValue()));
        fMap.put(e.getFirst(), i++);
    }
    Collections.sort(fList);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString()));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    minSupport = Integer.valueOf(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    for (Pair<String, Long> e : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(e.getFirst(), e.getSecond().intValue()));
    }
}
From source file:com.cg.mapreduce.myfpgrowth.PFPGrowth.java
License:Apache License
/**
 * Serializes the fList, writes it to a SequenceFile and registers it in the DistributedCache.
 */
public static void saveFList(List<Pair<String, Long>> fList, Parameters params, Configuration conf)
        throws IOException {
    Path flistPath = new Path(params.get(OUTPUT) + "/oldlist", F_LIST);
    FileSystem fs = FileSystem.get(flistPath.toUri(), conf);
    flistPath = fs.makeQualified(flistPath);
    HadoopUtil.delete(conf, flistPath);
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, flistPath, Text.class, Pair.class);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    try {
        int group = 0;
        int count = 0;
        for (Pair<String, Long> pair : fList) {
            if (count == maxPerGroup) {
                group++;
                count = 0;
            }
            count++; // advance within the current group; without this the group index never changes
            writer.append(new Text(pair.getFirst()), new Pair<Integer, Long>(group, pair.getSecond()));
            //writer.append(new Text(pair.getFirst()), new LongWritable(pair.getSecond()));
        }
    } finally {
        writer.close();
    }
    DistributedCache.addCacheFile(flistPath.toUri(), conf);
}
From source file:com.skp.experiment.fpm.pfpgrowth.PFPGrowth.java
License:Apache License
/**
 * @param params should contain input and output locations as string values; additional
 *               parameters include minSupport(3), maxHeapSize(50) and numGroups(1000)
 */
public static void runPFPGrowth(Configuration conf, Parameters params)
        throws IOException, InterruptedException, ClassNotFoundException {
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    startParallelCounting(params, conf);

    // save feature list to dcache
    List<Pair<String, Long>> fList = readFList(params);
    saveFList(fList, params, conf);

    // set param to control group size in MR jobs
    int numGroups = params.getInt(PFPGrowth.NUM_GROUPS, PFPGrowth.NUM_GROUPS_DEFAULT);
    int maxPerGroup = fList.size() / numGroups;
    if (fList.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));
    fList = null; // allow the feature list to be garbage collected before the next jobs run

    startParallelFPGrowth(params, conf);
    startAggregating(params, conf);
}
From source file:it.polito.dbdmg.searum.ARM.java
License:Apache License
/**
 * Execute the chain of MapReduce jobs.
 *
 * @param params contains input and output locations as string values; additional
 *               parameters include the discretize flag, minSupport and minConfidence
 */
public static void runPFPGrowth(Parameters params)
        throws IOException, InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    conf.set("io.serializations", "org.apache.hadoop.io.serializer.JavaSerialization,"
            + "org.apache.hadoop.io.serializer.WritableSerialization");
    Integer enableDiscretization = new Integer(params.get(ENABLE_DISCRETIZATION));
    Integer enableRules = new Integer(params.get(ENABLE_RULES));
    if (enableDiscretization.compareTo(new Integer(1)) == 0) {
        startDiscretization(params, conf);
    }
    startParallelCounting(params, conf);

    List<Pair<String, Long>> headerTable = readFList(params);
    saveFList(headerTable, params, conf);

    int numGroups = params.getInt(NUM_GROUPS, NUM_GROUPS_DEFAULT);
    int maxPerGroup = headerTable.size() / numGroups;
    if (headerTable.size() % numGroups != 0) {
        maxPerGroup++;
    }
    params.set(MAX_PER_GROUP, Integer.toString(maxPerGroup));

    startParallelFPGrowth(params, conf);
    startClosedSorting(params, conf);
    startExpandClosed(params, conf);
    startItemsetSorting(params, conf);
    if (enableRules.compareTo(new Integer(1)) == 0) {
        startRuleMining(params, conf);
        startRuleAggregating(params, conf);
    }
}
From source file:it.polito.dbdmg.searum.itemsets.ParallelFPGrowthMapper.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    int i = 0;
    for (Pair<String, Long> e : ARM.readFList(context.getConfiguration())) {
        fMap.put(e.getFirst(), i++);
    }
    Parameters params = new Parameters(context.getConfiguration().get(ARM.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(params.get(ARM.SPLIT_PATTERN, ARM.SPLITTER.toString()));
    maxPerGroup = params.getInt(ARM.MAX_PER_GROUP, 0);
}
From source file:it.polito.dbdmg.searum.itemsets.ParallelFPGrowthReducer.java
License:Apache License
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(ARM.PFP_PARAMETERS, ""));
    for (Pair<String, Long> e : ARM.readFList(context.getConfiguration())) {
        if (!e.getFirst().equals("dataset")) { // skip the synthetic "dataset" entry in the header table
            featureReverseMap.add(e.getFirst());
            freqList.add(e.getSecond());
        }
    }
    maxHeapSize = Integer.valueOf(params.get(ARM.MAX_HEAPSIZE, "50"));
    minSupport = Integer.valueOf(params.get(ARM.MIN_SUPPORT, "5"));
    log.info("Support count: " + minSupport);
    maxPerGroup = params.getInt(ARM.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
}