List of usage examples for org.apache.mahout.common.Parameters#get(String, String)
public String get(String key, String defaultValue)
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.AggregatorReducer.java
License:Apache License
/**
 * Caches the maximum heap size for top-k pattern mining, read from the
 * serialized PFP parameters stored in the job configuration.
 *
 * <p>NOTE(review): this class reads the literal key {@code "pfp.parameters"}
 * while sibling mappers/reducers use {@code PFPGrowth.PFP_PARAMETERS} — confirm
 * the constant holds the same value.
 *
 * @param context reducer context supplying the job {@code Configuration}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get("pfp.parameters", ""));
    // parseInt avoids the pointless Integer boxing of Integer.valueOf(...)
    maxHeapSize = Integer.parseInt(params.get("maxHeapSize", "50"));
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java
License:Apache License
/**
 * Runs the sequential (single-process) FPGrowth algorithm over the input file
 * named by {@code params} and writes the top-k frequent patterns to a
 * {@link SequenceFile}, then logs every mined pattern.
 *
 * <p>Fix: in the original, the {@code else} branch opened both input streams
 * <em>before</em> entering the {@code try}, so a failure on the second
 * {@code fs.open} leaked the first stream. Both branches now share one
 * try/finally that owns the writer and both streams.
 *
 * @param params job parameters; reads input, output, encoding, splitPattern,
 *               maxHeapSize, minSupport and the useFPG2 flag
 * @throws IOException on any filesystem or write failure
 */
private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    // parseInt avoids the needless boxing of Integer.valueOf(...)
    int maxHeapSize = Integer.parseInt(params.get("maxHeapSize", "50"));
    int minSupport = Integer.parseInt(params.get("minSupport", "3"));
    Path output = new Path(params.get("output", "output.txt"));
    Path input = new Path(params.get("input"));
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);
    Charset encoding = Charset.forName(params.get("encoding"));
    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, Text.class,
            TopKStringPatterns.class);
    FSDataInputStream inputStream = null;
    FSDataInputStream inputStreamAgain = null;
    Collection<String> features = Sets.newHashSet();
    try {
        // Two independent streams: one drives the mining pass, the other the
        // frequency-list (f-list) construction pass over the same input.
        inputStream = fs.open(input);
        inputStreamAgain = fs.open(input);
        if ("true".equals(params.get(PFPGrowth.USE_FPG2))) {
            com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String> fp =
                    new com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String>();
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(
                            new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false),
                                    pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } else {
            FPGrowth<String> fp = new FPGrowth<String>();
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(
                            new StringRecordIterator(new FileLineIterable(inputStreamAgain, encoding, false),
                                    pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        }
    } finally {
        // Writer close failures propagate (false); read-side closes are swallowed (true).
        Closeables.close(writer, false);
        Closeables.close(inputStream, true);
        Closeables.close(inputStreamAgain, true);
    }
    List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, output);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
    }
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelCountingMapper.java
License:Apache License
/**
 * Compiles the record-splitting regex from the job's serialized PFP
 * parameters (falling back to the default splitter).
 *
 * @param context mapper context supplying the job {@code Configuration}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    String serialized = context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, "");
    Parameters jobParams = new Parameters(serialized);
    String splitRegex = jobParams.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString());
    splitter = Pattern.compile(splitRegex);
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthMapper.java
License:Apache License
@Override protected void setup(Context context) throws IOException, InterruptedException { super.setup(context); int i = 0;//from w w w . j a v a 2 s . c om for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) { fMap.put(e.getFirst(), i++); } Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, "")); splitter = Pattern.compile(params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString())); maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0); }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthReducer.java
License:Apache License
/**
 * Loads the frequent-feature list (feature names and their counts, in f-list
 * order) and caches the mining parameters: max heap size, minimum support,
 * per-group cap, feature count, and whether to use the FPG2 implementation.
 *
 * @param context reducer context supplying the job {@code Configuration}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    // featureReverseMap[i] is the feature name for id i; freqList is parallel to it.
    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
        featureReverseMap.add(e.getFirst());
        freqList.add(e.getSecond());
    }
    // parseInt avoids the pointless Integer boxing of Integer.valueOf(...)
    maxHeapSize = Integer.parseInt(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
    minSupport = Integer.parseInt(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
    useFP2 = "true".equals(params.get(PFPGrowth.USE_FPG2));
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java
License:Apache License
/** * read the feature frequency List which is built at the end of the Parallel counting job * /* w w w. j a v a 2 s . c om*/ * @return Feature Frequency List */ public static List<Pair<String, Long>> readFList(Parameters params) { int minSupport = Integer.valueOf(params.get(MIN_SUPPORT, "3")); Configuration conf = new Configuration(); Path parallelCountingPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING); PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11, new Comparator<Pair<String, Long>>() { @Override public int compare(Pair<String, Long> o1, Pair<String, Long> o2) { int ret = o2.getSecond().compareTo(o1.getSecond()); if (ret != 0) { return ret; } return o1.getFirst().compareTo(o2.getFirst()); } }); for (Pair<Text, LongWritable> record : new SequenceFileDirIterable<Text, LongWritable>( new Path(parallelCountingPath, FILE_PATTERN), PathType.GLOB, null, null, true, conf)) { long value = record.getSecond().get(); if (value >= minSupport) { queue.add(new Pair<String, Long>(record.getFirst().toString(), value)); } } List<Pair<String, Long>> fList = Lists.newArrayList(); while (!queue.isEmpty()) { fList.add(queue.poll()); } return fList; }
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java
License:Apache License
/**
 * Loads the frequent-feature list into both the sorted node list and the
 * feature-to-id map, then configures the record splitter and per-group cap
 * from the job's serialized PFP parameters.
 *
 * @param context mapper context supplying the job {@code Configuration}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    // Each feature gets a TreeNode carrying its count and a dense integer id.
    int nextId = 0;
    for (Pair<String, Long> entry : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(entry.getFirst(), entry.getSecond().intValue()));
        fMap.put(entry.getFirst(), nextId);
        nextId++;
    }
    Collections.sort(fList);
    Parameters jobParams = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(jobParams.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString()));
    maxPerGroup = jobParams.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}
From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthReducer.java
License:Apache License
/**
 * Caches minimum support and the per-group feature cap from the job's
 * serialized PFP parameters, then loads the frequent-feature list as
 * {@code TreeNode}s.
 *
 * @param context reducer context supplying the job {@code Configuration}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    // parseInt avoids the pointless Integer boxing of Integer.valueOf(...)
    minSupport = Integer.parseInt(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    for (Pair<String, Long> e : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(e.getFirst(), e.getSecond().intValue()));
    }
}
From source file:com.skp.experiment.fpm.pfpgrowth.FPGrowthDriver.java
License:Apache License
private static void runFPGrowth(Parameters params) throws IOException { log.info("Starting Sequential FPGrowth"); int maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50")); int minSupport = Integer.valueOf(params.get("minSupport", "3")); String output = params.get("output", "output.txt"); Path path = new Path(output); Configuration conf = new Configuration(); FileSystem fs = FileSystem.get(path.toUri(), conf); Charset encoding = Charset.forName(params.get("encoding")); String input = params.get("input"); String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString()); SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class); if ("true".equals(params.get("useFPG2"))) { org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String> fp = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>(); Collection<String> features = new HashSet<String>(); try {//from w ww. java 2s . co m fp.generateTopKFrequentPatterns( new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern), fp.generateFList(new StringRecordIterator( new FileLineIterable(new File(input), encoding, false), pattern), minSupport), minSupport, maxHeapSize, features, new StringOutputConverter( new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)), new ContextStatusUpdater(null)); } finally { Closeables.closeQuietly(writer); } } else { org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String> fp = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>(); Collection<String> features = new HashSet<String>(); try { fp.generateTopKFrequentPatterns( new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern), fp.generateFList(new StringRecordIterator( new FileLineIterable(new File(input), encoding, false), pattern), minSupport), minSupport, maxHeapSize, features, new StringOutputConverter( new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)), 
new ContextStatusUpdater(null)); } finally { Closeables.closeQuietly(writer); } } List<Pair<String, TopKStringPatterns>> frequentPatterns = org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth .readFrequentPattern(conf, path); for (Pair<String, TopKStringPatterns> entry : frequentPatterns) { log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond()); } }
From source file:it.polito.dbdmg.searum.itemsets.ParallelFPGrowthMapper.java
License:Apache License
/**
 * Loads the frequent-feature list, assigning each feature a dense integer id
 * in f-list order, then configures the record splitter and per-group cap from
 * the job's serialized PFP parameters.
 *
 * @param context mapper context supplying the job {@code Configuration}
 */
@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    // Ids are assigned in the order ARM.readFList yields entries.
    int nextId = 0;
    for (Pair<String, Long> entry : ARM.readFList(context.getConfiguration())) {
        fMap.put(entry.getFirst(), nextId);
        nextId++;
    }
    Parameters jobParams = new Parameters(context.getConfiguration().get(ARM.PFP_PARAMETERS, ""));
    splitter = Pattern.compile(jobParams.get(ARM.SPLIT_PATTERN, ARM.SPLITTER.toString()));
    maxPerGroup = jobParams.getInt(ARM.MAX_PER_GROUP, 0);
}