Example usage for org.apache.mahout.common Parameters get

List of usage examples for org.apache.mahout.common Parameters get

Introduction

On this page you can find example usage of org.apache.mahout.common Parameters get.

Prototype

public String get(String key, String defaultValue) 

Source Link

Usage

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.AggregatorReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    // Rebuild the PFP job parameters that the driver serialized into the configuration
    // under the "pfp.parameters" key (empty string when absent).
    Parameters params = new Parameters(context.getConfiguration().get("pfp.parameters", ""));
    // parseInt avoids the needless Integer boxing of Integer.valueOf before unboxing into the int field.
    // Default heap size of 50 patterns per feature when the parameter is not set.
    maxHeapSize = Integer.parseInt(params.get("maxHeapSize", "50"));
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java

License:Apache License

/**
 * Runs the sequential (non-MapReduce) FPGrowth algorithm over the input file named in
 * {@code params} and writes the top-K frequent patterns per feature to a SequenceFile.
 *
 * Recognized parameters: maxHeapSize (default 50), minSupport (default 3),
 * output (default "output.txt"), input (required), encoding (required),
 * splitPattern (defaults to PFPGrowth.SPLITTER), and PFPGrowth.USE_FPG2 to
 * select the alternative FPGrowthObj implementation.
 *
 * @param params job parameters
 * @throws IOException on any filesystem or SequenceFile failure
 */
private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    // parseInt avoids boxing; Integer.valueOf would allocate only to unbox immediately.
    int maxHeapSize = Integer.parseInt(params.get("maxHeapSize", "50"));
    int minSupport = Integer.parseInt(params.get("minSupport", "3"));

    Path output = new Path(params.get("output", "output.txt"));
    Path input = new Path(params.get("input"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);

    Charset encoding = Charset.forName(params.get("encoding"));

    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, Text.class,
            TopKStringPatterns.class);

    FSDataInputStream inputStream = null;
    FSDataInputStream inputStreamAgain = null;

    Collection<String> features = Sets.newHashSet();

    if ("true".equals(params.get(PFPGrowth.USE_FPG2))) {
        com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String> fp = new com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String>();

        try {
            // The input is read twice: once to build the f-list, once to mine patterns.
            inputStream = fs.open(input);
            inputStreamAgain = fs.open(input);
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            // Writer close failures must not be swallowed (false), or output could be lost silently.
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
        }
    } else {
        FPGrowth<String> fp = new FPGrowth<String>();

        try {
            // BUGFIX: the opens were previously outside this try block, so a failure of the
            // second fs.open leaked the first stream and the writer. Open inside the try
            // (mirroring the if branch) so the finally always cleans up.
            inputStream = fs.open(input);
            inputStreamAgain = fs.open(input);
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
        }
    }

    // Read the patterns back and dump them to the log for inspection.
    List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, output);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelCountingMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);
    // Recover the serialized PFP job parameters from the Hadoop configuration.
    String serialized = context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, "");
    Parameters params = new Parameters(serialized);
    // Compile the transaction split regex once for the mapper's lifetime,
    // falling back to the library's default splitter when unset.
    String regex = params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString());
    splitter = Pattern.compile(regex);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    // Map each frequent feature to its rank (its position in the f-list),
    // so transactions can be rewritten in terms of feature ranks.
    int rank = 0;
    for (Pair<String, Long> entry : PFPGrowth.readFList(context.getConfiguration())) {
        fMap.put(entry.getFirst(), rank);
        rank++;
    }

    // Recover the serialized job parameters from the configuration.
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));

    // Compile the transaction split regex once; default to the library splitter.
    String splitRegex = params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString());
    splitter = Pattern.compile(splitRegex);

    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {

    super.setup(context);
    // Recover the serialized PFP job parameters from the configuration.
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));

    // Load the f-list: featureReverseMap[i] is the feature at rank i,
    // freqList[i] its global frequency.
    for (Pair<String, Long> e : PFPGrowth.readFList(context.getConfiguration())) {
        featureReverseMap.add(e.getFirst());
        freqList.add(e.getSecond());
    }

    // parseInt avoids the needless Integer boxing of Integer.valueOf before unboxing into int fields.
    maxHeapSize = Integer.parseInt(params.get(PFPGrowth.MAX_HEAPSIZE, "50"));
    minSupport = Integer.parseInt(params.get(PFPGrowth.MIN_SUPPORT, "3"));

    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
    numFeatures = featureReverseMap.size();
    // Select the alternative FPGrowth2 implementation when explicitly requested.
    useFP2 = "true".equals(params.get(PFPGrowth.USE_FPG2));
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * read the feature frequency List which is built at the end of the Parallel counting job
 * /*  w  w w. j a v a 2 s  .  c  om*/
 * @return Feature Frequency List
 */
public static List<Pair<String, Long>> readFList(Parameters params) {
    int minSupport = Integer.valueOf(params.get(MIN_SUPPORT, "3"));
    Configuration conf = new Configuration();

    Path parallelCountingPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);

    PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11,
            new Comparator<Pair<String, Long>>() {
                @Override
                public int compare(Pair<String, Long> o1, Pair<String, Long> o2) {
                    int ret = o2.getSecond().compareTo(o1.getSecond());
                    if (ret != 0) {
                        return ret;
                    }
                    return o1.getFirst().compareTo(o2.getFirst());
                }
            });

    for (Pair<Text, LongWritable> record : new SequenceFileDirIterable<Text, LongWritable>(
            new Path(parallelCountingPath, FILE_PATTERN), PathType.GLOB, null, null, true, conf)) {
        long value = record.getSecond().get();
        if (value >= minSupport) {
            queue.add(new Pair<String, Long>(record.getFirst().toString(), value));
        }
    }
    List<Pair<String, Long>> fList = Lists.newArrayList();
    while (!queue.isEmpty()) {
        fList.add(queue.poll());
    }
    return fList;
}

From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    // Build both views of the f-list: fList holds one TreeNode per frequent
    // feature, and fMap records each feature's rank (position in the f-list).
    int rank = 0;
    for (Pair<String, Long> entry : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(entry.getFirst(), entry.getSecond().intValue()));
        fMap.put(entry.getFirst(), rank);
        rank++;
    }

    // Natural ordering of TreeNode determines the canonical item order.
    Collections.sort(fList);

    // Recover the serialized job parameters from the configuration.
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));

    // Compile the transaction split regex once; default to the library splitter.
    String splitRegex = params.get(PFPGrowth.SPLIT_PATTERN, PFPGrowth.SPLITTER.toString());
    splitter = Pattern.compile(splitRegex);

    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);
}

From source file:com.cg.mapreduce.myfpgrowth.ParallelFPGrowthReducer.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {

    super.setup(context);
    // Recover the serialized PFP job parameters from the configuration.
    Parameters params = new Parameters(context.getConfiguration().get(PFPGrowth.PFP_PARAMETERS, ""));
    // parseInt avoids the needless Integer boxing of Integer.valueOf before unboxing into the int field.
    minSupport = Integer.parseInt(params.get(PFPGrowth.MIN_SUPPORT, "3"));
    maxPerGroup = params.getInt(PFPGrowth.MAX_PER_GROUP, 0);

    // Rebuild the f-list as TreeNodes for tree construction during reduce.
    for (Pair<String, Long> e : readFList(context.getConfiguration())) {
        fList.add(new TreeNode(e.getFirst(), e.getSecond().intValue()));
    }
}

From source file:com.skp.experiment.fpm.pfpgrowth.FPGrowthDriver.java

License:Apache License

/**
 * Runs the sequential (non-MapReduce) FPGrowth algorithm over the input file named in
 * {@code params} and writes the top-K frequent patterns per feature to a SequenceFile,
 * then logs the mined patterns.
 *
 * Recognized parameters: maxHeapSize (default 50), minSupport (default 3),
 * output (default "output.txt"), input (required), encoding (required),
 * splitPattern (defaults to PFPGrowth.SPLITTER), and useFPG2 to select the
 * alternative FPGrowthObj implementation.
 *
 * @param params job parameters
 * @throws IOException on any filesystem or SequenceFile failure
 */
private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    // parseInt avoids boxing; Integer.valueOf would allocate only to unbox immediately.
    int maxHeapSize = Integer.parseInt(params.get("maxHeapSize", "50"));
    int minSupport = Integer.parseInt(params.get("minSupport", "3"));

    String output = params.get("output", "output.txt");

    Path path = new Path(output);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(path.toUri(), conf);

    Charset encoding = Charset.forName(params.get("encoding"));
    String input = params.get("input");

    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);

    if ("true".equals(params.get("useFPG2"))) {
        org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String> fp = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>();
        Collection<String> features = new HashSet<String>();

        try {
            // The input file is read twice: once to build the f-list, once to mine patterns.
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(new File(input), encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            // BUGFIX: closeQuietly swallowed close-time flush failures on a WRITABLE
            // resource, which can silently lose output. Propagate them instead
            // (false = do not swallow); the method already declares IOException.
            Closeables.close(writer, false);
        }
    } else {
        org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String> fp = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();
        Collection<String> features = new HashSet<String>();
        try {
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(new File(input), encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.close(writer, false);
        }
    }

    // Read the patterns back and dump them to the log for inspection.
    List<Pair<String, TopKStringPatterns>> frequentPatterns = org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
            .readFrequentPattern(conf, path);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
    }
}

From source file:it.polito.dbdmg.searum.itemsets.ParallelFPGrowthMapper.java

License:Apache License

@Override
protected void setup(Context context) throws IOException, InterruptedException {
    super.setup(context);

    // Record each frequent feature's rank (its position in the f-list)
    // so transactions can be rewritten in terms of feature ranks.
    int rank = 0;
    for (Pair<String, Long> entry : ARM.readFList(context.getConfiguration())) {
        fMap.put(entry.getFirst(), rank);
        rank++;
    }

    // Recover the serialized job parameters from the configuration.
    Parameters params = new Parameters(context.getConfiguration().get(ARM.PFP_PARAMETERS, ""));

    // Compile the transaction split regex once; default to the library splitter.
    String splitRegex = params.get(ARM.SPLIT_PATTERN, ARM.SPLITTER.toString());
    splitter = Pattern.compile(splitRegex);

    maxPerGroup = params.getInt(ARM.MAX_PER_GROUP, 0);
}