Example usage for org.apache.mahout.common.iterator FileLineIterable FileLineIterable

List of usage examples for org.apache.mahout.common.iterator FileLineIterable FileLineIterable

Introduction

This page shows example usages of the org.apache.mahout.common.iterator.FileLineIterable constructor.

Prototype

public FileLineIterable(InputStream is, Charset encoding, boolean skipFirstLine) 

Source Link

Usage

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.FPGrowthDriver.java

License:Apache License

private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    int maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50"));
    int minSupport = Integer.valueOf(params.get("minSupport", "3"));

    Path output = new Path(params.get("output", "output.txt"));
    Path input = new Path(params.get("input"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(output.toUri(), conf);

    Charset encoding = Charset.forName(params.get("encoding"));

    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, output, Text.class,
            TopKStringPatterns.class);

    FSDataInputStream inputStream = null;
    FSDataInputStream inputStreamAgain = null;

    Collection<String> features = Sets.newHashSet();

    if ("true".equals(params.get(PFPGrowth.USE_FPG2))) {
        com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String> fp = new com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPGrowthObj<String>();

        try {//w ww .  j a  v a2 s  .c o  m
            inputStream = fs.open(input);
            inputStreamAgain = fs.open(input);
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
        }
    } else {
        FPGrowth<String> fp = new FPGrowth<String>();

        inputStream = fs.open(input);
        inputStreamAgain = fs.open(input);
        try {
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(inputStream, encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(inputStreamAgain, encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.close(writer, false);
            Closeables.close(inputStream, true);
            Closeables.close(inputStreamAgain, true);
        }
    }

    List<Pair<String, TopKStringPatterns>> frequentPatterns = FPGrowth.readFrequentPattern(conf, output);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
    }
}

From source file:com.skp.experiment.fpm.pfpgrowth.FPGrowthDriver.java

License:Apache License

private static void runFPGrowth(Parameters params) throws IOException {
    log.info("Starting Sequential FPGrowth");
    int maxHeapSize = Integer.valueOf(params.get("maxHeapSize", "50"));
    int minSupport = Integer.valueOf(params.get("minSupport", "3"));

    String output = params.get("output", "output.txt");

    Path path = new Path(output);
    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(path.toUri(), conf);

    Charset encoding = Charset.forName(params.get("encoding"));
    String input = params.get("input");

    String pattern = params.get("splitPattern", PFPGrowth.SPLITTER.toString());

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, TopKStringPatterns.class);

    if ("true".equals(params.get("useFPG2"))) {
        org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String> fp = new org.apache.mahout.fpm.pfpgrowth.fpgrowth2.FPGrowthObj<String>();
        Collection<String> features = new HashSet<String>();

        try {/*from   w w  w  .  ja  v a  2 s.com*/
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(new File(input), encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.closeQuietly(writer);
        }
    } else {
        org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String> fp = new org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth<String>();
        Collection<String> features = new HashSet<String>();
        try {
            fp.generateTopKFrequentPatterns(
                    new StringRecordIterator(new FileLineIterable(new File(input), encoding, false), pattern),
                    fp.generateFList(new StringRecordIterator(
                            new FileLineIterable(new File(input), encoding, false), pattern), minSupport),
                    minSupport, maxHeapSize, features,
                    new StringOutputConverter(
                            new SequenceFileOutputCollector<Text, TopKStringPatterns>(writer)),
                    new ContextStatusUpdater(null));
        } finally {
            Closeables.closeQuietly(writer);
        }
    }

    List<Pair<String, TopKStringPatterns>> frequentPatterns = org.apache.mahout.fpm.pfpgrowth.fpgrowth.FPGrowth
            .readFrequentPattern(conf, path);
    for (Pair<String, TopKStringPatterns> entry : frequentPatterns) {
        log.info("Dumping Patterns for Feature: {} \n{}", entry.getFirst(), entry.getSecond());
    }
}