Example usage for the org.apache.mahout.common.Pair constructor

List of usage examples for the org.apache.mahout.common.Pair constructor

Introduction

On this page you can find example usages of the org.apache.mahout.common.Pair constructor.

Prototype

public Pair(A first, B second) 

Source Link

Usage

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.integer.IntegerStringOutputConverter.java

License:Apache License

/**
 * Translates integer-encoded patterns into their string form and hands them to the
 * wrapped collector.
 *
 * @param key   integer id that is looked up in {@code featureReverseMap} to obtain the output key
 * @param value patterns expressed as lists of integer ids, each paired with a {@code Long}
 *              that is copied to the output unchanged
 * @throws IOException declared by the overridden method; nothing in this body throws it directly
 */
@Override
public void collect(Integer key, List<Pair<List<Integer>, Long>> value) throws IOException {
    String outputKey = featureReverseMap.get(key);

    List<Pair<List<String>, Long>> decodedPatterns = Lists.newArrayList();
    for (Pair<List<Integer>, Long> encoded : value) {
        // Map every item id of this pattern through the reverse map, preserving item order.
        List<String> decodedItems = Lists.newArrayList();
        for (Integer itemId : encoded.getFirst()) {
            String item = featureReverseMap.get(itemId);
            decodedItems.add(item);
        }
        Pair<List<String>, Long> decoded = new Pair<List<String>, Long>(decodedItems, encoded.getSecond());
        decodedPatterns.add(decoded);
    }

    Text wrappedKey = new Text(outputKey);
    TopKStringPatterns wrappedPatterns = new TopKStringPatterns(decodedPatterns);
    collector.collect(wrappedKey, wrappedPatterns);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.string.TopKStringPatterns.java

License:Apache License

/**
 * Replaces the contents of {@code frequentPatterns} with the patterns read from {@code in}.
 *
 * <p>Data is consumed in this order: an int pattern count; then, for each pattern, an int
 * item count, a long support value, and that many UTF strings.
 *
 * @param in source to read from
 * @throws IOException if any of the underlying reads fails
 */
@Override
public void readFields(DataInput in) throws IOException {
    frequentPatterns.clear();

    for (int patternsLeft = in.readInt(); patternsLeft > 0; patternsLeft--) {
        List<String> itemset = Lists.newArrayList();
        // The item count precedes the support value on the wire; keep that read order.
        int itemsLeft = in.readInt();
        long support = in.readLong();
        while (itemsLeft > 0) {
            itemset.add(in.readUTF());
            itemsLeft--;
        }
        frequentPatterns.add(new Pair<List<String>, Long>(itemset, support));
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.TopKPatternsOutputConverter.java

License:Apache License

/**
 * Drains the heap held by {@code value}, maps every polled pattern back through
 * {@code reverseMapping}, and passes the resulting list to the wrapped collector.
 *
 * <p>The heap is emptied by this call. Items inside each pattern are sorted with
 * {@code Collections.sort}, and the list of patterns is reversed relative to poll order
 * before it is emitted.
 *
 * @param key   id whose reverse mapping is used as the output key
 * @param value heap wrapper supplying the patterns
 * @throws IOException declared by the overridden method
 */
@Override
public void collect(Integer key, FrequentPatternMaxHeap value) throws IOException {
    List<Pair<List<A>, Long>> mappedPatterns = Lists.newArrayList();
    PriorityQueue<Pattern> heap = value.getHeap();

    while (!heap.isEmpty()) {
        Pattern polled = heap.poll();

        List<A> mappedItems = Lists.newArrayList();
        for (int pos = 0; pos < polled.length(); pos++) {
            A item = reverseMapping.get(polled.getPattern()[pos]);
            mappedItems.add(item);
        }
        Collections.sort(mappedItems);

        mappedPatterns.add(new Pair<List<A>, Long>(mappedItems, polled.support()));
    }

    // Patterns were appended in poll order; emit them in the opposite order.
    Collections.reverse(mappedPatterns);
    collector.collect(reverseMapping.get(key), mappedPatterns);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.TransactionIterator.java

License:Apache License

/**
 * Wraps {@code transactions} so that each transaction's attribute list is delivered as an
 * {@code int[]} of ids taken from {@code attributeIdMapping}.
 *
 * <p>Attributes absent from the mapping are dropped. A {@code null} transaction is passed
 * through as {@code null}. The second element of each pair is forwarded unchanged.
 *
 * @param transactions       source transactions
 * @param attributeIdMapping attribute-to-id lookup; its size also fixes the scratch buffer length
 */
public TransactionIterator(Iterator<Pair<List<T>, Long>> transactions,
        final Map<T, Integer> attributeIdMapping) {
    // Scratch space shared by every apply() call; each result is copied out of it.
    // NOTE(review): a transaction holding more mapped attribute occurrences than the mapping
    // has entries (e.g. duplicates) would run past this buffer - confirm callers prevent that.
    transactionBuffer = new int[attributeIdMapping.size()];

    Function<Pair<List<T>, Long>, Pair<int[], Long>> toIdArray =
            new Function<Pair<List<T>, Long>, Pair<int[], Long>>() {
                @Override
                public Pair<int[], Long> apply(Pair<List<T>, Long> transaction) {
                    if (transaction == null) {
                        return null;
                    }

                    int used = 0;
                    for (T attribute : transaction.getFirst()) {
                        if (!attributeIdMapping.containsKey(attribute)) {
                            continue;
                        }
                        transactionBuffer[used++] = attributeIdMapping.get(attribute);
                    }

                    // Trim to the number of ids actually written.
                    int[] ids = new int[used];
                    System.arraycopy(transactionBuffer, 0, ids, 0, used);
                    return new Pair<int[], Long>(ids, transaction.getSecond());
                }
            };

    delegate = Iterators.transform(transactions, toIdArray);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth.FPGrowth.java

License:Apache License

/**
 * Reads every record of the sequence file at {@code path} into a list.
 *
 * <p>For each record, the key's {@code toString()} becomes the first element of the pair and
 * the value's patterns are wrapped in a newly constructed {@code TopKStringPatterns}.
 *
 * @param conf configuration handed to the sequence-file reader
 * @param path location of the sequence file
 * @return one pair per record, in iteration order
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Configuration conf, Path path) {
    List<Pair<String, TopKStringPatterns>> result = Lists.newArrayList();

    SequenceFileIterable<Writable, TopKStringPatterns> records =
            new SequenceFileIterable<Writable, TopKStringPatterns>(path, true, conf);
    for (Pair<Writable, TopKStringPatterns> entry : records) {
        String feature = entry.getFirst().toString();
        // Build a fresh wrapper around the patterns rather than storing the iterated value itself.
        TopKStringPatterns patterns = new TopKStringPatterns(entry.getSecond().getPatterns());
        result.add(new Pair<String, TopKStringPatterns>(feature, patterns));
    }

    return result;
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth.FPGrowth.java

License:Apache License

/**
 * Generate the Feature Frequency list from the given transaction whose
 * frequency > minSupport/*from   w w w.  jav a2 s . c  om*/
 *
 * @param transactions
 *          Iterator over the transaction database
 * @param minSupport
 *          minSupport of the feature to be included
 * @return the List of features and their associated frequency as a Pair
 */
public final List<Pair<A, Long>> generateFList(Iterator<Pair<List<A>, Long>> transactions, int minSupport) {

    Map<A, MutableLong> attributeSupport = Maps.newHashMap();
    while (transactions.hasNext()) {
        Pair<List<A>, Long> transaction = transactions.next();
        for (A attribute : transaction.getFirst()) {
            if (attributeSupport.containsKey(attribute)) {
                attributeSupport.get(attribute).add(transaction.getSecond().longValue());
            } else {
                attributeSupport.put(attribute, new MutableLong(transaction.getSecond()));
            }
        }
    }
    List<Pair<A, Long>> fList = Lists.newArrayList();
    for (Entry<A, MutableLong> e : attributeSupport.entrySet()) {
        long value = e.getValue().longValue();
        if (value >= minSupport) {
            fList.add(new Pair<A, Long>(e.getKey(), value));
        }
    }

    Collections.sort(fList, new CountDescendingPairComparator<A, Long>());

    return fList;
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth2.FPTree.java

License:Apache License

/**
 * Separates the single-child chain hanging off the root (if there is one) from the rest
 * of this tree.
 *
 * @return a pair of trees. When the root does not have exactly one child, the first element
 *         is {@code null} and the second is this tree itself. Otherwise the first tree is
 *         built from the counts of the nodes on the single-child chain, and the second from
 *         a copy of this tree's attribute counts with those chain attributes set to zero.
 */
public Pair<FPTree, FPTree> splitSinglePrefix() {
    if (root.numChildren() != 1) {
        return new Pair<FPTree, FPTree>(null, this);
    }

    LongArrayList prefixCounts = new LongArrayList();
    LongArrayList remainderCounts = attrCountList.copy();

    // Follow the chain for as long as each node has exactly one child.
    FPNode node = root;
    while (node.numChildren() == 1) {
        node = node.children().iterator().next();
        int attr = node.attribute();

        // Grow the prefix list on demand so that index 'attr' is addressable.
        if (prefixCounts.size() <= attr) {
            prefixCounts.setSize(attr + 1);
        }
        prefixCounts.set(attr, node.count());
        remainderCounts.set(attr, 0);
    }

    FPTree prefixTree = new FPTree(prefixCounts, minSupport);
    FPTree remainderTree = new FPTree(remainderCounts, minSupport);
    recursivelyAddPrefixPats(prefixTree, remainderTree, root, null);

    return new Pair<FPTree, FPTree>(prefixTree, remainderTree);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.ParallelFPGrowthReducer.java

License:Apache License

/**
 * Merges all incoming transaction trees into one and runs top-K frequent pattern
 * generation over the merged tree, writing results through {@code context}.
 *
 * <p>When {@code useFP2} is set, {@code FPGrowthIds} is used together with {@code freqList};
 * otherwise {@code FPGrowth} is used together with a frequency list derived from the
 * merged tree.
 *
 * @param key     group id; passed to {@code PFPGrowth.getGroupMembers}
 * @param values  transaction trees to merge
 * @param context reducer context used for output and status updates
 * @throws IOException declared by this method's signature
 */
@Override
protected void reduce(IntWritable key, Iterable<TransactionTree> values, Context context) throws IOException {
    TransactionTree mergedTree = new TransactionTree();
    for (TransactionTree tree : values) {
        for (Pair<IntArrayList, Long> pattern : tree) {
            mergedTree.addPattern(pattern.getFirst(), pattern.getSecond());
        }
    }

    // Built unconditionally, exactly as before, although only the non-FP2 branch reads it.
    List<Pair<Integer, Long>> localFList = Lists.newArrayList();
    for (Entry<Integer, MutableLong> entry : mergedTree.generateFList().entrySet()) {
        Pair<Integer, Long> itemCount = new Pair<Integer, Long>(entry.getKey(), entry.getValue().toLong());
        localFList.add(itemCount);
    }
    Collections.sort(localFList, new CountDescendingPairComparator<Integer, Long>());

    if (!useFP2) {
        FPGrowth<Integer> fpGrowth = new FPGrowth<Integer>();
        fpGrowth.generateTopKFrequentPatterns(
                new IteratorAdapter(mergedTree.iterator()),
                localFList,
                minSupport,
                maxHeapSize,
                Sets.newHashSet(PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures).toList()),
                newOutputConverter(context),
                newStatusUpdater(context));
    } else {
        FPGrowthIds.generateTopKFrequentPatterns(
                mergedTree.iterator(),
                freqList,
                minSupport,
                maxHeapSize,
                PFPGrowth.getGroupMembers(key.get(), maxPerGroup, numFeatures),
                newOutputConverter(context),
                newStatusUpdater(context));
    }
}

/** Creates the converter that writes string patterns through {@code context}. */
private IntegerStringOutputConverter newOutputConverter(Context context) {
    return new IntegerStringOutputConverter(
            new ContextWriteOutputCollector<IntWritable, TransactionTree, Text, TopKStringPatterns>(context),
            featureReverseMap);
}

/** Creates the status updater bound to {@code context}. */
private ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns> newStatusUpdater(
        Context context) {
    return new ContextStatusUpdater<IntWritable, TransactionTree, Text, TopKStringPatterns>(context);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * Generates the fList from the serialized string representation
 * /* w ww.j av a2 s .co m*/
 * @return Deserialized Feature Frequency List
 */
public static List<Pair<String, Long>> readFList(Configuration conf) throws IOException {
    List<Pair<String, Long>> list = Lists.newArrayList();

    Path[] files = HadoopUtil.getCachedFiles(conf);
    if (files.length != 1) {
        throw new IOException("Cannot read Frequency list from Distributed Cache (" + files.length + ')');
    }

    for (Pair<Text, LongWritable> record : new SequenceFileIterable<Text, LongWritable>(files[0], true, conf)) {
        list.add(new Pair<String, Long>(record.getFirst().toString(), record.getSecond().get()));
    }
    return list;
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.PFPGrowth.java

License:Apache License

/**
 * Reads the feature frequency list from the parallel counting output located under
 * {@code params.get(OUTPUT)}.
 *
 * <p>Only records whose count is at least the {@code MIN_SUPPORT} parameter (default
 * {@code "3"}) are kept. The result is ordered by count, highest first; equal counts are
 * ordered by ascending feature string.
 *
 * @param params job parameters supplying {@code MIN_SUPPORT} and {@code OUTPUT}
 * @return Feature Frequency List
 */
public static List<Pair<String, Long>> readFList(Parameters params) {
    int minSupport = Integer.parseInt(params.get(MIN_SUPPORT, "3"));
    Configuration conf = new Configuration();
    Path parallelCountingPath = new Path(params.get(OUTPUT), PARALLEL_COUNTING);

    Comparator<Pair<String, Long>> byCountDescThenFeature = new Comparator<Pair<String, Long>>() {
        @Override
        public int compare(Pair<String, Long> left, Pair<String, Long> right) {
            int byCount = right.getSecond().compareTo(left.getSecond());
            return byCount != 0 ? byCount : left.getFirst().compareTo(right.getFirst());
        }
    };
    PriorityQueue<Pair<String, Long>> queue = new PriorityQueue<Pair<String, Long>>(11, byCountDescThenFeature);

    SequenceFileDirIterable<Text, LongWritable> records = new SequenceFileDirIterable<Text, LongWritable>(
            new Path(parallelCountingPath, FILE_PATTERN), PathType.GLOB, null, null, true, conf);
    for (Pair<Text, LongWritable> record : records) {
        long count = record.getSecond().get();
        if (count < minSupport) {
            continue;
        }
        queue.add(new Pair<String, Long>(record.getFirst().toString(), count));
    }

    // Polling until the queue is exhausted yields the entries in comparator order.
    List<Pair<String, Long>> fList = Lists.newArrayList();
    for (Pair<String, Long> head = queue.poll(); head != null; head = queue.poll()) {
        fList.add(head);
    }
    return fList;
}