Example usage for org.apache.mahout.common.Pair.getSecond()

List of usage examples for org.apache.mahout.common.Pair.getSecond()

Introduction

On this page you can find example usages of org.apache.mahout.common.Pair.getSecond().

Prototype

public B getSecond() 

Source Link

Usage

From source file:ClassifierHD.java

License:Apache License

/**
 * Loads every record of a {@code Text -> IntWritable} sequence file into an in-memory map.
 *
 * @param conf            Hadoop configuration handed to the sequence-file reader
 * @param dictionnaryPath location of the sequence file to read
 * @return a map from each record's key (as a {@code String}) to its integer value;
 *         if a key occurs more than once the last record wins
 */
public static Map<String, Integer> readDictionnary(Configuration conf, Path dictionnaryPath) {
    Map<String, Integer> entries = new HashMap<String, Integer>();
    // NOTE(review): the 'true' flag presumably asks the iterable to reuse Writable
    // instances, which is why key and value are copied out immediately - confirm.
    SequenceFileIterable<Text, IntWritable> records =
            new SequenceFileIterable<Text, IntWritable>(dictionnaryPath, true, conf);
    for (Pair<Text, IntWritable> record : records) {
        String key = record.getFirst().toString();
        int value = record.getSecond().get();
        entries.put(key, value);
    }
    return entries;
}

From source file:ClassifierHD.java

License:Apache License

/**
 * Loads every record of an {@code IntWritable -> LongWritable} sequence file into an
 * in-memory map.
 *
 * @param conf                  Hadoop configuration handed to the sequence-file reader
 * @param documentFrequencyPath location of the sequence file to read
 * @return a map from each record's integer key to its long value; if a key occurs more
 *         than once the last record wins
 */
public static Map<Integer, Long> readDocumentFrequency(Configuration conf, Path documentFrequencyPath) {
    Map<Integer, Long> frequencies = new HashMap<Integer, Long>();
    // NOTE(review): the 'true' flag presumably asks the iterable to reuse Writable
    // instances, which is why the primitives are extracted right away - confirm.
    SequenceFileIterable<IntWritable, LongWritable> records =
            new SequenceFileIterable<IntWritable, LongWritable>(documentFrequencyPath, true, conf);
    for (Pair<IntWritable, LongWritable> record : records) {
        int key = record.getFirst().get();
        long value = record.getSecond().get();
        frequencies.put(key, value);
    }
    return frequencies;
}

From source file:ac.keio.sslab.nlp.lda.RowIdJob.java

License:Apache License

/**
 * Re-keys the input {@code Text -> VectorWritable} records with consecutive integer
 * ids starting at 0 and writes two sequence files under the output path:
 * <ul>
 *   <li>{@code docIndex}: integer id -> original {@code Text} key</li>
 *   <li>{@code matrix}: integer id -> the record's {@code VectorWritable}</li>
 * </ul>
 *
 * @param args command-line arguments, parsed by {@code parseArguments}
 * @return {@code -1} if argument parsing yields {@code null}, otherwise {@code 0}
 * @throws Exception if reading the input or writing either output fails
 */
@SuppressWarnings("deprecation")
@Override
public int run(String[] args) throws Exception {

    addInputOption();
    addOutputOption();

    // Only the success/failure of parsing matters here; the parsed map itself is not used.
    if (parseArguments(args) == null) {
        return -1;
    }

    Configuration conf = getConf();
    FileSystem fs = FileSystem.get(conf);

    Path outputPath = getOutputPath();
    Path indexPath = new Path(outputPath, "docIndex");
    Path matrixPath = new Path(outputPath, "matrix");

    try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class,
            Text.class);
            SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath,
                    IntWritable.class, VectorWritable.class)) {
        SequenceFileDirIterable<Text, VectorWritable> records =
                new SequenceFileDirIterable<Text, VectorWritable>(getInputPath(), PathType.LIST,
                        PathFilters.logsCRCFilter(), null, true, conf);

        // A single IntWritable is reused as the key for every appended record.
        IntWritable rowId = new IntWritable();
        int rowCount = 0;
        int columnCount = 0;
        for (Pair<Text, VectorWritable> record : records) {
            VectorWritable vector = record.getSecond();
            rowId.set(rowCount);
            indexWriter.append(rowId, record.getFirst());
            matrixWriter.append(rowId, vector);
            // The reported column count is the size of the last vector seen.
            columnCount = vector.get().size();
            rowCount++;
        }

        log.info("Wrote out matrix with {} rows and {} columns to {}", rowCount, columnCount, matrixPath);
        return 0;
    }
}

From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java

License:Apache License

/**
 * Iterates over all rows stored under {@code rowPath} as {@code MatrixSlice}s.
 *
 * <p>If {@code rowPath} is a directory, its children are matched with the glob
 * {@code rowPath/*}; otherwise {@code rowPath} itself is used as the glob pattern.
 *
 * @return an iterator that converts each {@code (IntWritable, VectorWritable)} record
 *         into a {@code MatrixSlice} built from the vector and the integer key
 * @throws IllegalStateException wrapping any {@code IOException} raised while
 *         inspecting the path or opening the iterator
 */
@Override
public Iterator<MatrixSlice> iterateAll() {
    Function<Pair<IntWritable, VectorWritable>, MatrixSlice> toSlice =
            new Function<Pair<IntWritable, VectorWritable>, MatrixSlice>() {
                @Override
                public MatrixSlice apply(Pair<IntWritable, VectorWritable> from) {
                    return new MatrixSlice(from.getSecond().get(), from.getFirst().get());
                }
            };
    try {
        Path pathPattern = FileSystem.get(conf).getFileStatus(rowPath).isDir()
                ? new Path(rowPath, "*")
                : rowPath;
        SequenceFileDirIterator<IntWritable, VectorWritable> rows =
                new SequenceFileDirIterator<IntWritable, VectorWritable>(pathPattern, PathType.GLOB,
                        PathFilters.logsCRCFilter(), null, true, conf);
        return Iterators.transform(rows, toSlice);
    } catch (IOException ioe) {
        throw new IllegalStateException(ioe);
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.integer.IntegerStringOutputConverter.java

License:Apache License

/**
 * Translates an integer-encoded key and its integer-encoded patterns back to strings
 * via {@code featureReverseMap} and forwards the result to the wrapped collector.
 *
 * @param key   integer id of the feature the patterns belong to
 * @param value patterns as (list of integer item ids, Long) pairs; the Long is passed
 *              through unchanged
 * @throws IOException if the wrapped collector fails
 */
@Override
public void collect(Integer key, List<Pair<List<Integer>, Long>> value) throws IOException {
    String decodedKey = featureReverseMap.get(key);
    List<Pair<List<String>, Long>> decodedPatterns = Lists.newArrayList();
    for (Pair<List<Integer>, Long> encoded : value) {
        List<String> items = Lists.newArrayList();
        for (Integer itemId : encoded.getFirst()) {
            String item = featureReverseMap.get(itemId);
            items.add(item);
        }
        Pair<List<String>, Long> decoded = new Pair<List<String>, Long>(items, encoded.getSecond());
        decodedPatterns.add(decoded);
    }
    collector.collect(new Text(decodedKey), new TopKStringPatterns(decodedPatterns));
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.string.TopKStringPatterns.java

License:Apache License

/**
 * Merges this object's patterns with those of {@code pattern}, keeping at most
 * {@code heapSize} entries.
 *
 * <p>Both sequences are consumed front to back. At each step the "greater" of the two
 * current heads is emitted, where heads are compared by
 * <ol>
 *   <li>the Long second component (presumably the support count - confirm),</li>
 *   <li>then the number of items in the pattern,</li>
 *   <li>then the items pairwise via {@code String.compareTo}.</li>
 * </ol>
 * When the two heads compare equal only one entry is emitted (the one from
 * {@code pattern}) and both sides advance.
 * NOTE(review): this presumably relies on both inputs already being sorted in that
 * descending order - verify against the callers.
 *
 * @param pattern  the other pattern list to merge with
 * @param heapSize maximum number of entries in the result
 * @return a new {@code TopKStringPatterns} holding the merged entries
 */
public TopKStringPatterns merge(TopKStringPatterns pattern, int heapSize) {
    List<Pair<List<String>, Long>> merged = Lists.newArrayList();
    Iterator<Pair<List<String>, Long>> mineIt = frequentPatterns.iterator();
    Iterator<Pair<List<String>, Long>> theirsIt = pattern.iterator();
    Pair<List<String>, Long> mine = null;
    Pair<List<String>, Long> theirs = null;

    for (int emitted = 0; emitted < heapSize; emitted++) {
        // Refill whichever head was consumed on the previous round.
        if (mine == null && mineIt.hasNext()) {
            mine = mineIt.next();
        }
        if (theirs == null && theirsIt.hasNext()) {
            theirs = theirsIt.next();
        }

        if (mine == null && theirs == null) {
            // Both inputs are exhausted.
            break;
        }
        if (theirs == null) {
            merged.add(mine);
            mine = null;
            continue;
        }
        if (mine == null) {
            merged.add(theirs);
            theirs = null;
            continue;
        }

        // Both heads are present: order them by count, then length, then items.
        int order = mine.getSecond().compareTo(theirs.getSecond());
        if (order == 0) {
            List<String> mineItems = mine.getFirst();
            List<String> theirItems = theirs.getFirst();
            order = mineItems.size() - theirItems.size();
            // Only reached with order == 0 when both lists have the same length.
            for (int k = 0; order == 0 && k < mineItems.size(); k++) {
                order = mineItems.get(k).compareTo(theirItems.get(k));
            }
        }

        if (order > 0) {
            merged.add(mine);
            mine = null;
        } else {
            merged.add(theirs);
            if (order == 0) {
                // Identical entries: drop our copy so it is not emitted twice.
                mine = null;
            }
            theirs = null;
        }
    }
    return new TopKStringPatterns(merged);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.string.TopKStringPatterns.java

License:Apache License

/**
 * Serializes {@code frequentPatterns} to {@code out}.
 *
 * <p>Layout: an int with the number of patterns, then for each pattern an int item
 * count, a long (the pair's second component), and each item written with
 * {@code writeUTF}.
 *
 * @param out destination for the serialized form
 * @throws IOException if writing to {@code out} fails
 */
@Override
public void write(DataOutput out) throws IOException {
    out.writeInt(frequentPatterns.size());
    for (Pair<List<String>, Long> entry : frequentPatterns) {
        List<String> items = entry.getFirst();
        // The item count precedes the long so a reader knows how many strings follow it.
        out.writeInt(items.size());
        out.writeLong(entry.getSecond());
        for (String item : items) {
            out.writeUTF(item);
        }
    }
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.TransactionIterator.java

License:Apache License

/**
 * Wraps an iterator of attribute-list transactions so that each one is exposed as an
 * {@code int[]} of attribute ids.
 *
 * <p>Attributes without an entry in {@code attributeIdMapping} are dropped; the Long
 * second component of each pair is passed through unchanged. A {@code null} input pair
 * maps to {@code null}.
 *
 * @param transactions       source transactions as (attribute list, Long) pairs
 * @param attributeIdMapping lookup from attribute to its integer id
 */
public TransactionIterator(Iterator<Pair<List<T>, Long>> transactions,
        final Map<T, Integer> attributeIdMapping) {
    // Scratch space shared by every conversion; sized to the number of known attributes.
    // NOTE(review): a transaction containing more mapped attributes than the mapping has
    // entries (e.g. duplicates) would overrun this buffer - confirm inputs are distinct.
    transactionBuffer = new int[attributeIdMapping.size()];

    Function<Pair<List<T>, Long>, Pair<int[], Long>> encode =
            new Function<Pair<List<T>, Long>, Pair<int[], Long>>() {
                @Override
                public Pair<int[], Long> apply(Pair<List<T>, Long> from) {
                    if (from == null) {
                        return null;
                    }
                    int used = 0;
                    for (T attribute : from.getFirst()) {
                        if (!attributeIdMapping.containsKey(attribute)) {
                            continue;
                        }
                        transactionBuffer[used] = attributeIdMapping.get(attribute);
                        used++;
                    }
                    // Copy out only the filled prefix so the shared buffer can be reused.
                    int[] encoded = new int[used];
                    System.arraycopy(transactionBuffer, 0, encoded, 0, used);
                    return new Pair<int[], Long>(encoded, from.getSecond());
                }
            };
    delegate = Iterators.transform(transactions, encode);
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.CountDescendingPairComparator.java

License:Apache License

/**
 * Orders pairs by their second component descending, breaking ties by the first
 * component ascending.
 *
 * @param a left-hand pair
 * @param b right-hand pair
 * @return a negative, zero or positive value as {@code a} sorts before, equal to or
 *         after {@code b}
 */
@Override
public int compare(Pair<A, B> a, Pair<A, B> b) {
    // Operands are swapped on purpose: a larger second component sorts first.
    int bySecondDescending = b.getSecond().compareTo(a.getSecond());
    return bySecondDescending != 0
            ? bySecondDescending
            : a.getFirst().compareTo(b.getFirst());
}

From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth.FPGrowth.java

License:Apache License

/**
 * Reads every record of a sequence file whose values are {@code TopKStringPatterns}
 * and returns them as a list of (key string, patterns) pairs in file order.
 *
 * @param conf Hadoop configuration handed to the sequence-file reader
 * @param path location of the sequence file to read
 * @return one pair per record: the key's {@code toString()} and a new
 *         {@code TopKStringPatterns} built from the record's pattern list
 */
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Configuration conf, Path path) {
    List<Pair<String, TopKStringPatterns>> results = Lists.newArrayList();
    SequenceFileIterable<Writable, TopKStringPatterns> records =
            new SequenceFileIterable<Writable, TopKStringPatterns>(path, true, conf);
    // Each record's key is the feature; its value holds that feature's patterns.
    for (Pair<Writable, TopKStringPatterns> record : records) {
        String feature = record.getFirst().toString();
        // NOTE(review): a fresh TopKStringPatterns is built per record, presumably because
        // the iterable reuses the value instance ('true' flag) - confirm.
        TopKStringPatterns patterns = new TopKStringPatterns(record.getSecond().getPatterns());
        results.add(new Pair<String, TopKStringPatterns>(feature, patterns));
    }
    return results;
}