List of usage examples for the org.apache.mahout.common.Pair method getSecond()
public B getSecond()
From source file:ClassifierHD.java
License:Apache License
public static Map<String, Integer> readDictionnary(Configuration conf, Path dictionnaryPath) { Map<String, Integer> dictionnary = new HashMap<String, Integer>(); for (Pair<Text, IntWritable> pair : new SequenceFileIterable<Text, IntWritable>(dictionnaryPath, true, conf)) {//from w ww . j a v a2s . co m dictionnary.put(pair.getFirst().toString(), pair.getSecond().get()); } return dictionnary; }
From source file:ClassifierHD.java
License:Apache License
public static Map<Integer, Long> readDocumentFrequency(Configuration conf, Path documentFrequencyPath) { Map<Integer, Long> documentFrequency = new HashMap<Integer, Long>(); for (Pair<IntWritable, LongWritable> pair : new SequenceFileIterable<IntWritable, LongWritable>( documentFrequencyPath, true, conf)) { documentFrequency.put(pair.getFirst().get(), pair.getSecond().get()); }//from ww w. j a v a 2s . c om return documentFrequency; }
From source file:ac.keio.sslab.nlp.lda.RowIdJob.java
License:Apache License
@SuppressWarnings("deprecation") @Override//from w w w .ja v a 2 s.com public int run(String[] args) throws Exception { addInputOption(); addOutputOption(); Map<String, List<String>> parsedArgs = parseArguments(args); if (parsedArgs == null) { return -1; } Configuration conf = getConf(); FileSystem fs = FileSystem.get(conf); Path outputPath = getOutputPath(); Path indexPath = new Path(outputPath, "docIndex"); Path matrixPath = new Path(outputPath, "matrix"); try (SequenceFile.Writer indexWriter = SequenceFile.createWriter(fs, conf, indexPath, IntWritable.class, Text.class); SequenceFile.Writer matrixWriter = SequenceFile.createWriter(fs, conf, matrixPath, IntWritable.class, VectorWritable.class)) { IntWritable docId = new IntWritable(); int i = 0; int numCols = 0; for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>( getInputPath(), PathType.LIST, PathFilters.logsCRCFilter(), null, true, conf)) { VectorWritable value = record.getSecond(); docId.set(i); indexWriter.append(docId, record.getFirst()); matrixWriter.append(docId, value); i++; numCols = value.get().size(); } log.info("Wrote out matrix with {} rows and {} columns to {}", i, numCols, matrixPath); return 0; } }
From source file:at.illecker.hadoop.rootbeer.examples.matrixmultiplication.DistributedRowMatrix.java
License:Apache License
@Override public Iterator<MatrixSlice> iterateAll() { try {// w w w .j a v a2 s . co m Path pathPattern = rowPath; if (FileSystem.get(conf).getFileStatus(rowPath).isDir()) { pathPattern = new Path(rowPath, "*"); } return Iterators.transform( new SequenceFileDirIterator<IntWritable, VectorWritable>(pathPattern, PathType.GLOB, PathFilters.logsCRCFilter(), null, true, conf), new Function<Pair<IntWritable, VectorWritable>, MatrixSlice>() { @Override public MatrixSlice apply(Pair<IntWritable, VectorWritable> from) { return new MatrixSlice(from.getSecond().get(), from.getFirst().get()); } }); } catch (IOException ioe) { throw new IllegalStateException(ioe); } }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.integer.IntegerStringOutputConverter.java
License:Apache License
@Override public void collect(Integer key, List<Pair<List<Integer>, Long>> value) throws IOException { String stringKey = featureReverseMap.get(key); List<Pair<List<String>, Long>> stringValues = Lists.newArrayList(); for (Pair<List<Integer>, Long> e : value) { List<String> pattern = Lists.newArrayList(); for (Integer i : e.getFirst()) { pattern.add(featureReverseMap.get(i)); }//from ww w. j a v a2 s . c o m stringValues.add(new Pair<List<String>, Long>(pattern, e.getSecond())); } collector.collect(new Text(stringKey), new TopKStringPatterns(stringValues)); }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.string.TopKStringPatterns.java
License:Apache License
public TopKStringPatterns merge(TopKStringPatterns pattern, int heapSize) { List<Pair<List<String>, Long>> patterns = Lists.newArrayList(); Iterator<Pair<List<String>, Long>> myIterator = frequentPatterns.iterator(); Iterator<Pair<List<String>, Long>> otherIterator = pattern.iterator(); Pair<List<String>, Long> myItem = null; Pair<List<String>, Long> otherItem = null; for (int i = 0; i < heapSize; i++) { if (myItem == null && myIterator.hasNext()) { myItem = myIterator.next();// www . ja v a2 s .c o m } if (otherItem == null && otherIterator.hasNext()) { otherItem = otherIterator.next(); } if (myItem != null && otherItem != null) { int cmp = myItem.getSecond().compareTo(otherItem.getSecond()); if (cmp == 0) { cmp = myItem.getFirst().size() - otherItem.getFirst().size(); if (cmp == 0) { for (int j = 0; j < myItem.getFirst().size(); j++) { cmp = myItem.getFirst().get(j).compareTo(otherItem.getFirst().get(j)); if (cmp != 0) { break; } } } } if (cmp <= 0) { patterns.add(otherItem); if (cmp == 0) { myItem = null; } otherItem = null; } else if (cmp > 0) { patterns.add(myItem); myItem = null; } } else if (myItem != null) { patterns.add(myItem); myItem = null; } else if (otherItem != null) { patterns.add(otherItem); otherItem = null; } else { break; } } return new TopKStringPatterns(patterns); }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.string.TopKStringPatterns.java
License:Apache License
@Override public void write(DataOutput out) throws IOException { out.writeInt(frequentPatterns.size()); for (Pair<List<String>, Long> pattern : frequentPatterns) { out.writeInt(pattern.getFirst().size()); out.writeLong(pattern.getSecond()); for (String item : pattern.getFirst()) { out.writeUTF(item);// w w w . ja v a2 s. co m } } }
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.convertors.TransactionIterator.java
License:Apache License
/**
 * Wraps an iterator of (item list, Long) transactions so that each transaction is
 * delivered as an {@code int[]} of mapped ids paired with the original Long.
 *
 * <p>Items absent from {@code attributeIdMapping} are skipped. A {@code null}
 * input element is passed through as {@code null}.
 *
 * <p>NOTE(review): the scratch buffer is sized to {@code attributeIdMapping.size()}.
 * A transaction containing more mapped items than that (e.g. repeated items) would
 * overrun it; confirm that callers never supply such transactions.
 *
 * @param transactions       source transactions
 * @param attributeIdMapping item-to-id lookup
 */
public TransactionIterator(Iterator<Pair<List<T>, Long>> transactions,
                           final Map<T, Integer> attributeIdMapping) {
    transactionBuffer = new int[attributeIdMapping.size()];

    Function<Pair<List<T>, Long>, Pair<int[], Long>> toIdTransaction =
            new Function<Pair<List<T>, Long>, Pair<int[], Long>>() {
                @Override
                public Pair<int[], Long> apply(Pair<List<T>, Long> from) {
                    if (from == null) {
                        return null;
                    }
                    // Collect mapped ids into the scratch buffer, then trim to size.
                    int mapped = 0;
                    for (T attribute : from.getFirst()) {
                        if (!attributeIdMapping.containsKey(attribute)) {
                            continue;
                        }
                        transactionBuffer[mapped++] = attributeIdMapping.get(attribute);
                    }
                    int[] ids = new int[mapped];
                    System.arraycopy(transactionBuffer, 0, ids, 0, mapped);
                    return new Pair<int[], Long>(ids, from.getSecond());
                }
            };

    delegate = Iterators.transform(transactions, toIdTransaction);
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.CountDescendingPairComparator.java
License:Apache License
/**
 * Orders pairs by their second element in descending order, breaking ties by the
 * first element in ascending order.
 *
 * @param a left-hand pair
 * @param b right-hand pair
 * @return negative, zero or positive per the {@code Comparator} contract
 */
@Override
public int compare(Pair<A, B> a, Pair<A, B> b) {
    // Operands are swapped on purpose: the larger second element sorts first.
    int bySecondDescending = b.getSecond().compareTo(a.getSecond());
    return bySecondDescending != 0
            ? bySecondDescending
            : a.getFirst().compareTo(b.getFirst());
}
From source file:com.cg.mapreduce.fpgrowth.mahout.fpm.fpgrowth.FPGrowth.java
License:Apache License
public static List<Pair<String, TopKStringPatterns>> readFrequentPattern(Configuration conf, Path path) { List<Pair<String, TopKStringPatterns>> ret = Lists.newArrayList(); // key is feature value is count for (Pair<Writable, TopKStringPatterns> record : new SequenceFileIterable<Writable, TopKStringPatterns>( path, true, conf)) {/*from ww w . j a v a 2s .c om*/ ret.add(new Pair<String, TopKStringPatterns>(record.getFirst().toString(), new TopKStringPatterns(record.getSecond().getPatterns()))); } return ret; }