Example usage for the org.apache.mahout.math.NamedVector constructor NamedVector(Vector, String)

List of usage examples for the org.apache.mahout.math.NamedVector constructor NamedVector(Vector, String)

Introduction

On this page you can find example usages of the org.apache.mahout.math.NamedVector constructor NamedVector(Vector, String).

Prototype

public NamedVector(Vector delegate, String name) 

Source Link

Usage

From source file:com.cloudera.science.ml.core.vectors.Vectors.java

License:Open Source License

/**
 * Creates a {@code NamedVector} that carries the supplied name and wraps
 * a vector built from the supplied values.
 *
 * @param name the name to attach to the vector
 * @param v the values the vector holds
 * @return a new {@code NamedVector}
 */
public static Vector named(String name, double... v) {
    Vector values = of(v);
    return new NamedVector(values, name);
}

From source file:com.cloudera.science.ml.parallel.fn.SvmLightFnTest.java

License:Open Source License

/**
 * Checks that named vectors are formatted as the name followed by
 * {@code index:value} pairs.
 */
@Test
public void testNamedVector() throws Exception {
    // Vector that receives its name through the Vectors.named factory.
    Vector viaFactory = Vectors.named("foo", 1.0, 2.0, 3.0);
    assertEquals("foo 0:1.0 1:2.0 2:3.0", fn.map(viaFactory));

    // Sparse vector with two entries set, wrapped in a NamedVector afterwards.
    Vector sparse = Vectors.sparse(10);
    sparse.set(3, 7.2);
    sparse.set(6, 12.0);
    Vector wrapped = new NamedVector(sparse, "bar");
    assertEquals("bar 3:7.2 6:12.0", fn.map(wrapped));
}

From source file:com.elex.dmp.vectorizer.TFPartialVectorReducer.java

License:Apache License

/**
 * Turns the first {@code StringTuple} received for a key into a term-frequency
 * vector and writes it to the context, provided at least one dictionary term
 * was counted.
 *
 * <p>Only the first element of {@code values} is consumed. When
 * {@code maxNGramSize} is 2 or more, the entries are run through a
 * {@code ShingleFilter} before being looked up in the dictionary; otherwise
 * each entry is looked up as-is. A vector without any non-default element is
 * not written; a counter is incremented instead.
 *
 * @param key the key the vector is written under; also used as the vector name
 *            when {@code namedVector} is set
 * @param values the tuples for this key (only the first one is read)
 * @param context the context receiving the output and the counter update
 * @throws IOException declared by the overridden method and by the context calls
 * @throws InterruptedException declared by the overridden method and by the context calls
 */
@Override
protected void reduce(Text key, Iterable<StringTuple> values, Context context)
        throws IOException, InterruptedException {
    Iterator<StringTuple> tuples = values.iterator();
    if (!tuples.hasNext()) {
        return;
    }
    StringTuple document = tuples.next();

    // guess at initial size
    Vector termFrequencies = new RandomAccessSparseVector(dimension, document.length());

    if (maxNGramSize < 2) {
        // unigrams: every entry is looked up directly
        for (String unigram : document.getEntries()) {
            if (!unigram.isEmpty() && dictionary.containsKey(unigram)) {
                int unigramId = dictionary.get(unigram);
                termFrequencies.setQuick(unigramId, termFrequencies.getQuick(unigramId) + 1);
            }
        }
    } else {
        // n-grams: entries are combined by the shingle filter first
        ShingleFilter shingles = new ShingleFilter(
                new IteratorTokenStream(document.getEntries().iterator()), maxNGramSize);
        try {
            // The term attribute is read once before the first incrementToken() call;
            // this statement order is kept exactly as it was.
            do {
                String ngram = shingles.getAttribute(CharTermAttribute.class).toString();
                if (!ngram.isEmpty() && dictionary.containsKey(ngram)) {
                    int ngramId = dictionary.get(ngram);
                    termFrequencies.setQuick(ngramId, termFrequencies.getQuick(ngramId) + 1);
                }
            } while (shingles.incrementToken());

            shingles.end();
        } finally {
            Closeables.closeQuietly(shingles);
        }
    }

    if (sequentialAccess) {
        termFrequencies = new SequentialAccessSparseVector(termFrequencies);
    }

    if (namedVector) {
        termFrequencies = new NamedVector(termFrequencies, key.toString());
    }

    // A vector with no non-zero entries (nothing in the dictionary) is not worth sending to disk.
    if (termFrequencies.getNumNondefaultElements() <= 0) {
        context.getCounter("TFParticalVectorReducer", "emptyVectorCount").increment(1);
        return;
    }
    context.write(key, new VectorWritable(termFrequencies));
}

From source file:com.lakhani.anchorgraph.applestovectors.java

/**
 * Writes five named apple vectors to a Hadoop sequence file (keyed by the
 * vector name) and then reads the same file back, printing every entry.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file system cannot be reached or reading/writing fails
 */
public static void main(String args[]) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();
    apples.add(new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple"));
    // The fifth apple must be added as well; otherwise it never reaches the output file.
    apples.add(new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path("/user/cloudera/anchorgraph/output");

    // Close the writer even when an append fails, so the file handle is not leaked.
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : apples) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        writer.close();
    }

    // Read back the file that was just written (same path as the writer).
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}

From source file:com.mozilla.grouperfish.pig.storage.DocumentVectorStorage.java

License:Apache License

/**
 * Converts a tuple into a named sparse vector and writes it under the tuple's key.
 *
 * <p>Field 0 of {@code tuple} is the key (a {@code String}), field 1 is a nested
 * tuple describing the vector. Each element of the nested tuple is either an
 * integer index, whose entry is set to 1.0, or an (index, weight) tuple.
 *
 * @param tuple the tuple to store
 * @throws IOException declared by the overridden method and by the tuple accessors
 * @throws RuntimeException if an element of the vector tuple is neither an integer nor a tuple
 */
@SuppressWarnings("unchecked")
@Override
public void putNext(Tuple tuple) throws IOException {
    outputKey.set((String) tuple.get(0));
    Tuple vectorTuple = (Tuple) tuple.get(1);
    Vector vector = new NamedVector(new RandomAccessSparseVector(dimensions, vectorTuple.size()),
            outputKey.toString());
    for (int i = 0; i < vectorTuple.size(); i++) {
        Object o = vectorTuple.get(i);
        switch (vectorTuple.getType(i)) {
        case DataType.INTEGER:
            // If this is just an integer then we just want to set the index to 1.0
            vector.set((Integer) o, 1.0);
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set the index and the weight
            Tuple subt = (Tuple) o;
            vector.set((Integer) subt.get(0), (Double) subt.get(1));
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(vector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up the stack can still observe it;
        // catching InterruptedException clears the flag.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while writing", e);
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

/**
 * Converts one row tuple into a named Mahout vector and writes it under the row's
 * integer key.
 *
 * <p>Field 0 of {@code t} is the row key, field 1 is the row itself. A numeric
 * element at position {@code ii} is stored at index {@code ii}; a nested
 * (column, value) tuple is stored at the given column. The backing vector is
 * dense, sequential-access sparse or random-access sparse depending on
 * {@code STORE_AS_DENSE} and {@code STORE_AS_SEQUENTIAL}.
 *
 * @param t the tuple to store
 * @throws IOException declared by the overridden method and by the tuple accessors
 * @throws IllegalArgumentException if {@code dimensions} is 0
 * @throws RuntimeException if a row element is neither numeric nor a tuple
 */
@Override
public void putNext(Tuple t) throws IOException {
    IntWritable outputKey = new IntWritable();
    VectorWritable outputValue = new VectorWritable();
    outputKey.set((Integer) t.get(0));
    Tuple currRow = (Tuple) t.get(1);
    Vector currRowVector;
    if (dimensions == 0) {
        throw new IllegalArgumentException("Trying to create 0 dimension vector");
    }
    if (STORE_AS_DENSE) {
        currRowVector = new NamedVector(new DenseVector(dimensions), outputKey.toString());
    } else if (STORE_AS_SEQUENTIAL) {
        currRowVector = new NamedVector(new SequentialAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    } else {
        currRowVector = new NamedVector(new RandomAccessSparseVector(dimensions, currRow.size()),
                outputKey.toString());
    }
    for (int ii = 0; ii < currRow.size(); ii++) {
        Object o = currRow.get(ii);
        switch (currRow.getType(ii)) {
        case DataType.INTEGER:
        case DataType.LONG:
        case DataType.FLOAT:
        case DataType.DOUBLE:
            // Go through Number: a direct (Double) cast throws ClassCastException
            // for Integer, Long and Float values.
            currRowVector.set(ii, ((Number) o).doubleValue());
            break;
        case DataType.TUPLE:
            // If this is a tuple then we want to set column and element
            Tuple subt = (Tuple) o;
            currRowVector.set((Integer) subt.get(0), ((Number) subt.get(1)).doubleValue());
            break;
        default:
            throw new RuntimeException("Unexpected tuple form");
        }
    }
    outputValue.set(currRowVector);
    try {
        writer.write(outputKey, outputValue);
    } catch (InterruptedException e) {
        // Restore the interrupt status so callers further up the stack can still observe it;
        // catching InterruptedException clears the flag.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while writing", e);
    }
}

From source file:csvToSequence.ConvertToSeqLargeTxtVec.java

/**
 * Converts a CSV file into a Hadoop sequence file of (Text, VectorWritable) pairs,
 * streaming one record at a time.
 *
 * <p>The first line of the input is skipped. For every following line, the first
 * eight comma-separated fields are parsed as doubles and become the vector; the
 * ninth field is parsed as an integer and becomes the key, prefixed with
 * {@code "Fraud/"} when it equals 1 and {@code "Normal/"} otherwise.
 *
 * @param args command-line arguments (unused; the paths are hard-coded)
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws IOException {
    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    // Both the writer and the reader are closed in finally blocks so that neither
    // leaks when parsing or appending fails part-way through.
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();

        BufferedReader br = new BufferedReader(new FileReader(filename));
        try {
            String s;
            br.readLine(); // skip the first line (presumably the CSV header)

            while ((s = br.readLine()) != null) {
                String[] value = s.split(",");
                double[] numValue = new double[8];

                for (int i = 0; i < 8; i++) {
                    numValue[i] = Double.parseDouble(value[i]);
                }

                if (Integer.parseInt(value[8]) == 1) {
                    value[8] = "Fraud/" + value[8];
                } else {
                    value[8] = "Normal/" + value[8];
                }

                NamedVector oneV = new NamedVector(new DenseVector(numValue), value[8]);

                // Only the delegate is stored as the value; the name travels in the Text key.
                vec.set(oneV.getDelegate());
                writer.append(new Text(oneV.getName()), vec);
            }
        } finally {
            br.close();
        }
    } finally {
        writer.close();
    }
}

From source file:csvToSequence.ConvertToSeqTextVecWritable.java

/**
 * Converts a CSV file into a Hadoop sequence file of (Text, VectorWritable) pairs,
 * collecting all records in memory before writing them.
 *
 * <p>The first line of the input is skipped. For every following line, the first
 * eight comma-separated fields are parsed as doubles and become the vector; the
 * ninth field is parsed as an integer and becomes the key, prefixed with
 * {@code "Fraud/"} when it equals 1 and {@code "Normal/"} otherwise.
 *
 * @param args command-line arguments (unused; the paths are hard-coded)
 * @throws FileNotFoundException if the input file does not exist
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws FileNotFoundException, IOException {

    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();
    List<NamedVector> namedVectors = new ArrayList<>();

    // try-with-resources closes the reader even when a line fails to parse.
    try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
        String s;
        br.readLine(); // skip the first line (presumably the CSV header)
        while ((s = br.readLine()) != null) {
            String[] value = s.split(",");
            double[] numValue = new double[8];

            for (int i = 0; i < 8; i++) {
                numValue[i] = Double.parseDouble(value[i]);
            }

            if (Integer.parseInt(value[8]) == 1) {
                value[8] = "Fraud/" + value[8];
            } else {
                value[8] = "Normal/" + value[8];
            }

            namedVectors.add(new NamedVector(new DenseVector(numValue), value[8]));
        }
    }

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    // The writer is closed even when an append fails.
    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
            VectorWritable.class)) {
        VectorWritable vec = new VectorWritable();

        for (NamedVector iter : namedVectors) {
            // Only the delegate is stored as the value; the name travels in the Text key.
            vec.set(iter.getDelegate());
            writer.append(new Text(iter.getName()), vec);
        }
    }
}

From source file:edu.dfci.cccb.mev.kmeans.domain.hadoop.HadoopKMeansBuilder.java

License:Open Source License

/**
 * Clusters the keys of {@code dimension()} and returns the resulting groups of key names.
 *
 * <p>Steps, all carried out inside a {@code TemporaryFolder}:
 * <ol>
 * <li>one {@code NamedVector} per key of {@code dimension()} is written to
 * {@code points/file1}, keyed by a running record number;</li>
 * <li>the first {@code k()} vectors are written to {@code clusters/part-00000} as the
 * initial clusters;</li>
 * <li>{@code run(...)} is invoked on those folders (presumably Mahout's k-means driver,
 * statically imported - confirm against the file's imports);</li>
 * <li>the clustered points are read from {@code part-m-00000} under
 * {@code CLUSTERED_POINTS_DIR} and grouped by cluster key; the vector names form the
 * members of each group.</li>
 * </ol>
 *
 * @return a {@code KMeans} carrying the dataset, dimension, name, type and the clusters found
 * @throws DatasetException wrapping any {@code IOException}, {@code ClassNotFoundException}
 *         or {@code InterruptedException} raised while writing, running or reading
 */
@Override
public KMeans build() throws DatasetException {
    try (TemporaryFolder hadoop = new TemporaryFolder()) {
        File points = new File(hadoop, "points");
        points.mkdir();

        Configuration configuration = new Configuration();
        FileSystem system = get(configuration);
        // The dimension that is NOT being clustered; its keys index the entries of each vector.
        final Dimension other = dataset().dimension(dimension().type() == ROW ? COLUMN : ROW);

        // Lazy list view: nothing is copied up front. Every get(index) creates a fresh
        // vector whose values are looked up in dataset().values() on demand.
        List<NamedVector> vectors = new AbstractList<NamedVector>() {

            @Override
            public NamedVector get(int index) {
                // Key of the row/column this vector represents; also used as the vector name.
                final String vector = dimension().keys().get(index);
                return new NamedVector(new AbstractVector(other.keys().size()) {

                    // Writes are rejected; the vector only reads from the dataset.
                    @Override
                    public void setQuick(int index, double value) {
                        throw new UnsupportedOperationException();
                    }

                    @Override
                    public Vector like() {
                        return new RandomAccessSparseVector(size());
                    }

                    @Override
                    public Iterator<Element> iterator() {
                        return new Iterator<Element>() {
                            // Index of the next element to hand out.
                            private int current = 0;

                            @Override
                            public boolean hasNext() {
                                return current < other.keys().size();
                            }

                            // NOTE(review): next() does not check hasNext(); calling it past the
                            // end returns an element whose get() uses an out-of-range index.
                            @Override
                            public Element next() {
                                return new Element() {
                                    // Captures the position at creation time and advances the iterator.
                                    private final int index = current++;

                                    @Override
                                    public void set(double value) {
                                        throw new UnsupportedOperationException();
                                    }

                                    @Override
                                    public int index() {
                                        return index;
                                    }

                                    // The argument order of values().get depends on whether
                                    // the clustered dimension is ROW or not.
                                    @Override
                                    @SneakyThrows(InvalidCoordinateException.class)
                                    public double get() {
                                        return dimension().type() == ROW
                                                ? dataset().values().get(vector, other.keys().get(index))
                                                : dataset().values().get(other.keys().get(index), vector);
                                    }
                                };
                            }

                            @Override
                            public void remove() {
                                throw new UnsupportedOperationException();
                            }
                        };
                    }

                    // Every element is reported as non-zero: this simply delegates to iterator().
                    @Override
                    public Iterator<Element> iterateNonZero() {
                        return iterator();
                    }

                    @Override
                    public boolean isSequentialAccess() {
                        return true;
                    }

                    @Override
                    public boolean isDense() {
                        return true;
                    }

                    // Same lookup as Element.get() above, addressed by index.
                    @Override
                    @SneakyThrows(InvalidCoordinateException.class)
                    public double getQuick(int index) {
                        return dimension().type() == ROW
                                ? dataset().values().get(vector, other.keys().get(index))
                                : dataset().values().get(other.keys().get(index), vector);
                    }

                    @Override
                    public int getNumNondefaultElements() {
                        return other.keys().size();
                    }

                    @Override
                    protected Matrix matrixLike(int rows, int columns) {
                        throw new UnsupportedOperationException();
                    }
                }, vector);
            }

            @Override
            public int size() {
                return dimension().keys().size();
            }
        };

        // write input
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(points, "file1").getAbsolutePath()), LongWritable.class,
                VectorWritable.class)) {
            // A single writable is reused for every record.
            VectorWritable writable = new VectorWritable();
            long record = 0;
            for (Vector vector : vectors) {
                writable.set(vector);
                writer.append(new LongWritable(record++), writable);
            }
        }

        // prepare clusters
        File clusters = new File(hadoop, "clusters");
        clusters.mkdir();
        try (Writer writer = new Writer(system, configuration,
                new Path(new File(clusters, "part-00000").getAbsolutePath()), Text.class, Cluster.class)) {
            // The first k() vectors seed the clusters.
            // NOTE(review): no check is visible here that k() does not exceed vectors.size().
            for (int i = 0; i < k(); i++) {
                Vector vec = vectors.get(i);
                Cluster cluster = new Cluster(vec, i, new EuclideanDistanceMeasure());
                writer.append(new Text(cluster.getIdentifier()), cluster);
            }
        }

        File output = new File(hadoop, "output");
        output.mkdir();

        try {
            run(configuration, new Path(points.getAbsolutePath()), new Path(clusters.getAbsolutePath()),
                    new Path(output.getAbsolutePath()), metric.measurer(), convergence(), iterations(), true,
                    false);

            try (Reader reader = new Reader(system, new Path(
                    new File(new File(output, CLUSTERED_POINTS_DIR), "/part-m-00000").getAbsolutePath()),
                    configuration)) {
                IntWritable key = new IntWritable();
                WeightedVectorWritable value = new WeightedVectorWritable();
                // Cluster key (as string) -> names of the vectors assigned to it.
                Map<String, Set<String>> result = new HashMap<>();

                while (reader.next(key, value)) {
                    Set<String> cluster = result.get(key.toString());
                    if (cluster == null)
                        result.put(key.toString(), cluster = new HashSet<>());
                    // The cast relies on the points having been written as NamedVector above.
                    cluster.add(((NamedVector) value.getVector()).getName());
                }

                // Only the member sets are passed on; the cluster keys themselves are dropped.
                return new AbstractKMeans() {
                }.dataset(dataset()).dimension(dimension()).name(name()).type(type())
                        .clusters(new HashSet<>(result.values()));
            }
        } catch (ClassNotFoundException | InterruptedException e) {
            throw new DatasetException(e);
        }
    } catch (IOException e) {
        throw new DatasetException(e);
    }
}

From source file:edu.indiana.d2i.htrc.io.index.solr.SolrClient.java

License:Apache License

/**
 * Parses one volume from an XML stream into a {@code NamedVector}.
 *
 * <p>The stream is scanned for start elements carrying a {@code name} attribute.
 * The element named {@code VOLUME_ID} supplies the volume id (passed through
 * {@code pairtree.uncleanId}); the element named {@code VOLUME_OCR} is handed to
 * {@code createVector}, after which scanning stops.
 *
 * <p>NOTE(review): if the stream has no {@code VOLUME_OCR} element, or no
 * {@code VOLUME_ID} element before it, the corresponding value is still {@code null}
 * when passed to the {@code NamedVector} constructor.
 *
 * @param content the XML content of a single volume; not closed by this method
 * @return a vector named after the volume id
 * @throws XMLStreamException if the XML cannot be parsed or the parser cannot be closed
 * @throws IOException declared for callers; thrown by the helpers this method calls - TODO confirm
 */
private NamedVector parseOneVolume(InputStream content) throws XMLStreamException, IOException {
    String volumeID = null;
    Vector vector = null;
    XMLStreamReader parser = factory.createXMLStreamReader(content);
    try {
        while (parser.hasNext()) {
            int event = parser.next();
            if (event == XMLStreamConstants.START_ELEMENT) {
                String attributeValue = parser.getAttributeValue(null, "name");
                if (attributeValue != null) {
                    if (attributeValue.equals(VOLUME_ID)) {
                        volumeID = parser.getElementText();
                        volumeID = pairtree.uncleanId(volumeID);
                    } else if (attributeValue.equals(VOLUME_OCR)) {
                        vector = createVector(parser);
                        break;
                    }
                }
            }
        }
    } finally {
        // Release the parser's resources; this does not close the underlying input stream.
        parser.close();
    }

    return new NamedVector(vector, volumeID);
}