Example usage for org.apache.mahout.math NamedVector getName

List of usage examples for org.apache.mahout.math NamedVector getName

Introduction

On this page you can find example usages of org.apache.mahout.math NamedVector getName.

Prototype

public String getName() 

Source Link

Usage

From source file:com.lakhani.anchorgraph.applestovectors.java

/**
 * Writes a small set of named apple feature vectors to a Hadoop sequence file, then prints
 * every key/vector pair found in a sequence file to standard output.
 *
 * <p>Each record is keyed by the vector's name ({@link NamedVector#getName()}) and carries the
 * vector itself wrapped in a {@code VectorWritable}.
 *
 * @param args command-line arguments (unused)
 * @throws Exception if the file system cannot be reached or reading/writing fails
 */
public static void main(String args[]) throws Exception {
    List<NamedVector> apples = new ArrayList<NamedVector>();
    apples.add(new NamedVector(new DenseVector(new double[] { 0.11, 510, 1 }), "Small round green apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.23, 650, 3 }), "Large oval red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.09, 630, 1 }), "Small elongated red apple"));
    apples.add(new NamedVector(new DenseVector(new double[] { 0.25, 590, 3 }), "Large round yellow apple"));
    // This fifth apple used to be constructed but never added, so it was silently left out.
    apples.add(new NamedVector(new DenseVector(new double[] { 0.18, 520, 2 }), "Medium oval green apple"));

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);

    Path path = new Path("/user/cloudera/anchorgraph/output");
    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        // One VectorWritable is reused for every record; append() serializes it immediately.
        VectorWritable vec = new VectorWritable();
        for (NamedVector vector : apples) {
            vec.set(vector);
            writer.append(new Text(vector.getName()), vec);
        }
    } finally {
        // Close even when append() fails so the output stream is not leaked.
        writer.close();
    }

    // NOTE(review): this reads "appledata/apples", not the file written above
    // ("/user/cloudera/anchorgraph/output"). Confirm whether the read-back was meant to use `path`.
    SequenceFile.Reader reader = new SequenceFile.Reader(fs, new Path("appledata/apples"), conf);
    try {
        Text key = new Text();
        VectorWritable value = new VectorWritable();
        while (reader.next(key, value)) {
            System.out.println(key.toString() + " " + value.get().asFormatString());
        }
    } finally {
        reader.close();
    }
}

From source file:com.mozilla.grouperfish.mahout.clustering.display.kmeans.OriginalText.java

License:Apache License

/**
 * Builds a map from cluster id to a random sample of the names of the points assigned to it.
 *
 * <p>Points are read from {@code clusteredPointsPath}. Only points whose vector is a
 * {@link NamedVector} are considered, and each of those is kept when a fresh
 * {@code Random.nextDouble()} draw is below {@code sampleRate}.
 *
 * @param sampleRate threshold compared against a uniform draw in [0, 1); a point is kept when
 *                   the draw is strictly smaller
 * @return map of cluster id to sampled vector names; on an {@link IOException} the error is
 *         logged and whatever was collected so far is returned
 */
public Map<Integer, Set<String>> getDocIds(double sampleRate) {
    Random rand = new Random();
    Map<Integer, Set<String>> docIdMap = new HashMap<Integer, Set<String>>();
    SequenceFileDirectoryReader pointsReader = null;
    try {
        IntWritable k = new IntWritable();
        WeightedVectorWritable wvw = new WeightedVectorWritable();
        pointsReader = new SequenceFileDirectoryReader(clusteredPointsPath);
        while (pointsReader.next(k, wvw)) {
            Vector v = wvw.getVector();
            // The type check stays first so a random draw is only consumed for named vectors.
            if (!(v instanceof NamedVector) || !(rand.nextDouble() < sampleRate)) {
                continue;
            }
            int clusterId = k.get();
            Set<String> curDocIds = docIdMap.get(clusterId);
            if (curDocIds == null) {
                // Register the set once, when the cluster is first seen.
                curDocIds = new HashSet<String>();
                docIdMap.put(clusterId, curDocIds);
            }
            curDocIds.add(((NamedVector) v).getName());
        }
    } catch (IOException e) {
        LOG.error("IOException caught while reading clustered points", e);
    } finally {
        if (pointsReader != null) {
            pointsReader.close();
        }
    }

    return docIdMap;
}

From source file:com.mozilla.grouperfish.transforms.coclustering.display.WriteCoClusteringOutput.java

License:Apache License

/**
 * Reads every clustered-points sequence file under {@code clusteredPointsPath} and adds each
 * point to the co-cluster identified by the record key.
 *
 * <p>Directories and files whose name starts with {@code "_"} are skipped. Each point must be a
 * {@link NamedVector} whose name parses as an int. That id is looked up first in
 * {@code docIDMap} and then in {@code featureIDMap}, and the point is stored with the matching
 * value and a flag telling the two cases apart ({@code true} for documents).
 *
 * <p>{@link IOException} and {@link ClassCastException} raised while reading are logged and
 * swallowed; loading stops at that point. A {@link NumberFormatException} from a non-integer
 * vector name is not caught here.
 *
 * @throws IOException if closing the file system fails
 */
private void loadPoints() throws IOException {
    IntWritable k = new IntWritable();
    WeightedVectorWritable wvw = new WeightedVectorWritable();
    try {
        fs = FileSystem.get(clusteredPointsPath.toUri(), conf);
        for (FileStatus status : fs.listStatus(clusteredPointsPath)) {
            Path p = status.getPath();
            if (status.isDir() || p.getName().startsWith("_")) {
                continue;
            }
            // One reader per file, closed exactly once in the finally below.
            SequenceFile.Reader currReader = null;
            try {
                currReader = new SequenceFile.Reader(fs, p, conf);
                while (currReader.next(k, wvw)) {
                    CoCluster currCluster = coclusters.get(k.get());
                    NamedVector v = (NamedVector) wvw.getVector();
                    int currVID = Integer.parseInt(v.getName());
                    if (docIDMap.containsKey(currVID)) {
                        currCluster.put(v, docIDMap.get(currVID), true);
                    } else if (featureIDMap.containsKey(currVID)) {
                        currCluster.put(v, featureIDMap.get(currVID), false);
                    } else {
                        LOG.error("Key not feature or document!");
                    }
                }
            } finally {
                // closeStream tolerates null, so no guard is needed if the constructor threw.
                IOUtils.closeStream(currReader);
            }
        }
    } catch (IOException ie) {
        // These failures abort loading, so they are reported as errors rather than info.
        LOG.error("Error while reading points", ie);
    } catch (ClassCastException ce) {
        LOG.error("NamedVectors possibly not used", ce);
    } finally {
        // NOTE(review): FileSystem.get may hand out a shared cached instance; confirm that
        // closing it here cannot affect other users of the same file system.
        if (fs != null) {
            fs.close();
        }
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.KMeansOutputLoader.java

License:Apache License

/**
 * Reads the next clustered point from the underlying record reader and converts it to a tuple.
 *
 * <p>The tuple has three fields: the cluster id (the record's {@code IntWritable} key), the
 * vector id (the {@link NamedVector} name parsed as an int), and a bag of
 * {@code (index, value)} tuples, one per element returned by {@code iterateNonZero()}.
 *
 * @return the converted tuple, or {@code null} when the reader has no more records
 * @throws IOException if reading is interrupted, if the vector name is not an integer, or if
 *                     the vector is not a {@link NamedVector}; the original exception is the cause
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!this.reader.nextKeyValue()) {
            return null;
        }
        Tuple currRow = tupleFactory.newTuple(3);
        DataBag rowInfoBag = bagFactory.newDefaultBag();
        IntWritable key = (IntWritable) reader.getCurrentKey();
        int clusterID = key.get();
        WeightedVectorWritable value = (WeightedVectorWritable) reader.getCurrentValue();
        Vector rowInfo = value.getVector();
        NamedVector nrowInfo = (NamedVector) rowInfo;
        int vectorID = Integer.parseInt(nrowInfo.getName());
        for (Iterator<Vector.Element> itr = rowInfo.iterateNonZero(); itr.hasNext();) {
            Vector.Element elemInfo = itr.next();
            Tuple currElement = tupleFactory.newTuple(2);
            currElement.set(0, elemInfo.index());
            currElement.set(1, elemInfo.get());
            rowInfoBag.add(currElement);
        }
        currRow.set(0, clusterID);
        currRow.set(1, vectorID);
        currRow.set(2, rowInfoBag);
        return currRow;
    } catch (InterruptedException ie) {
        // Restore the interrupt flag so callers further up can still observe the interruption.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while reading", ie);
        throw new IOException(ie);
    } catch (NumberFormatException ne) {
        LOG.error("Possible use of non int values for NamedVector keys", ne);
        throw new IOException(ne);
    } catch (ClassCastException e) {
        LOG.error("Possible cast of normal Vector to NamedVector", e);
        throw new IOException(e);
    }
}

From source file:com.mozilla.grouperfish.transforms.coclustering.pig.storage.MahoutVectorStorage.java

License:Apache License

/**
 * Reads the next vector from the underlying record reader and converts it to a tuple.
 *
 * <p>The tuple has two fields: the row id and a bag of {@code (index, value)} tuples, one per
 * element returned by {@code iterateNonZero()}. The row id is the record's {@code IntWritable}
 * key, unless the vector is a {@link NamedVector}, in which case its name parsed as an int
 * takes precedence.
 *
 * @return the converted tuple, or {@code null} when the reader has no more records
 * @throws IOException if reading is interrupted or the vector name is not an integer; the
 *                     original exception is the cause
 */
@Override
public Tuple getNext() throws IOException {
    try {
        if (!this.reader.nextKeyValue()) {
            return null;
        }
        Tuple currRow = tupleFactory.newTuple(2);
        DataBag rowInfoBag = bagFactory.newDefaultBag();
        IntWritable key = reader.getCurrentKey();
        int rowID = key.get();
        VectorWritable value = reader.getCurrentValue();
        Vector rowInfo = value.get();
        if (rowInfo instanceof NamedVector) {
            // A named vector carries its own id, which overrides the record key.
            NamedVector nrowInfo = (NamedVector) rowInfo;
            rowID = Integer.parseInt(nrowInfo.getName());
        }
        for (Iterator<Vector.Element> itr = rowInfo.iterateNonZero(); itr.hasNext();) {
            Vector.Element elemInfo = itr.next();
            Tuple currElement = tupleFactory.newTuple(2);
            currElement.set(0, elemInfo.index());
            currElement.set(1, elemInfo.get());
            rowInfoBag.add(currElement);
        }
        currRow.set(0, rowID);
        currRow.set(1, rowInfoBag);
        return currRow;
    } catch (InterruptedException ie) {
        // Restore the interrupt flag so callers further up can still observe the interruption.
        Thread.currentThread().interrupt();
        LOG.error("Interrupted while reading", ie);
        throw new IOException(ie);
    } catch (NumberFormatException ne) {
        LOG.error("Possible use of non int values for NamedVector keys", ne);
        throw new IOException(ne);
    }
}

From source file:csvToSequence.ConvertToSeqLargeTxtVec.java

/**
 * Converts a CSV file into a Hadoop sequence file of {@code Text} keys and
 * {@code VectorWritable} values, streaming one row at a time.
 *
 * <p>The first line of the input is skipped. For every other line the first eight
 * comma-separated fields are parsed as doubles and become a dense vector; the ninth field is
 * parsed as an int and turned into the record key: {@code "Fraud/<field>"} when it equals 1,
 * otherwise {@code "Normal/<field>"}. The value written is the vector's delegate, so the name
 * is carried by the key only.
 *
 * @param args command-line arguments (unused; input and output paths are hard-coded)
 * @throws IOException if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws IOException {
    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();
    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class, VectorWritable.class);
    try {
        VectorWritable vec = new VectorWritable();
        BufferedReader br = new BufferedReader(new FileReader(filename));
        try {
            br.readLine(); // skip the first line (presumably a header row)

            String s;
            while ((s = br.readLine()) != null) {
                String[] value = s.split(",");
                double[] numValue = new double[8];
                for (int i = 0; i < 8; i++) {
                    numValue[i] = Double.parseDouble(value[i]);
                }

                String label;
                if (Integer.parseInt(value[8]) == 1) {
                    label = "Fraud/" + value[8];
                } else {
                    label = "Normal/" + value[8];
                }

                NamedVector oneV = new NamedVector(new DenseVector(numValue), label);
                vec.set(oneV.getDelegate());
                writer.append(new Text(oneV.getName()), vec);
            }
        } finally {
            // The reader used to be left open; close it even when a row fails to parse.
            br.close();
        }
    } finally {
        writer.close();
    }
}

From source file:csvToSequence.ConvertToSeqTextVecWritable.java

/**
 * Converts a CSV file into a Hadoop sequence file of {@code Text} keys and
 * {@code VectorWritable} values, loading all rows into memory before writing.
 *
 * <p>The first line of the input is skipped. For every other line the first eight
 * comma-separated fields are parsed as doubles and become a dense vector; the ninth field is
 * parsed as an int and turned into the vector name: {@code "Fraud/<field>"} when it equals 1,
 * otherwise {@code "Normal/<field>"}. Each record is keyed by that name and carries the
 * vector's delegate as its value.
 *
 * @param args command-line arguments (unused; input and output paths are hard-coded)
 * @throws FileNotFoundException if the input CSV does not exist
 * @throws IOException           if the input cannot be read or the output cannot be written
 */
public static void main(String[] args) throws FileNotFoundException, IOException {
    String filename = "/home/ivan/WorkDir/ccFraud.csv";
    String outputfilename = "/home/ivan/WorkDir/part-0000";

    Configuration conf = new Configuration();
    List<NamedVector> namedVectors = new ArrayList<>();

    // try-with-resources closes the reader, which used to be left open.
    try (BufferedReader br = new BufferedReader(new FileReader(filename))) {
        br.readLine(); // skip the first line (presumably a header row)

        String s;
        while ((s = br.readLine()) != null) {
            String[] value = s.split(",");
            double[] numValue = new double[8];
            for (int i = 0; i < 8; i++) {
                numValue[i] = Double.parseDouble(value[i]);
            }

            String label;
            if (Integer.parseInt(value[8]) == 1) {
                label = "Fraud/" + value[8];
            } else {
                label = "Normal/" + value[8];
            }

            namedVectors.add(new NamedVector(new DenseVector(numValue), label));
        }
    }

    FileSystem fs = FileSystem.get(conf);
    Path path = new Path(outputfilename);

    try (SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, path, Text.class,
            VectorWritable.class)) {
        // One VectorWritable is reused for every record; append() serializes it immediately.
        VectorWritable vec = new VectorWritable();
        for (NamedVector iter : namedVectors) {
            vec.set(iter.getDelegate());
            writer.append(new Text(iter.getName()), vec);
        }
    }
}

From source file:edu.indiana.d2i.htrc.io.index.solr.SequentialVectorFromSolr.java

License:Apache License

/**
 * Fetches term vectors from Solr for each id listed in a file and writes them to a sequence
 * file keyed by vector name.
 *
 * <p>Expects exactly four arguments: the Solr URL, the dictionary file, the ids file (one id
 * per line, read through the configured file system) and the output sequence file. Progress is
 * printed whenever the running vector count is a multiple of 1000, and the elapsed time is
 * printed at the end.
 *
 * @param args {@code [solrURL, dictionaryFile, idsFile, outputFile]}
 * @return 0 on success, -1 when the argument count is wrong
 * @throws Exception if the Solr client, the file system, or reading/writing fails
 */
@Override
public int run(String[] args) throws Exception {
    if (args.length != 4) {
        printUsage();
        // Stop here: falling through would index past the end of args.
        return -1;
    }

    String solrURL = args[0];
    String dictionaryFile = args[1];
    String idsFile = args[2];
    String outputFile = args[3];

    logger.info("SequentialVectorFromSolr ");
    logger.info(" - solrURL: " + solrURL);
    logger.info(" - dictionaryFile: " + dictionaryFile);
    logger.info(" - idsFile: " + idsFile); // on HDFS
    logger.info(" - outputFile: " + outputFile); // on HDFS

    Configuration conf = getConf();
    conf.set("htrc.solr.url", solrURL);
    conf.set(HTRCConstants.DICTIONARY_PATH, dictionaryFile);

    SolrClient client = new SolrClient(conf, true);
    FileSystem fs = FileSystem.get(conf);

    SequenceFile.Writer writer = new SequenceFile.Writer(fs, conf, new Path(outputFile), Text.class,
            VectorWritable.class);
    try {
        long t0 = System.nanoTime();
        DataInputStream fsinput = new DataInputStream(fs.open(new Path(idsFile)));
        BufferedReader reader = new BufferedReader(new InputStreamReader(fsinput));
        try {
            // The client takes an array of ids; a single-slot array is reused for each line.
            String[] ids = new String[1];
            VectorWritable value = new VectorWritable();
            Text key = new Text();
            int count = 0;
            String line;
            while ((line = reader.readLine()) != null) {
                ids[0] = line;
                Iterable<NamedVector> termVectors = client.getTermVectors(ids);
                for (NamedVector namedVector : termVectors) {
                    value.set(namedVector);
                    key.set(namedVector.getName());
                    writer.append(key, value);
                    count++;
                }
                if (count % 1000 == 0) {
                    System.out.println("Finish " + count + " volumes.");
                }
            }
            long t1 = System.nanoTime();
            System.out.println("Takes " + (t1 - t0) / 1e9 + " seconds");
        } finally {
            reader.close();
        }
    } finally {
        // Close the writer even when Solr or the ids file fails part-way through.
        writer.close();
    }

    return 0;
}

From source file:hk.newsRecommender.MatrixAndCluster.java

License:Open Source License

/**
 * Dumps clustering results from a sequence file into two local text files and echoes the same
 * lines to standard output.
 *
 * <p>The first file gets one {@code <vectorName>\t<clusterKey>} line per clustered point. The
 * second gets one {@code <clusterKey> <pointCount>} line per distinct cluster key. Every point
 * is expected to be a {@link NamedVector}. An {@link IOException} is caught and its stack
 * trace printed.
 *
 * @param conf Hadoop configuration used to obtain the file system
 * @param path sequence file of {@code IntWritable} keys and
 *             {@code WeightedPropertyVectorWritable} values
 */
public static void clusterOutput(Configuration conf, Path path) {
    try {
        FileSystem fs = FileSystem.get(conf);
        HashMap<String, Integer> clusterIds = new HashMap<String, Integer>(120);

        SequenceFile.Reader reader = new SequenceFile.Reader(fs, path, conf);
        try {
            // Per-point file: vector name \t cluster key
            BufferedWriter pointsOut = new BufferedWriter(
                    new FileWriter(new File("C:\\Users\\Hk\\Desktop\\ClusterPointsInfo.txt")));
            try {
                IntWritable key = new IntWritable();
                WeightedPropertyVectorWritable value = new WeightedPropertyVectorWritable();
                while (reader.next(key, value)) {
                    NamedVector vector = (NamedVector) value.getVector();
                    String vectorName = vector.getName();
                    String clusterKey = key.toString();
                    System.out.println(vectorName + "\t" + clusterKey);
                    pointsOut.write(vectorName + "\t" + clusterKey + "\n");

                    // Count how many points fall into each cluster.
                    Integer seen = clusterIds.get(clusterKey);
                    clusterIds.put(clusterKey, seen == null ? 1 : seen + 1);
                }
            } finally {
                // This writer used to be flushed but never closed before being replaced.
                pointsOut.close();
            }
        } finally {
            reader.close();
        }

        // Per-cluster file: cluster key and its size
        BufferedWriter sizesOut = new BufferedWriter(
                new FileWriter(new File("C:\\Users\\Hk\\Desktop\\ClusterPointsSize.txt")));
        try {
            Set<String> keys = clusterIds.keySet();
            for (String k : keys) {
                System.out.println(k + " " + clusterIds.get(k));
                sizesOut.write(k + " " + clusterIds.get(k) + "\n");
            }
        } finally {
            sizesOut.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}

From source file:org.qcri.pca.MahoutCompatibilityTest.java

License:Apache License

/**
 * Round-trips a vector through {@code VectorWritable} serialization and asserts that the copy
 * equals the original.
 *
 * <p>When the input is a {@link NamedVector}, the copy must also be one, both names must match,
 * and the name must be {@code "Victor"}.
 *
 * @param v the vector to serialize and read back
 * @throws IOException if {@code writeAndRead} fails
 */
private static void doTestVectorWritableEquals(Vector v) throws IOException {
    VectorWritable roundTripped = new VectorWritable();
    Writable source = new VectorWritable(v);
    writeAndRead(source, roundTripped);

    Vector decoded = roundTripped.get();
    boolean inputIsNamed = v instanceof NamedVector;
    if (inputIsNamed) {
        assertTrue(decoded instanceof NamedVector);
        String expectedName = ((NamedVector) v).getName();
        String actualName = ((NamedVector) decoded).getName();
        assertEquals(expectedName, actualName);
        assertEquals("Victor", expectedName);
    }
    assertEquals(v, decoded);
}