List of usage examples for org.apache.mahout.math Matrix assignRow
/**
 * Declaration of the {@code Matrix} method exercised by the examples below.
 *
 * @param row   index of the row to assign
 * @param other the vector supplying the row's values
 * @return a {@code Matrix}; the examples below ignore the return value
 */
Matrix assignRow(int row, Vector other);
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException { FileSystem fs = output.getFileSystem(conf); Vector weightsPerLabel = null; Vector perLabelThetaNormalizer = null; Vector weightsPerFeature = null; Matrix weightsPerLabelAndFeature; float alphaI; FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin")); try {//from w ww .j av a2s. c om alphaI = in.readFloat(); weightsPerFeature = VectorWritable.readVector(in); weightsPerLabel = new DenseVector(VectorWritable.readVector(in)); perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in)); weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size()); for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) { weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in)); } } finally { Closeables.close(in, true); } NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI); model.validate(); return model; }
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materializeLocal(String modelfile) throws IOException { Vector weightsPerLabel = null; Vector perLabelThetaNormalizer = null; Vector weightsPerFeature = null; Matrix weightsPerLabelAndFeature; float alphaI; System.out.println(modelfile); ClassLoader loader = NaiveBayesModel.class.getClassLoader(); InputStream sin = loader.getResourceAsStream(modelfile); DataInputStream in = new DataInputStream(sin); try {// w ww.ja va 2s.co m alphaI = in.readFloat(); weightsPerFeature = VectorWritable.readVector(in); weightsPerLabel = new DenseVector(VectorWritable.readVector(in)); perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in)); weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size()); for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) { weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in)); } } finally { in.close(); } NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI); model.validate(); return model; }
From source file:com.skp.experiment.math.als.hadoop.ImplicitFeedbackAlternatingLeastSquaresReasonSolver.java
License:Apache License
private Matrix YtransposeY(OpenIntObjectHashMap<Vector> Y) { Matrix compactedY = new DenseMatrix(Y.size(), numFeatures); IntArrayList indexes = Y.keys();//from w w w. j a va2s . com indexes.quickSort(); int row = 0; for (int index : indexes.elements()) { compactedY.assignRow(row++, Y.get(index)); } return compactedY.transpose().times(compactedY); }
From source file:io.ssc.relationdiscovery.SVD.java
License:Open Source License
/**
 * Runs the Lanczos solver on {@code lanczosState} for {@code rank + OVERSHOOT} steps, collects
 * that many right singular vectors from the state as rows of a dense matrix, and hands the
 * matrix to {@code findSingularVectors}.
 */
public void compute() {
  new LanczosSolver().solve(lanczosState, rank + OVERSHOOT, false);

  Matrix candidates = new DenseMatrix(rank + OVERSHOOT, A.numCols());
  int n = 0;
  while (n < rank + OVERSHOOT) {
    Vector rightSingularVector = lanczosState.getRightSingularVector(n);
    candidates.assignRow(n, rightSingularVector);
    n++;
  }

  findSingularVectors(candidates);
}
From source file:mlbench.bayes.BayesUtils.java
License:Apache License
public static NaiveBayesModel readModelFromDir(Path base, Configuration conf) { float alphaI = conf.getFloat(ThetaMapper.ALPHA_I, 1.0f); // read feature sums and label sums Vector scoresPerLabel = null; Vector scoresPerFeature = null; for (Pair<Text, VectorWritable> record : new SequenceFileDirIterable<Text, VectorWritable>( new Path(base, TrainNaiveBayesJob.WEIGHTS), PathType.LIST, PathFilters.partFilter(), conf)) { String key = record.getFirst().toString(); VectorWritable value = record.getSecond(); if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_FEATURE)) { scoresPerFeature = value.get(); } else if (key.equals(TrainNaiveBayesJob.WEIGHTS_PER_LABEL)) { scoresPerLabel = value.get(); }/*from w ww .jav a2 s . c o m*/ } // Preconditions.checkNotNull(scoresPerFeature); // Preconditions.checkNotNull(scoresPerLabel); Matrix scoresPerLabelAndFeature = new SparseMatrix(scoresPerLabel.size(), scoresPerFeature.size()); for (Pair<IntWritable, VectorWritable> entry : new SequenceFileDirIterable<IntWritable, VectorWritable>( new Path(base, TrainNaiveBayesJob.SUMMED_OBSERVATIONS), PathType.LIST, PathFilters.partFilter(), conf)) { scoresPerLabelAndFeature.assignRow(entry.getFirst().get(), entry.getSecond().get()); } Vector perlabelThetaNormalizer = scoresPerLabel.like(); /* * for (Pair<Text,VectorWritable> entry : new * SequenceFileDirIterable<Text,VectorWritable>( new Path(base, * TrainNaiveBayesJob.THETAS), PathType.LIST, PathFilters.partFilter(), * conf)) { if (entry.getFirst().toString().equals(TrainNaiveBayesJob. * LABEL_THETA_NORMALIZER)) { perlabelThetaNormalizer = * entry.getSecond().get(); } } * * Preconditions.checkNotNull(perlabelThetaNormalizer); */ return new NaiveBayesModel(scoresPerLabelAndFeature, scoresPerFeature, scoresPerLabel, perlabelThetaNormalizer, alphaI, false); }
From source file:org.pigml.classify.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materialize(Path modelDir, Configuration conf) throws IOException { OpenIntDoubleHashMap weightsPerLabel = new OpenIntDoubleHashMap(); OpenIntDoubleHashMap weightsPerFeature = new OpenIntDoubleHashMap(); SequenceFileDirIterable<IntWritable, DoubleWritable> kvs; kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "label_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf); for (Pair<IntWritable, DoubleWritable> kv : kvs) { weightsPerLabel.put(kv.getFirst().get(), kv.getSecond().get()); }//from w w w . j a va 2 s . c o m kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf); for (Pair<IntWritable, DoubleWritable> kv : kvs) { weightsPerFeature.put(kv.getFirst().get(), kv.getSecond().get()); } Matrix weightsPerLabelAndFeature = null; SequenceFileDirIterable<IntWritable, VectorWritable> labelVectors = new SequenceFileDirIterable<IntWritable, VectorWritable>( new Path(modelDir, "label_feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf); for (Pair<IntWritable, VectorWritable> labelVector : labelVectors) { int label = labelVector.getFirst().get(); Vector vector = labelVector.getSecond().get(); if (weightsPerLabelAndFeature == null) { weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), vector.size()); } weightsPerLabelAndFeature.assignRow(label, vector); } // TODO alphaI is hard-coded to 1.0 // TODO perLabelThetaNormalizer is not supported yet NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, 1.0f); model.validate(); return model; }
From source file:org.qcri.pca.PCATest.java
License:Apache License
@Test public void crossTestIterationOfMapReducePPCASequentialPPCA() throws Exception { Matrix C_central = PCACommon.randomMatrix(D, d); double ss = PCACommon.randSS(); InitialValues initValSeq = new InitialValues(C_central, ss); InitialValues initValMR = new InitialValues(C_central.clone(), ss); //1. run sequential Matrix Ye_central = new DenseMatrix(N, D); int row = 0;/* w ww .j a va2 s . com*/ for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null, conf)) { Ye_central.assignRow(row, vw.get()); row++; } double bishopSeqErr = ppcaDriver.runSequential(conf, Ye_central, initValSeq, 1); //2. run mapreduce DistributedRowMatrix Ye = new DistributedRowMatrix(input, tmp, N, D); Ye.setConf(conf); double bishopMRErr = ppcaDriver.runMapReduce(conf, Ye, initValMR, output, N, D, d, 1, 1, 1, 1); Assert.assertEquals("ss value is different in sequential and mapreduce PCA", initValSeq.ss, initValMR.ss, EPSILON); double seqCTrace = PCACommon.trace(initValSeq.C); double mrCTrace = PCACommon.trace(initValMR.C); Assert.assertEquals("C value is different in sequential and mapreduce PCA", seqCTrace, mrCTrace, EPSILON); Assert.assertEquals("The PPCA error between sequntial and mapreduce methods is too different: " + bishopSeqErr + "!= " + bishopMRErr, bishopSeqErr, bishopMRErr, EPSILON); }
From source file:org.qcri.pca.SPCADriver.java
/*** * PPCA: sequential PPCA based on the paper from Tipping and Bishop * /*from w ww. j a va 2 s . c om*/ * @param conf * the configuration * @param input * the path to the input matrix Y * @param output * the output path (not used currently) * @param nRows * number or rows in Y * @param nCols * number of columns in Y * @param nPCs * number of desired principal components * @return the error * @throws Exception */ double runSequential(Configuration conf, Path input, Path output, final int nRows, final int nCols, final int nPCs) throws Exception { Matrix centralY = new DenseMatrix(nRows, nCols); FileSystem fs = FileSystem.get(input.toUri(), conf); if (fs.listStatus(input).length == 0) { System.err.println("No file under " + input); return 0; } int row = 0; for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null, conf)) { centralY.assignRow(row, vw.get()); row++; } Matrix centralC = PCACommon.randomMatrix(nCols, nPCs); double ss = PCACommon.randSS(); InitialValues initVal = new InitialValues(centralC, ss); // Matrix sampledYe = sample(centralY); // runSequential(conf, sampledYe, initVal, 100); double error = runSequential(conf, centralY, initVal, 100); return error; }
From source file:org.qcri.pca.SPCADriver.java
/** * PPCA: sequential PPCA based on the matlab implementation of Jacob Verbeek * /*ww w. j a v a 2 s . c o m*/ * @param conf * the configuration * @param input * the path to the input matrix Y * @param output * the output path (not used currently) * @param nRows * number or rows in Y * @param nCols * number of columns in Y * @param nPCs * number of desired principal components * @return the error * @throws Exception */ double runSequential_JacobVersion(Configuration conf, Path input, Path output, final int nRows, final int nCols, final int nPCs) throws Exception { Matrix centralY = new DenseMatrix(nRows, nCols); FileSystem fs = FileSystem.get(input.toUri(), conf); if (fs.listStatus(input).length == 0) { System.err.println("No file under " + input); return 0; } int row = 0; for (VectorWritable vw : new SequenceFileDirValueIterable<VectorWritable>(input, PathType.LIST, null, conf)) { centralY.assignRow(row, vw.get()); row++; } Matrix C = PCACommon.randomMatrix(nCols, nPCs); double ss = PCACommon.randSS(); InitialValues initVal = new InitialValues(C, ss); double error = runSequential_JacobVersion(conf, centralY, initVal, 100); return error; }