List of usage examples for the org.apache.mahout.math.SparseRowMatrix constructor
public SparseRowMatrix(int rows, int columns)
From source file:com.elex.dmp.lda.CVB0DocInferenceMapper.java
License:Apache License
@Override public void map(Text docId, VectorWritable doc, Context context) throws IOException, InterruptedException { int numTopics = getNumTopics(); Vector docTopics = new DenseVector(new double[numTopics]).assign(1.0 / numTopics); Matrix docModel = new SparseRowMatrix(numTopics, doc.get().size()); int maxIters = getMaxIters(); ModelTrainer modelTrainer = getModelTrainer(); for (int i = 0; i < maxIters; i++) { modelTrainer.getReadModel().trainDocTopicModel(doc.get(), docTopics, docModel); }//w ww . j av a 2 s . c om context.write(docId, new VectorWritable(docTopics)); }
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materialize(Path output, Configuration conf) throws IOException { FileSystem fs = output.getFileSystem(conf); Vector weightsPerLabel = null; Vector perLabelThetaNormalizer = null; Vector weightsPerFeature = null; Matrix weightsPerLabelAndFeature;//w ww .j ava 2 s . c o m float alphaI; FSDataInputStream in = fs.open(new Path(output, "naiveBayesModel.bin")); try { alphaI = in.readFloat(); weightsPerFeature = VectorWritable.readVector(in); weightsPerLabel = new DenseVector(VectorWritable.readVector(in)); perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in)); weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size()); for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) { weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in)); } } finally { Closeables.close(in, true); } NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI); model.validate(); return model; }
From source file:com.netease.news.classifier.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materializeLocal(String modelfile) throws IOException { Vector weightsPerLabel = null; Vector perLabelThetaNormalizer = null; Vector weightsPerFeature = null; Matrix weightsPerLabelAndFeature;//from w w w .j a v a 2 s. c o m float alphaI; System.out.println(modelfile); ClassLoader loader = NaiveBayesModel.class.getClassLoader(); InputStream sin = loader.getResourceAsStream(modelfile); DataInputStream in = new DataInputStream(sin); try { alphaI = in.readFloat(); weightsPerFeature = VectorWritable.readVector(in); weightsPerLabel = new DenseVector(VectorWritable.readVector(in)); perLabelThetaNormalizer = new DenseVector(VectorWritable.readVector(in)); weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), weightsPerFeature.size()); for (int label = 0; label < weightsPerLabelAndFeature.numRows(); label++) { weightsPerLabelAndFeature.assignRow(label, VectorWritable.readVector(in)); } } finally { in.close(); } NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, perLabelThetaNormalizer, alphaI); model.validate(); return model; }
From source file:de.tuberlin.dima.recsys.ssnmm.ratingprediction.Evaluate.java
License:Apache License
/**
 * Evaluates item-based rating prediction on the Yahoo songs dataset.
 *
 * <p>Streams the training ratings grouped by user; whenever a new user is
 * reached, predicts that user's 10 held-out ratings via a weighted item-item
 * similarity adjustment on top of a bias baseline (mu + userBias + itemBias)
 * and accumulates MAE/RMSE for both the kNN estimate and the baseline.
 *
 * @param args unused; all inputs are hard-coded local paths below
 * @throws IOException if any input file cannot be read
 */
public static void main(String[] args) throws IOException {

    int numUsers = 1823179;
    int numItems = 136736;
    // Global mean rating of the training set.
    double mu = 3.157255412010664;

    String distributedSimilarityMatrixPath = "/home/ssc/Desktop/yahoo/similarityMatrix/";
    String itemBiasesFilePath = "/home/ssc/Desktop/yahoo/itemBiases.tsv";
    String userBiasesFilePath = "/home/ssc/Desktop/yahoo/userBiases.tsv";
    String trainingSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/songs.tsv";
    // FIX: leading '/' was missing, silently turning this into a relative
    // path while every sibling path is absolute.
    String holdoutSetPath = "/home/ssc/Entwicklung/datasets/yahoo-songs/holdout.tsv";

    Matrix similarities = new SparseRowMatrix(numItems, numItems);

    System.out.println("Reading similarities...");
    int similaritiesRead = 0;
    Configuration conf = new Configuration();
    for (Pair<IntWritable, VectorWritable> pair : new SequenceFileDirIterable<IntWritable, VectorWritable>(
            new Path(distributedSimilarityMatrixPath), PathType.LIST, PathFilters.partFilter(), conf)) {
        int item = pair.getFirst().get();
        Iterator<Vector.Element> elements = pair.getSecond().get().iterateNonZero();
        while (elements.hasNext()) {
            Vector.Element elem = elements.next();
            similarities.setQuick(item, elem.index(), elem.get());
            similaritiesRead++;
        }
    }
    System.out.println("Found " + similaritiesRead + " similarities");

    Pattern sep = Pattern.compile("\t");

    double[] itemBiases = new double[numItems];
    double[] userBiases = new double[numUsers];

    System.out.println("Reading item biases");
    for (String line : new FileLineIterable(new File(itemBiasesFilePath))) {
        String[] parts = sep.split(line);
        itemBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    System.out.println("Reading user biases");
    for (String line : new FileLineIterable(new File(userBiasesFilePath))) {
        String[] parts = sep.split(line);
        userBiases[Integer.parseInt(parts[0])] = Double.parseDouble(parts[1]);
    }

    Iterator<Rating> trainRatings = new RatingsIterable(new File(trainingSetPath)).iterator();
    Iterator<Rating> heldOutRatings = new RatingsIterable(new File(holdoutSetPath)).iterator();

    int currentUser = 0;
    OpenIntDoubleHashMap prefs = new OpenIntDoubleHashMap();

    int usersProcessed = 0;

    RunningAverage rmse = new FullRunningAverage();
    RunningAverage mae = new FullRunningAverage();
    RunningAverage rmseBase = new FullRunningAverage();
    RunningAverage maeBase = new FullRunningAverage();

    while (trainRatings.hasNext()) {
        Rating rating = trainRatings.next();
        if (rating.user() != currentUser) {
            // A new user started: evaluate the 10 held-out ratings of the
            // user whose training prefs we just finished collecting.
            // NOTE(review): the very last user is never evaluated because
            // evaluation only triggers on a user change — confirm intended.
            for (int n = 0; n < 10; n++) {
                Rating heldOutRating = heldOutRatings.next();
                Preconditions.checkState(heldOutRating.user() == currentUser);

                double preference = 0.0;
                double totalSimilarity = 0.0;
                int count = 0;

                Iterator<Vector.Element> similarItems = similarities.viewRow(heldOutRating.item())
                        .iterateNonZero();
                while (similarItems.hasNext()) {
                    Vector.Element similarity = similarItems.next();
                    int similarItem = similarity.index();
                    if (prefs.containsKey(similarItem)) {
                        // Similarity-weighted deviation of the neighbor's
                        // rating from its own baseline estimate.
                        preference += similarity.get() * (prefs.get(similarItem)
                                - (mu + userBiases[currentUser] + itemBiases[similarItem]));
                        totalSimilarity += Math.abs(similarity.get());
                        count++;
                    }
                }

                double baselineEstimate = mu + userBiases[currentUser] + itemBiases[heldOutRating.item()];
                double estimate = baselineEstimate;
                // Only trust the kNN adjustment with more than one neighbor.
                // NOTE(review): looks like this may have been meant as
                // count > 0 — confirm against the original experiment.
                if (count > 1) {
                    estimate += preference / totalSimilarity;
                }

                double baseError = Math.abs(heldOutRating.rating() - baselineEstimate);
                maeBase.addDatum(baseError);
                rmseBase.addDatum(baseError * baseError);

                double error = Math.abs(heldOutRating.rating() - estimate);
                mae.addDatum(error);
                rmse.addDatum(error * error);
            }

            if (++usersProcessed % 10000 == 0) {
                System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
                        + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage()
                        + ", baseline RMSE " + Math.sqrt(rmseBase.getAverage()));
            }

            currentUser = rating.user();
            prefs.clear();
        }
        prefs.put(rating.item(), rating.rating());
    }

    System.out.println(usersProcessed + " users processed, MAE " + mae.getAverage() + ", RMSE "
            + Math.sqrt(rmse.getAverage()) + " | baseline MAE " + maeBase.getAverage() + ", baseline RMSE "
            + Math.sqrt(rmseBase.getAverage()));
}
From source file:io.ssc.relationdiscovery.SVD.java
License:Open Source License
public Matrix projectRowsOntoFeatureSpace() { SparseRowMatrix projection = new SparseRowMatrix(A.numRows(), rank); for (int patternIndex = 0; patternIndex < A.numRows(); patternIndex++) { Vector patternOccurrences = A.viewRow(patternIndex); for (int r = 0; r < rank; r++) { WeightedVector singularVector = singularVectors.get(r); double weight = singularVector.getWeight() * patternOccurrences.dot(singularVector); projection.setQuick(patternIndex, r, weight); }//w w w . ja va 2 s . c o m } return projection; }
From source file:io.ssc.relationdiscovery.Utils.java
License:Open Source License
/**
 * Parses a sparse occurrence file into a numRows x numColumns matrix.
 *
 * <p>Each line has the form {@code entityIndex\tpattern:value\tpattern:value...};
 * lines with no pattern entries are skipped. Indices in the file are 1-based
 * and are shifted to the matrix's 0-based coordinates, with patterns as rows
 * and entities as columns.
 */
public static Matrix loadOccurrences(File occurrences, int numRows, int numColumns) throws IOException {
    Matrix result = new SparseRowMatrix(numRows, numColumns);
    Pattern fieldSeparator = Pattern.compile("\t");
    Pattern entrySeparator = Pattern.compile(":");
    for (String line : new FileLineIterable(occurrences)) {
        String[] fields = fieldSeparator.split(line);
        if (fields.length <= 1) {
            continue; // no occurrence entries on this line
        }
        int entityIndex = Integer.parseInt(fields[0]);
        for (int field = 1; field < fields.length; field++) {
            String[] entry = entrySeparator.split(fields[field]);
            int patternIndex = Integer.parseInt(entry[0]);
            double value = Double.parseDouble(entry[1]);
            // Shift 1-based file indices to 0-based matrix coordinates.
            result.setQuick(patternIndex - 1, entityIndex - 1, value);
        }
    }
    return result;
}
From source file:org.pigml.classify.naivebayes.NaiveBayesModel.java
License:Apache License
public static NaiveBayesModel materialize(Path modelDir, Configuration conf) throws IOException { OpenIntDoubleHashMap weightsPerLabel = new OpenIntDoubleHashMap(); OpenIntDoubleHashMap weightsPerFeature = new OpenIntDoubleHashMap(); SequenceFileDirIterable<IntWritable, DoubleWritable> kvs; kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "label_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf); for (Pair<IntWritable, DoubleWritable> kv : kvs) { weightsPerLabel.put(kv.getFirst().get(), kv.getSecond().get()); }/*from w ww. j a v a 2s . c o m*/ kvs = new SequenceFileDirIterable<IntWritable, DoubleWritable>(new Path(modelDir, "feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf); for (Pair<IntWritable, DoubleWritable> kv : kvs) { weightsPerFeature.put(kv.getFirst().get(), kv.getSecond().get()); } Matrix weightsPerLabelAndFeature = null; SequenceFileDirIterable<IntWritable, VectorWritable> labelVectors = new SequenceFileDirIterable<IntWritable, VectorWritable>( new Path(modelDir, "label_feature_weights"), PathType.LIST, PathFilters.logsCRCFilter(), conf); for (Pair<IntWritable, VectorWritable> labelVector : labelVectors) { int label = labelVector.getFirst().get(); Vector vector = labelVector.getSecond().get(); if (weightsPerLabelAndFeature == null) { weightsPerLabelAndFeature = new SparseRowMatrix(weightsPerLabel.size(), vector.size()); } weightsPerLabelAndFeature.assignRow(label, vector); } // TODO alphaI is hard-coded to 1.0 // TODO perLabelThetaNormalizer is not supported yet NaiveBayesModel model = new NaiveBayesModel(weightsPerLabelAndFeature, weightsPerFeature, weightsPerLabel, 1.0f); model.validate(); return model; }