List of usage examples for the org.apache.mahout.common.iterator.FixedSizeSamplingIterator constructor
public FixedSizeSamplingIterator(int size, Iterator<T> source)
From source file:Vectors.java
License:Apache License
public static Vector maybeSample(Vector original, int sampleSize) { if (original.getNumNondefaultElements() <= sampleSize) { return original; }//from w w w . j a v a 2 s . c o m Vector sample = original.like(); Iterator<Vector.Element> sampledElements = new FixedSizeSamplingIterator<Vector.Element>(sampleSize, original.iterateNonZero()); while (sampledElements.hasNext()) { Vector.Element elem = sampledElements.next(); sample.setQuick(elem.index(), elem.get()); } return sample; }
From source file:de.tuberlin.dima.recsys.ssnmm.interactioncut.InteractionCutDataModelBuilder.java
License:Apache License
/**
 * Builds a {@link GenericDataModel} from {@code trainingData}, replacing the
 * preferences of every user that has more than {@code maxPrefsPerUser} of them
 * with the preferences produced by a {@link FixedSizeSamplingIterator}
 * constructed with {@code maxPrefsPerUser}. Users at or below the limit keep
 * their original {@link PreferenceArray} instance.
 *
 * @param trainingData preferences keyed by user ID
 * @return a data model backed by the (possibly sampled) per-user preferences
 */
@Override
public DataModel buildDataModel(FastByIDMap<PreferenceArray> trainingData) {
    FastByIDMap<PreferenceArray> reduced = new FastByIDMap<PreferenceArray>();

    for (LongPrimitiveIterator users = trainingData.keySetIterator(); users.hasNext();) {
        long userID = users.nextLong();
        PreferenceArray userPrefs = trainingData.get(userID);

        if (prefsWithinLimit(userPrefs)) {
            // Nothing to cut for this user; reuse the existing array as-is.
            reduced.put(userID, userPrefs);
            continue;
        }

        Iterator<Preference> sampler =
                new FixedSizeSamplingIterator<Preference>(maxPrefsPerUser, userPrefs.iterator());
        Preference[] kept = Iterators.toArray(sampler, Preference.class);
        reduced.put(userID, new GenericUserPreferenceArray(Arrays.asList(kept)));
    }

    return new GenericDataModel(reduced);
}

/** Tells whether {@code prefs} has no more than {@code maxPrefsPerUser} entries. */
private boolean prefsWithinLimit(PreferenceArray prefs) {
    return prefs.length() <= maxPrefsPerUser;
}
From source file:io.ssc.relationdiscovery.KMeans.java
License:Open Source License
/**
 * Stores the input matrix, the cluster count and the distance measure, then
 * fills the {@code centroids} array from the rows produced by a
 * {@link FixedSizeSamplingIterator} constructed with {@code k} over the slices
 * of {@code A}. Each centroid is created with its array position as its index.
 *
 * @param A               the matrix whose slices are iterated for the initial centroids
 * @param k               length of the centroid array and size given to the sampler
 * @param distanceMeasure the distance measure stored on this instance
 */
public KMeans(Matrix A, int k, DistanceMeasure distanceMeasure) {
    this.A = A;
    this.k = k;
    this.distanceMeasure = distanceMeasure;
    this.centroids = new Centroid[k];

    log.info("Picking {} initial centroids", k);

    Iterator<MatrixSlice> seeds = new FixedSizeSamplingIterator<MatrixSlice>(k, A.iterator());
    for (int slot = 0; seeds.hasNext(); slot++) {
        MatrixSlice row = seeds.next();
        centroids[slot] = new Centroid(slot, row.vector());
    }
}
From source file:org.gpfvic.mahout.cf.taste.impl.recommender.SamplingCandidateItemsStrategy.java
License:Apache License
@Override protected FastIDSet doGetCandidateItems(long[] preferredItemIDs, DataModel dataModel, boolean includeKnownItems) throws TasteException { LongPrimitiveIterator preferredItemIDsIterator = new LongPrimitiveArrayIterator(preferredItemIDs); if (preferredItemIDs.length > maxItems) { double samplingRate = (double) maxItems / preferredItemIDs.length; // log.info("preferredItemIDs.length {}, samplingRate {}", preferredItemIDs.length, samplingRate); preferredItemIDsIterator = new SamplingLongPrimitiveIterator(preferredItemIDsIterator, samplingRate); }/*from www . java 2 s. c om*/ FastIDSet possibleItemsIDs = new FastIDSet(); while (preferredItemIDsIterator.hasNext()) { long itemID = preferredItemIDsIterator.nextLong(); PreferenceArray prefs = dataModel.getPreferencesForItem(itemID); int prefsLength = prefs.length(); if (prefsLength > maxUsersPerItem) { Iterator<Preference> sampledPrefs = new FixedSizeSamplingIterator<>(maxUsersPerItem, prefs.iterator()); while (sampledPrefs.hasNext()) { addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(sampledPrefs.next().getUserID())); } } else { for (int i = 0; i < prefsLength; i++) { addSomeOf(possibleItemsIDs, dataModel.getItemIDsFromUser(prefs.getUserID(i))); } } } if (!includeKnownItems) { possibleItemsIDs.removeAll(preferredItemIDs); } return possibleItemsIDs; }