Usage examples for org.apache.mahout.math.function.Functions.ABS, a DoubleFunction
that returns the absolute value of its argument. To view the full source code for
each example of Functions.ABS below, click its Source link.
From source file:com.mapr.stats.GammaNormalDistributionTest.java
License:Apache License
/**
 * Checks that a GammaNormalDistribution trained on samples from N(1, 2)
 * recovers the mean and standard deviation, and that both its direct samples
 * and its posterior distribution's samples match the true distribution.
 *
 * The comparison is done on sorted samples (empirical quantiles) with the
 * extreme 10% tails on each side discarded, since tail quantiles of finite
 * samples are too noisy for a tight tolerance.
 */
@Test
public void testEstimation() {
    // Single sample count shared by training and comparison; the trimmed
    // views below are derived from it so the bounds cannot drift apart.
    final int samples = 10000;
    final Random gen = new Random(1);

    // Train on draws from N(mean = 1, sd = 2).
    GammaNormalDistribution gnd = new GammaNormalDistribution(0, 1, 1, gen);
    for (int i = 0; i < samples; i++) {
        gnd.add(gen.nextGaussian() * 2 + 1);
    }
    assertEquals(1.0, gnd.nextMean(), 0.05);
    assertEquals(2.0, gnd.nextSD(), 0.1);

    // Three sample streams: direct draws from the trained model, draws from
    // its posterior distribution, and fresh draws from the true N(1, 2).
    double[] x = new double[samples];
    double[] y = new double[samples];
    double[] z = new double[samples];
    AbstractContinousDistribution dist = gnd.posteriorDistribution();
    for (int i = 0; i < samples; i++) {
        x[i] = gnd.nextDouble();
        y[i] = dist.nextDouble();
        z[i] = gen.nextGaussian() * 2 + 1;
    }
    Arrays.sort(x);
    Arrays.sort(y);
    Arrays.sort(z);

    // Compare empirical quantiles, dropping the lowest and highest 10%.
    final int trim = samples / 10;
    final int kept = samples - 2 * trim;
    final Vector xv = new DenseVector(x).viewPart(trim, kept);
    final Vector yv = new DenseVector(y).viewPart(trim, kept);
    final Vector zv = new DenseVector(z).viewPart(trim, kept);
    final double diffX = xv.minus(zv).assign(Functions.ABS).maxValue();
    final double diffY = yv.minus(zv).assign(Functions.ABS).maxValue();
    assertEquals(0, diffX, 0.13);
    assertEquals(0, diffY, 0.13);
}
From source file:com.memonews.mahout.sentiment.SGDHelper.java
License:Apache License
static void analyzeState(final SGDInfo info, final int leakType, final int k, final State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best) throws IOException { final int bump = info.getBumps()[(int) Math.floor(info.getStep()) % info.getBumps().length]; final int scale = (int) Math.pow(10, Math.floor(info.getStep() / info.getBumps().length)); double maxBeta; double nonZeros; double positive; double norm;// ww w . j a va 2s . com double lambda = 0; double mu = 0; if (best != null) { final CrossFoldLearner state = best.getPayload().getLearner(); info.setAverageCorrect(state.percentCorrect()); info.setAverageLL(state.logLikelihood()); final OnlineLogisticRegression model = state.getModels().get(0); // finish off pending regularization model.close(); final Matrix beta = model.getBeta(); maxBeta = beta.aggregate(Functions.MAX, Functions.ABS); nonZeros = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(final double v) { return Math.abs(v) > 1.0e-6 ? 1 : 0; } }); positive = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(final double v) { return v > 0 ? 1 : 0; } }); norm = beta.aggregate(Functions.PLUS, Functions.ABS); lambda = best.getMappedParams()[0]; mu = best.getMappedParams()[1]; } else { maxBeta = 0; nonZeros = 0; positive = 0; norm = 0; } if (k % (bump * scale) == 0) { if (best != null) { ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model", best.getPayload().getLearner().getModels().get(0)); } info.setStep(info.getStep() + 0.25); System.out.printf("%.2f\t%.2f\t%.2f\t%.2f\t%.8g\t%.8g\t", maxBeta, nonZeros, positive, norm, lambda, mu); System.out.printf("%d\t%.3f\t%.2f\t%s\n", k, info.getAverageLL(), info.getAverageCorrect() * 100, LEAK_LABELS[leakType % 3]); } }
From source file:com.tdunning.ch16.train.TrainNewsGroups.java
License:Apache License
public static void main(String[] args) throws IOException { File base = new File(args[0]); int leakType = 0; if (args.length > 1) { leakType = Integer.parseInt(args[1]); }//from w ww . ja v a2s . c o m Dictionary newsGroups = new Dictionary(); encoder.setProbes(2); AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, FEATURES, new L1()); learningAlgorithm.setInterval(800); learningAlgorithm.setAveragingWindow(500); List<File> files = Lists.newArrayList(); File[] directories = base.listFiles(); Arrays.sort(directories, Ordering.usingToString()); for (File newsgroup : directories) { if (newsgroup.isDirectory()) { newsGroups.intern(newsgroup.getName()); files.addAll(Arrays.asList(newsgroup.listFiles())); } } Collections.shuffle(files); System.out.printf("%d training files\n", files.size()); System.out.printf("%s\n", Arrays.asList(directories)); double averageLL = 0; double averageCorrect = 0; int k = 0; double step = 0; int[] bumps = { 1, 2, 5 }; for (File file : files) { String ng = file.getParentFile().getName(); int actual = newsGroups.intern(ng); Vector v = encodeFeatureVector(file); learningAlgorithm.train(actual, v); k++; int bump = bumps[(int) Math.floor(step) % bumps.length]; int scale = (int) Math.pow(10, Math.floor(step / bumps.length)); State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest(); double maxBeta; double nonZeros; double positive; double norm; double lambda = 0; double mu = 0; if (best != null) { CrossFoldLearner state = best.getPayload().getLearner(); averageCorrect = state.percentCorrect(); averageLL = state.logLikelihood(); OnlineLogisticRegression model = state.getModels().get(0); // finish off pending regularization model.close(); Matrix beta = model.getBeta(); maxBeta = beta.aggregate(Functions.MAX, Functions.ABS); nonZeros = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(double v) { return Math.abs(v) > 1.0e-6 ? 
1 : 0; } }); positive = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(double v) { return v > 0 ? 1 : 0; } }); norm = beta.aggregate(Functions.PLUS, Functions.ABS); lambda = learningAlgorithm.getBest().getMappedParams()[0]; mu = learningAlgorithm.getBest().getMappedParams()[1]; } else { maxBeta = 0; nonZeros = 0; positive = 0; norm = 0; } if (k % (bump * scale) == 0) { if (learningAlgorithm.getBest() != null) { ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model", learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0)); } step += 0.25; System.out.printf("%.2f\t%.2f\t%.2f\t%.2f\t%.8g\t%.8g\t", maxBeta, nonZeros, positive, norm, lambda, mu); System.out.printf("%d\t%.3f\t%.2f\t%s\n", k, averageLL, averageCorrect * 100, LEAK_LABELS[leakType % 3]); } } learningAlgorithm.close(); dissect(newsGroups, learningAlgorithm, files); System.out.println("exiting main"); ModelSerializer.writeBinary("/tmp/news-group.model", learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0)); }
From source file:org.carrot2.clustering.lingo.SimpleLabelAssigner.java
License:Open Source License
public void assignLabels(LingoProcessingContext context, DoubleMatrix2D stemCos, IntIntOpenHashMap filteredRowToStemIndex, DoubleMatrix2D phraseCos) { final PreprocessingContext preprocessingContext = context.preprocessingContext; final int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex; final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex; final int[] mostFrequentOriginalWordIndex = preprocessingContext.allStems.mostFrequentOriginalWordIndex; final int desiredClusterCount = stemCos.columns(); int[] candidateStemIndices = new int[desiredClusterCount]; double[] candidateStemScores = new double[desiredClusterCount]; int[] candidatePhraseIndices = new int[desiredClusterCount]; Arrays.fill(candidatePhraseIndices, -1); double[] candidatePhraseScores = new double[desiredClusterCount]; MatrixUtils.maxInColumns(stemCos, candidateStemIndices, candidateStemScores, Functions.ABS); if (phraseCos != null) { MatrixUtils.maxInColumns(phraseCos, candidatePhraseIndices, candidatePhraseScores, Functions.ABS); }//from w ww .j a v a2s. co m // Choose between single words and phrases for each base vector final int[] clusterLabelFeatureIndex = new int[desiredClusterCount]; double[] clusterLabelScore = new double[desiredClusterCount]; for (int i = 0; i < desiredClusterCount; i++) { final int phraseFeatureIndex = candidatePhraseIndices[i]; final int stemIndex = filteredRowToStemIndex.get(candidateStemIndices[i]); final double phraseScore = candidatePhraseScores[i]; if (phraseFeatureIndex >= 0 && phraseScore > candidateStemScores[i]) { clusterLabelFeatureIndex[i] = labelsFeatureIndex[phraseFeatureIndex + firstPhraseIndex]; clusterLabelScore[i] = phraseScore; } else { clusterLabelFeatureIndex[i] = mostFrequentOriginalWordIndex[stemIndex]; clusterLabelScore[i] = candidateStemScores[i]; } } context.clusterLabelFeatureIndex = clusterLabelFeatureIndex; context.clusterLabelScore = clusterLabelScore; }