Usage examples for org.apache.mahout.math.function.Functions.ABS, a DoubleFunction
that returns the absolute value of its argument. To view the full source code for
each example of Functions.ABS below, click its Source link.
From source file:com.mapr.stats.GammaNormalDistributionTest.java
License:Apache License
/**
 * Checks that a GammaNormalDistribution trained on samples from N(1, 2)
 * recovers the mean and standard deviation, and that both its direct samples
 * and its posterior distribution's samples match the true distribution.
 *
 * The comparison is done on sorted samples (empirical quantiles) with the
 * extreme 10% tails on each side discarded, since tail quantiles of finite
 * samples are too noisy for a tight tolerance.
 */
@Test
public void testEstimation() {
    // Single sample count shared by training and comparison; the trimmed
    // views below are derived from it so the bounds cannot drift apart.
    final int samples = 10000;
    final Random gen = new Random(1);

    // Train on draws from N(mean = 1, sd = 2).
    GammaNormalDistribution gnd = new GammaNormalDistribution(0, 1, 1, gen);
    for (int i = 0; i < samples; i++) {
        gnd.add(gen.nextGaussian() * 2 + 1);
    }
    assertEquals(1.0, gnd.nextMean(), 0.05);
    assertEquals(2.0, gnd.nextSD(), 0.1);

    // Three sample streams: direct draws from the trained model, draws from
    // its posterior distribution, and fresh draws from the true N(1, 2).
    double[] x = new double[samples];
    double[] y = new double[samples];
    double[] z = new double[samples];
    AbstractContinousDistribution dist = gnd.posteriorDistribution();
    for (int i = 0; i < samples; i++) {
        x[i] = gnd.nextDouble();
        y[i] = dist.nextDouble();
        z[i] = gen.nextGaussian() * 2 + 1;
    }
    Arrays.sort(x);
    Arrays.sort(y);
    Arrays.sort(z);

    // Compare empirical quantiles, dropping the lowest and highest 10%.
    final int trim = samples / 10;
    final int kept = samples - 2 * trim;
    final Vector xv = new DenseVector(x).viewPart(trim, kept);
    final Vector yv = new DenseVector(y).viewPart(trim, kept);
    final Vector zv = new DenseVector(z).viewPart(trim, kept);
    final double diffX = xv.minus(zv).assign(Functions.ABS).maxValue();
    final double diffY = yv.minus(zv).assign(Functions.ABS).maxValue();
    assertEquals(0, diffX, 0.13);
    assertEquals(0, diffY, 0.13);
}
From source file:com.memonews.mahout.sentiment.SGDHelper.java
License:Apache License
static void analyzeState(final SGDInfo info, final int leakType, final int k, final State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best) throws IOException { final int bump = info.getBumps()[(int) Math.floor(info.getStep()) % info.getBumps().length]; final int scale = (int) Math.pow(10, Math.floor(info.getStep() / info.getBumps().length)); double maxBeta; double nonZeros; double positive; double norm;// ww w . j a va 2s . com double lambda = 0; double mu = 0; if (best != null) { final CrossFoldLearner state = best.getPayload().getLearner(); info.setAverageCorrect(state.percentCorrect()); info.setAverageLL(state.logLikelihood()); final OnlineLogisticRegression model = state.getModels().get(0); // finish off pending regularization model.close(); final Matrix beta = model.getBeta(); maxBeta = beta.aggregate(Functions.MAX, Functions.ABS); nonZeros = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(final double v) { return Math.abs(v) > 1.0e-6 ? 1 : 0; } }); positive = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(final double v) { return v > 0 ? 1 : 0; } }); norm = beta.aggregate(Functions.PLUS, Functions.ABS); lambda = best.getMappedParams()[0]; mu = best.getMappedParams()[1]; } else { maxBeta = 0; nonZeros = 0; positive = 0; norm = 0; } if (k % (bump * scale) == 0) { if (best != null) { ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model", best.getPayload().getLearner().getModels().get(0)); } info.setStep(info.getStep() + 0.25); System.out.printf("%.2f\t%.2f\t%.2f\t%.2f\t%.8g\t%.8g\t", maxBeta, nonZeros, positive, norm, lambda, mu); System.out.printf("%d\t%.3f\t%.2f\t%s\n", k, info.getAverageLL(), info.getAverageCorrect() * 100, LEAK_LABELS[leakType % 3]); } }
From source file:com.tdunning.ch16.train.TrainNewsGroups.java
License:Apache License
public static void main(String[] args) throws IOException { File base = new File(args[0]); int leakType = 0; if (args.length > 1) { leakType = Integer.parseInt(args[1]); }//from w ww . ja v a2s . c o m Dictionary newsGroups = new Dictionary(); encoder.setProbes(2); AdaptiveLogisticRegression learningAlgorithm = new AdaptiveLogisticRegression(20, FEATURES, new L1()); learningAlgorithm.setInterval(800); learningAlgorithm.setAveragingWindow(500); List<File> files = Lists.newArrayList(); File[] directories = base.listFiles(); Arrays.sort(directories, Ordering.usingToString()); for (File newsgroup : directories) { if (newsgroup.isDirectory()) { newsGroups.intern(newsgroup.getName()); files.addAll(Arrays.asList(newsgroup.listFiles())); } } Collections.shuffle(files); System.out.printf("%d training files\n", files.size()); System.out.printf("%s\n", Arrays.asList(directories)); double averageLL = 0; double averageCorrect = 0; int k = 0; double step = 0; int[] bumps = { 1, 2, 5 }; for (File file : files) { String ng = file.getParentFile().getName(); int actual = newsGroups.intern(ng); Vector v = encodeFeatureVector(file); learningAlgorithm.train(actual, v); k++; int bump = bumps[(int) Math.floor(step) % bumps.length]; int scale = (int) Math.pow(10, Math.floor(step / bumps.length)); State<AdaptiveLogisticRegression.Wrapper, CrossFoldLearner> best = learningAlgorithm.getBest(); double maxBeta; double nonZeros; double positive; double norm; double lambda = 0; double mu = 0; if (best != null) { CrossFoldLearner state = best.getPayload().getLearner(); averageCorrect = state.percentCorrect(); averageLL = state.logLikelihood(); OnlineLogisticRegression model = state.getModels().get(0); // finish off pending regularization model.close(); Matrix beta = model.getBeta(); maxBeta = beta.aggregate(Functions.MAX, Functions.ABS); nonZeros = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(double v) { return Math.abs(v) > 1.0e-6 ? 
1 : 0; } }); positive = beta.aggregate(Functions.PLUS, new DoubleFunction() { @Override public double apply(double v) { return v > 0 ? 1 : 0; } }); norm = beta.aggregate(Functions.PLUS, Functions.ABS); lambda = learningAlgorithm.getBest().getMappedParams()[0]; mu = learningAlgorithm.getBest().getMappedParams()[1]; } else { maxBeta = 0; nonZeros = 0; positive = 0; norm = 0; } if (k % (bump * scale) == 0) { if (learningAlgorithm.getBest() != null) { ModelSerializer.writeBinary("/tmp/news-group-" + k + ".model", learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0)); } step += 0.25; System.out.printf("%.2f\t%.2f\t%.2f\t%.2f\t%.8g\t%.8g\t", maxBeta, nonZeros, positive, norm, lambda, mu); System.out.printf("%d\t%.3f\t%.2f\t%s\n", k, averageLL, averageCorrect * 100, LEAK_LABELS[leakType % 3]); } } learningAlgorithm.close(); dissect(newsGroups, learningAlgorithm, files); System.out.println("exiting main"); ModelSerializer.writeBinary("/tmp/news-group.model", learningAlgorithm.getBest().getPayload().getLearner().getModels().get(0)); }
From source file:org.carrot2.clustering.lingo.SimpleLabelAssigner.java
License:Open Source License
public void assignLabels(LingoProcessingContext context, DoubleMatrix2D stemCos, IntIntOpenHashMap filteredRowToStemIndex, DoubleMatrix2D phraseCos) { final PreprocessingContext preprocessingContext = context.preprocessingContext; final int firstPhraseIndex = preprocessingContext.allLabels.firstPhraseIndex; final int[] labelsFeatureIndex = preprocessingContext.allLabels.featureIndex; final int[] mostFrequentOriginalWordIndex = preprocessingContext.allStems.mostFrequentOriginalWordIndex; final int desiredClusterCount = stemCos.columns(); int[] candidateStemIndices = new int[desiredClusterCount]; double[] candidateStemScores = new double[desiredClusterCount]; int[] candidatePhraseIndices = new int[desiredClusterCount]; Arrays.fill(candidatePhraseIndices, -1); double[] candidatePhraseScores = new double[desiredClusterCount]; MatrixUtils.maxInColumns(stemCos, candidateStemIndices, candidateStemScores, Functions.ABS); if (phraseCos != null) { MatrixUtils.maxInColumns(phraseCos, candidatePhraseIndices, candidatePhraseScores, Functions.ABS); }//from w ww .j a v a2s. co m // Choose between single words and phrases for each base vector final int[] clusterLabelFeatureIndex = new int[desiredClusterCount]; double[] clusterLabelScore = new double[desiredClusterCount]; for (int i = 0; i < desiredClusterCount; i++) { final int phraseFeatureIndex = candidatePhraseIndices[i]; final int stemIndex = filteredRowToStemIndex.get(candidateStemIndices[i]); final double phraseScore = candidatePhraseScores[i]; if (phraseFeatureIndex >= 0 && phraseScore > candidateStemScores[i]) { clusterLabelFeatureIndex[i] = labelsFeatureIndex[phraseFeatureIndex + firstPhraseIndex]; clusterLabelScore[i] = phraseScore; } else { clusterLabelFeatureIndex[i] = mostFrequentOriginalWordIndex[stemIndex]; clusterLabelScore[i] = candidateStemScores[i]; } } context.clusterLabelFeatureIndex = clusterLabelFeatureIndex; context.clusterLabelScore = clusterLabelScore; }