Example usage for org.apache.commons.math3.linear OpenMapRealVector OpenMapRealVector

List of usage examples for org.apache.commons.math3.linear OpenMapRealVector OpenMapRealVector

Introduction

On this page you can find example usages of the org.apache.commons.math3.linear OpenMapRealVector constructor OpenMapRealVector(RealVector v).

Prototype

public OpenMapRealVector(RealVector v) 

Source Link

Document

Generic copy constructor.

Usage

From source file:ir.project.TFIDFBookVector.java

/**
 * Creates an empty TF-IDF vector with the given dimensionality and blank
 * book metadata.
 *
 * @param numTerms the number of term dimensions in the sparse vector
 */
public TFIDFBookVector(int numTerms) {
    // Sparse representation: only non-zero term weights are stored.
    this.vector = new OpenMapRealVector(numTerms);
    this.author = "";
    this.isbn = "";
    this.title = "";
}

From source file:iitg.cs570.assign2.webgraph.DocVector.java

/**
 * Builds a document vector sized to the supplied term map.
 *
 * @param terms mapping from each term to its associated integer for this document
 */
public DocVector(Map<String, Integer> terms) {
    // One sparse dimension per distinct term in the map.
    this.vector = new OpenMapRealVector(terms.size());
    this.terms = terms;
}

From source file:ir.project.TFIDFBookVector.java

/**
 * Creates an empty TF-IDF vector with the given dimensionality and the
 * supplied book metadata.
 *
 * @param numTerms the number of term dimensions in the sparse vector
 * @param title    the book title
 * @param isbn     the book ISBN
 * @param author   the book author
 */
public TFIDFBookVector(int numTerms, String title, String isbn, String author) {
    // Sparse storage: only non-zero term weights are kept in memory.
    this.vector = new OpenMapRealVector(numTerms);
    this.author = author;
    this.isbn = isbn;
    this.title = title;
}

From source file:com.mothsoft.alexis.engine.textual.DocumentFeatures.java

/**
 * Builds sparse feature vectors for a document: one for terms, one for
 * associations, and one for named entities, each indexed by ids obtained
 * from the supplied context.
 *
 * @param document the document whose features are extracted
 * @param context  provides the integer ids used as vector indices
 */
public DocumentFeatures(final Document document, final DocumentFeatureContext context) {
    // Integer.MAX_VALUE dimensionality: the sparse vectors must be able to
    // address any id the context may hand out.
    this.termVector = new OpenMapRealVector(Integer.MAX_VALUE);
    this.associationVector = new OpenMapRealVector(Integer.MAX_VALUE);
    this.nameVector = new OpenMapRealVector(Integer.MAX_VALUE);

    // Each association contributes a count of 1 at its context id.
    for (final DocumentAssociation documentAssociation : document.getDocumentAssociations()) {
        increment(associationVector, context.getContextId(documentAssociation), 1);
    }

    // Terms contribute their per-document occurrence counts.
    for (final DocumentTerm documentTerm : document.getDocumentTerms()) {
        increment(termVector, context.getContextId(documentTerm.getTerm()), documentTerm.getCount());
    }

    // Each named entity contributes a count of 1 at its context id.
    for (final DocumentNamedEntity namedEntity : document.getNamedEntities()) {
        increment(nameVector, context.getContextId(namedEntity), 1);
    }
}

From source file:com.datumbox.framework.core.machinelearning.clustering.GaussianDPMMTest.java

/**
 * Test of predict method, of class GaussianDPMM.
 */
@Test
public void testPredict() {
    logger.info("testPredict");

    Configuration configuration = getConfiguration();

    // Split the synthetic gaussian clusters into train and validation sets.
    Dataframe[] dataSplits = Datasets.gaussianClusters(configuration);
    Dataframe trainSet = dataSplits[0];
    Dataframe testSet = dataSplits[1];

    String storageName = this.getClass().getSimpleName();

    // Configure the Dirichlet-process mixture model.
    GaussianDPMM.TrainingParameters trainingParameters = new GaussianDPMM.TrainingParameters();
    trainingParameters.setAlpha(0.01);
    trainingParameters.setMaxIterations(100);
    trainingParameters.setInitializationMethod(GaussianDPMM.TrainingParameters.Initialization.ONE_CLUSTER_PER_RECORD);
    trainingParameters.setKappa0(0);
    trainingParameters.setNu0(1);
    trainingParameters.setMu0(new OpenMapRealVector(2));
    trainingParameters.setPsi0(MatrixUtils.createRealIdentityMatrix(2));

    // Train, persist, and release the model.
    GaussianDPMM clusterer = MLBuilder.create(trainingParameters, configuration);
    clusterer.fit(trainSet);
    clusterer.save(storageName);

    trainSet.close();
    clusterer.close();

    // Reload from storage and predict on the held-out data.
    clusterer = MLBuilder.load(GaussianDPMM.class, storageName, configuration);

    clusterer.predict(testSet);
    ClusteringMetrics metrics = new ClusteringMetrics(testSet);

    // Perfectly separated clusters should yield purity 1.0.
    double expResult = 1.0;
    double result = metrics.getPurity();
    assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH);

    clusterer.delete();

    testSet.close();
}

From source file:com.datumbox.framework.core.machinelearning.clustering.GaussianDPMMTest.java

/**
 * Test of validate method, of class GaussianDPMM.
 */
@Test
public void testKFoldCrossValidation() {
    logger.info("testKFoldCrossValidation");

    Configuration configuration = getConfiguration();

    int k = 5;

    // Only the training split is needed for k-fold cross-validation.
    Dataframe[] data = Datasets.gaussianClusters(configuration);
    Dataframe trainingData = data[0];
    data[1].close();

    // Configure the Dirichlet-process mixture model.
    GaussianDPMM.TrainingParameters param = new GaussianDPMM.TrainingParameters();
    param.setAlpha(0.01);
    param.setMaxIterations(100);
    param.setInitializationMethod(GaussianDPMM.TrainingParameters.Initialization.ONE_CLUSTER_PER_RECORD);
    param.setKappa0(0);
    param.setNu0(1);
    param.setMu0(new OpenMapRealVector(2));
    param.setPsi0(MatrixUtils.createRealIdentityMatrix(2));

    ClusteringMetrics vm = new Validator<>(ClusteringMetrics.class, configuration)
            .validate(new KFoldSplitter(k).split(trainingData), param);
    // Use the class logger (parameterized) for consistency with the other
    // tests instead of writing to System.out.
    logger.info("{}", vm);

    // Perfectly separated clusters should yield purity 1.0.
    double expResult = 1.0;
    double result = vm.getPurity();
    assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH);

    trainingData.close();
}

From source file:com.datumbox.framework.core.machinelearning.regression.MatrixLinearRegression.java

/** {@inheritDoc} */
@Override
protected void _predict(Dataframe newData) {
    // Read the trained model parameters.
    ModelParameters modelParameters = knowledgeBase.getModelParameters();

    Map<Object, Double> thitas = modelParameters.getThitas();
    Map<Object, Integer> featureIds = modelParameters.getFeatureIds();

    // Assemble the coefficient vector in the column order defined by featureIds.
    RealVector coefficients = new OpenMapRealVector(thitas.size());
    for (Map.Entry<Object, Double> thita : thitas.entrySet()) {
        Integer featureId = featureIds.get(thita.getKey());
        coefficients.setEntry(featureId, thita.getValue());
    }

    // Mapping between record ids and matrix row ids, filled in by parseDataset,
    // so each prediction can be written back to its originating record.
    Map<Integer, Integer> recordIdsReference = new HashMap<>();
    DataframeMatrix matrixDataset = DataframeMatrix.parseDataset(newData, recordIdsReference, featureIds);

    // Y = X * theta
    RealVector predictions = matrixDataset.getX().operate(coefficients);

    for (Map.Entry<Integer, Record> entry : newData.entries()) {
        Integer rId = entry.getKey();
        Record r = entry.getValue();
        int rowId = recordIdsReference.get(rId);
        newData._unsafe_set(rId,
                new Record(r.getX(), r.getY(), predictions.getEntry(rowId), r.getYPredictedProbabilities()));
    }
}

From source file:com.datumbox.framework.core.common.dataobjects.DataframeMatrix.java

/**
 * Parses a single Record and converts it to RealVector by using an already
 * existing mapping between feature names and column ids. 
 * /*from   w ww  .j  a va 2  s .com*/
 * @param r
 * @param featureIdsReference
 * @return 
 */
public static RealVector parseRecord(Record r, Map<Object, Integer> featureIdsReference) {
    if (featureIdsReference.isEmpty()) {
        throw new IllegalArgumentException("The featureIdsReference map should not be empty.");
    }

    int d = featureIdsReference.size();

    //create an Map-backed vector only if we have available info about configuration.
    RealVector v = (storageEngine != null) ? new MapRealVector(d) : new OpenMapRealVector(d);

    boolean addConstantColumn = featureIdsReference.containsKey(Dataframe.COLUMN_NAME_CONSTANT);

    if (addConstantColumn) {
        v.setEntry(0, 1.0); //add the constant column
    }
    for (Map.Entry<Object, Object> entry : r.getX().entrySet()) {
        Object feature = entry.getKey();
        Double value = TypeInference.toDouble(entry.getValue());
        if (value != null) {
            Integer featureId = featureIdsReference.get(feature);
            if (featureId != null) {//if the feature exists
                v.setEntry(featureId, value);
            }
        } else {
            //else the X matrix maintains the 0.0 default value
        }
    }

    return v;
}

From source file:com.datumbox.framework.core.machinelearning.featureselection.PCA.java

/** {@inheritDoc} */
@Override
protected void _fit(Dataframe trainingData) {
    ModelParameters modelParameters = knowledgeBase.getModelParameters();

    int n = trainingData.size(); // number of records (rows)
    int d = trainingData.xColumnSize(); // number of features (columns)

    //convert data into matrix
    Map<Object, Integer> featureIds = modelParameters.getFeatureIds();
    DataframeMatrix matrixDataset = DataframeMatrix.newInstance(trainingData, false, null, featureIds);
    RealMatrix X = matrixDataset.getX();

    // Center each feature column: compute its mean, subtract it from every
    // row in place, and store the means so they can be reapplied later.
    RealVector meanValues = new OpenMapRealVector(d);
    for (Integer columnId : featureIds.values()) {
        double mean = 0.0;
        for (int row = 0; row < n; row++) {
            mean += X.getEntry(row, columnId);
        }
        mean /= n;

        for (int row = 0; row < n; row++) {
            X.addToEntry(row, columnId, -mean);
        }

        meanValues.setEntry(columnId, mean);
    }
    modelParameters.setMean(meanValues);

    // Sample covariance of the centered data: X'X / (n - 1), a dxd matrix.
    RealMatrix covarianceDD = (X.transpose().multiply(X)).scalarMultiply(1.0 / (n - 1.0));

    EigenDecomposition decomposition = new EigenDecomposition(covarianceDD);
    RealVector eigenValues = new ArrayRealVector(decomposition.getRealEigenvalues(), false);

    RealMatrix components = decomposition.getV();

    //Whiten Components W = U*L^0.5; To whiten them we multiply with L^0.5.
    if (knowledgeBase.getTrainingParameters().isWhitened()) {

        RealMatrix sqrtEigenValues = new DiagonalMatrix(d);
        for (int i = 0; i < d; i++) {
            sqrtEigenValues.setEntry(i, i, FastMath.sqrt(eigenValues.getEntry(i)));
        }

        components = components.multiply(sqrtEigenValues);
    }

    //the eigenvalues and their components are sorted by descending order no need to resort them
    // NOTE(review): the truncation below relies on the descending order
    // asserted above — confirm EigenDecomposition's ordering guarantee.
    Integer maxDimensions = knowledgeBase.getTrainingParameters().getMaxDimensions();
    Double variancePercentageThreshold = knowledgeBase.getTrainingParameters().getVariancePercentageThreshold();
    if (variancePercentageThreshold != null && variancePercentageThreshold <= 1) {
        // Find the smallest number of leading components whose cumulative
        // share of the total variance reaches the threshold.
        double totalVariance = 0.0;
        for (int i = 0; i < d; i++) {
            totalVariance += eigenValues.getEntry(i);
        }

        double sum = 0.0;
        int varCounter = 0;
        for (int i = 0; i < d; i++) {
            sum += eigenValues.getEntry(i) / totalVariance;
            varCounter++;
            if (sum >= variancePercentageThreshold) {
                break;
            }
        }

        // Tighten maxDimensions to the variance-based count when smaller.
        if (maxDimensions == null || maxDimensions > varCounter) {
            maxDimensions = varCounter;
        }
    }

    if (maxDimensions != null && maxDimensions < d) {
        //keep only the maximum selected eigenvalues
        eigenValues = eigenValues.getSubVector(0, maxDimensions);

        //keep only the maximum selected eigenvectors
        components = components.getSubMatrix(0, components.getRowDimension() - 1, 0, maxDimensions - 1);
    }

    modelParameters.setEigenValues(eigenValues);
    modelParameters.setComponents(components);
}

From source file:edu.byu.nlp.dataset.BasicSparseFeatureVector.java

/**
 * Converts this sparse feature vector into an Apache Commons Math
 * {@link OpenMapRealVector}, copying every stored (index, value) pair.
 *
 * @return a sparse real vector with the same length and entries as this vector
 */
@Override
public SparseRealVector asApacheSparseRealVector() {
    SparseRealVector result = new OpenMapRealVector(length());
    for (int i = 0; i < indices.length; i++) {
        result.setEntry(indices[i], values[i]);
    }
    return result;
}