List of usage examples for the org.apache.commons.math3.linear.OpenMapRealVector constructor
public OpenMapRealVector(RealVector v)
From source file:ir.project.TFIDFBookVector.java
/**
 * Creates an empty TF-IDF book vector of the given dimensionality with blank metadata.
 *
 * @param numTerms dimension of the underlying sparse term vector
 */
public TFIDFBookVector(int numTerms) {
    this.author = "";
    this.isbn = "";
    this.title = "";
    this.vector = new OpenMapRealVector(numTerms);
}
From source file:iitg.cs570.assign2.webgraph.DocVector.java
public DocVector(Map<String, Integer> terms) { this.terms = terms; this.vector = new OpenMapRealVector(terms.size()); }
From source file:ir.project.TFIDFBookVector.java
/**
 * Creates an empty TF-IDF book vector of the given dimensionality, tagging it
 * with the supplied book metadata. A sparse vector is used for storage.
 *
 * @param numTerms dimension of the underlying sparse term vector
 * @param title    book title
 * @param isbn     book ISBN
 * @param author   book author
 */
public TFIDFBookVector(int numTerms, String title, String isbn, String author) {
    this.author = author;
    this.isbn = isbn;
    this.title = title;
    // Uses a sparse vector
    this.vector = new OpenMapRealVector(numTerms);
}
From source file:com.mothsoft.alexis.engine.textual.DocumentFeatures.java
/**
 * Builds three sparse feature vectors (terms, associations, named entities) for the
 * given document, with each dimension resolved through the context's id mapping.
 * Vectors are allocated with Integer.MAX_VALUE capacity; since OpenMapRealVector is
 * map-backed, only the entries actually touched consume memory.
 *
 * @param document the document whose features are extracted
 * @param context  supplies a stable integer id for each term/association/entity
 */
public DocumentFeatures(final Document document, final DocumentFeatureContext context) {
    this.termVector = new OpenMapRealVector(Integer.MAX_VALUE);
    this.associationVector = new OpenMapRealVector(Integer.MAX_VALUE);
    this.nameVector = new OpenMapRealVector(Integer.MAX_VALUE);

    // Each association contributes a count of 1 at its context id.
    for (final DocumentAssociation association : document.getDocumentAssociations()) {
        final Integer id = context.getContextId(association);
        increment(associationVector, id, 1);
    }

    // Terms contribute their stored occurrence count rather than a flat 1.
    for (final DocumentTerm documentTerm : document.getDocumentTerms()) {
        final Integer termId = context.getContextId(documentTerm.getTerm());
        increment(termVector, termId, documentTerm.getCount());
    }

    // Named entities, like associations, contribute a count of 1 each.
    for (final DocumentNamedEntity entity : document.getNamedEntities()) {
        final Integer id = context.getContextId(entity);
        increment(nameVector, id, 1);
    }
}
From source file:com.datumbox.framework.core.machinelearning.clustering.GaussianDPMMTest.java
/** * Test of predict method, of class GaussianDPMM. *///from w ww. j a va2 s. co m @Test public void testPredict() { logger.info("testPredict"); Configuration configuration = getConfiguration(); Dataframe[] data = Datasets.gaussianClusters(configuration); Dataframe trainingData = data[0]; Dataframe validationData = data[1]; String storageName = this.getClass().getSimpleName(); GaussianDPMM.TrainingParameters param = new GaussianDPMM.TrainingParameters(); param.setAlpha(0.01); param.setMaxIterations(100); param.setInitializationMethod(GaussianDPMM.TrainingParameters.Initialization.ONE_CLUSTER_PER_RECORD); param.setKappa0(0); param.setNu0(1); param.setMu0(new OpenMapRealVector(2)); param.setPsi0(MatrixUtils.createRealIdentityMatrix(2)); GaussianDPMM instance = MLBuilder.create(param, configuration); instance.fit(trainingData); instance.save(storageName); trainingData.close(); instance.close(); instance = MLBuilder.load(GaussianDPMM.class, storageName, configuration); instance.predict(validationData); ClusteringMetrics vm = new ClusteringMetrics(validationData); double expResult = 1.0; double result = vm.getPurity(); assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH); instance.delete(); validationData.close(); }
From source file:com.datumbox.framework.core.machinelearning.clustering.GaussianDPMMTest.java
/**
 * Test of validate method, of class GaussianDPMM.
 *
 * Runs 5-fold cross-validation on synthetic Gaussian clusters and checks that
 * the averaged cluster purity is 1.0. The held-out split (data[1]) is unused
 * here and closed immediately.
 */
@Test
public void testKFoldCrossValidation() {
    logger.info("testKFoldCrossValidation");

    Configuration configuration = getConfiguration();

    int k = 5;

    Dataframe[] data = Datasets.gaussianClusters(configuration);
    Dataframe trainingData = data[0];
    data[1].close();

    // Same prior setup as testPredict: 2-d zero mean, identity scale matrix,
    // one cluster per record at initialization.
    GaussianDPMM.TrainingParameters param = new GaussianDPMM.TrainingParameters();
    param.setAlpha(0.01);
    param.setMaxIterations(100);
    param.setInitializationMethod(GaussianDPMM.TrainingParameters.Initialization.ONE_CLUSTER_PER_RECORD);
    param.setKappa0(0);
    param.setNu0(1);
    param.setMu0(new OpenMapRealVector(2));
    param.setPsi0(MatrixUtils.createRealIdentityMatrix(2));

    ClusteringMetrics vm = new Validator<>(ClusteringMetrics.class, configuration)
            .validate(new KFoldSplitter(k).split(trainingData), param);
    System.out.println(vm);

    double expResult = 1.0;
    double result = vm.getPurity();
    assertEquals(expResult, result, Constants.DOUBLE_ACCURACY_HIGH);

    trainingData.close();
}
From source file:com.datumbox.framework.core.machinelearning.regression.MatrixLinearRegression.java
/**
 * {@inheritDoc}
 *
 * Predicts by assembling the learned coefficients into a sparse vector, converting
 * the Dataframe into a matrix with the training feature-id mapping, and computing
 * Y = X * coefficients in one matrix-vector product. Each record is then rewritten
 * in place with its predicted value.
 */
@Override
protected void _predict(Dataframe newData) {
    //read model params
    ModelParameters modelParameters = knowledgeBase.getModelParameters();

    Map<Object, Double> thitas = modelParameters.getThitas();
    Map<Object, Integer> featureIds = modelParameters.getFeatureIds();

    // Place each learned coefficient at the column index its feature was assigned
    // during training.
    int d = thitas.size();
    RealVector coefficients = new OpenMapRealVector(d);
    for (Map.Entry<Object, Double> entry : thitas.entrySet()) {
        Integer featureId = featureIds.get(entry.getKey());
        coefficients.setEntry(featureId, entry.getValue());
    }

    Map<Integer, Integer> recordIdsReference = new HashMap<>(); //use a mapping between recordIds and rowIds in Matrix
    DataframeMatrix matrixDataset = DataframeMatrix.parseDataset(newData, recordIdsReference, featureIds);

    RealMatrix X = matrixDataset.getX();

    // All predictions at once: Y = X * coefficients.
    RealVector Y = X.operate(coefficients);
    for (Map.Entry<Integer, Record> e : newData.entries()) {
        Integer rId = e.getKey();
        Record r = e.getValue();
        // Map the record id back to its row in the matrix to pick up its prediction;
        // only the predicted value changes, x/y/probabilities are preserved.
        int rowId = recordIdsReference.get(rId);
        newData._unsafe_set(rId,
                new Record(r.getX(), r.getY(), Y.getEntry(rowId), r.getYPredictedProbabilities()));
    }
}
From source file:com.datumbox.framework.core.common.dataobjects.DataframeMatrix.java
/**
 * Parses a single Record and converts it to RealVector by using an already
 * existing mapping between feature names and column ids.
 *
 * Features absent from the record, or whose value cannot be converted to a
 * double, keep the vector's 0.0 default. Features of the record that are not
 * present in the reference map are silently skipped.
 *
 * @param r the record to convert
 * @param featureIdsReference mapping from feature name to column id; must not be empty
 * @return a sparse RealVector of dimension featureIdsReference.size()
 * @throws IllegalArgumentException if featureIdsReference is empty
 */
public static RealVector parseRecord(Record r, Map<Object, Integer> featureIdsReference) {
    if (featureIdsReference.isEmpty()) {
        throw new IllegalArgumentException("The featureIdsReference map should not be empty.");
    }

    int d = featureIdsReference.size();

    //create an Map-backed vector only if we have available info about configuration.
    // NOTE(review): storageEngine is static state defined elsewhere in this class —
    // when set, a storage-backed MapRealVector is used instead of an in-memory one.
    RealVector v = (storageEngine != null) ? new MapRealVector(d) : new OpenMapRealVector(d);

    boolean addConstantColumn = featureIdsReference.containsKey(Dataframe.COLUMN_NAME_CONSTANT);
    if (addConstantColumn) {
        v.setEntry(0, 1.0); //add the constant column
    }
    for (Map.Entry<Object, Object> entry : r.getX().entrySet()) {
        Object feature = entry.getKey();
        Double value = TypeInference.toDouble(entry.getValue());
        if (value != null) {
            Integer featureId = featureIdsReference.get(feature);
            if (featureId != null) {//if the feature exists
                v.setEntry(featureId, value);
            }
        } else {
            //else the X matrix maintains the 0.0 default value
        }
    }

    return v;
}
From source file:com.datumbox.framework.core.machinelearning.featureselection.PCA.java
/**
 * {@inheritDoc}
 *
 * Fits PCA: centers the data in place, eigendecomposes the sample covariance
 * matrix, optionally whitens the components, then truncates to the requested
 * number of dimensions (explicit maximum and/or explained-variance threshold).
 * Stores the per-feature means, kept eigenvalues and component matrix in the
 * model parameters.
 */
@Override
protected void _fit(Dataframe trainingData) {
    ModelParameters modelParameters = knowledgeBase.getModelParameters();

    int n = trainingData.size();
    int d = trainingData.xColumnSize();

    //convert data into matrix
    Map<Object, Integer> featureIds = modelParameters.getFeatureIds();
    DataframeMatrix matrixDataset = DataframeMatrix.newInstance(trainingData, false, null, featureIds);
    RealMatrix X = matrixDataset.getX();

    //calculate means and subtract them from data (centering happens in place on X)
    RealVector meanValues = new OpenMapRealVector(d);
    for (Integer columnId : featureIds.values()) {
        double mean = 0.0;
        for (int row = 0; row < n; row++) {
            mean += X.getEntry(row, columnId);
        }
        mean /= n;

        for (int row = 0; row < n; row++) {
            X.addToEntry(row, columnId, -mean);
        }

        // Means are retained so transform-time data can be centered identically.
        meanValues.setEntry(columnId, mean);
    }
    modelParameters.setMean(meanValues);

    //dxd sample covariance matrix (unbiased: divides by n-1)
    RealMatrix covarianceDD = (X.transpose().multiply(X)).scalarMultiply(1.0 / (n - 1.0));

    EigenDecomposition decomposition = new EigenDecomposition(covarianceDD);
    RealVector eigenValues = new ArrayRealVector(decomposition.getRealEigenvalues(), false);

    RealMatrix components = decomposition.getV();

    //Whiten Components W = U*L^0.5; To whiten them we multiply with L^0.5.
    if (knowledgeBase.getTrainingParameters().isWhitened()) {
        RealMatrix sqrtEigenValues = new DiagonalMatrix(d);
        for (int i = 0; i < d; i++) {
            sqrtEigenValues.setEntry(i, i, FastMath.sqrt(eigenValues.getEntry(i)));
        }

        components = components.multiply(sqrtEigenValues);
    }

    //the eigenvalues and their components are sorted by descending order no need to resort them
    // NOTE(review): the truncation below relies on that descending ordering — verify it
    // holds for the EigenDecomposition implementation in use.
    Integer maxDimensions = knowledgeBase.getTrainingParameters().getMaxDimensions();
    Double variancePercentageThreshold = knowledgeBase.getTrainingParameters().getVariancePercentageThreshold();
    if (variancePercentageThreshold != null && variancePercentageThreshold <= 1) {
        double totalVariance = 0.0;
        for (int i = 0; i < d; i++) {
            totalVariance += eigenValues.getEntry(i);
        }

        // Count how many leading components are needed to reach the requested
        // cumulative explained-variance ratio.
        double sum = 0.0;
        int varCounter = 0;
        for (int i = 0; i < d; i++) {
            sum += eigenValues.getEntry(i) / totalVariance;
            varCounter++;
            if (sum >= variancePercentageThreshold) {
                break;
            }
        }

        // The variance threshold can only tighten (lower) the dimension cap.
        if (maxDimensions == null || maxDimensions > varCounter) {
            maxDimensions = varCounter;
        }
    }

    if (maxDimensions != null && maxDimensions < d) {
        //keep only the maximum selected eigenvalues
        eigenValues = eigenValues.getSubVector(0, maxDimensions);

        //keep only the maximum selected eigenvectors
        components = components.getSubMatrix(0, components.getRowDimension() - 1, 0, maxDimensions - 1);
    }

    modelParameters.setEigenValues(eigenValues);
    modelParameters.setComponents(components);
}
From source file:edu.byu.nlp.dataset.BasicSparseFeatureVector.java
@Override public SparseRealVector asApacheSparseRealVector() { SparseRealVector retval = new OpenMapRealVector(length()); for (int i = 0; i < indices.length; i++) { retval.setEntry(indices[i], values[i]); }//from w ww. j a v a 2s . c o m return retval; }