List of usage examples for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression newSampleData
public void newSampleData(double[] y, double[][] x) throws MathIllegalArgumentException
From source file:modelcreation.ModelCreation.java
/** * @param args the command line arguments *//*from ww w . j av a 2 s. co m*/ public static void main(String[] args) { int size = writeDataIntoFile(); double[][] x = new double[size][2]; double[] y = new double[size]; readDataFromFile(x, y); // TTest tTest = new TTest(); // System.out.println("p value for home value = " + tTest.tTest(x[0], y)); // System.out.println("p value for away value = " + tTest.tTest(x[1], y)); // System.out.println("Average mean squared error: " + apply10FoldCrossValidation(x, y)); // double[] predictions = new double[size]; // for (int i = 0; i < size; i++) { // predictions[i] = 0.5622255342802198 + (1.0682845275289186E-9 * x[i][0]) + (-9.24614306976538E-10 * x[i][1]); // // //System.out.print("Actual: " + y[i]); // //System.out.println(" Predicted: " + predicted); // } // // System.out.println(calculateMeanSquaredError(y, predictions)); OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); regression.newSampleData(y, x); regression.setNoIntercept(true); printRegressionStatistics(regression); //Team[] teams2014 = getTeams(354); //Team[] teams2015 = getTeams(398, 2015); //Team[] teams = concatTeams(teams2014, teams2015); // HashMap<Integer, ArrayList<Integer>> marketValueGoalsDataset = createMarketValueGoalsDataset(teams2014); // // SimpleRegression regression = new SimpleRegression(); // // Set<Integer> marketValues = marketValueGoalsDataset.keySet(); // for (Integer marketValue:marketValues) { // ArrayList<Integer> goals = marketValueGoalsDataset.get(marketValue); // int totalGoals = 0; // for(Integer goal:goals) { // regression.addData(marketValue, goal); // totalGoals += goal; // } // double avg = (double) totalGoals / goals.size(); // System.out.println("Team Value: " + marketValue + ", Goal Average: " + avg); // } // // System.out.println("Intercept: " + regression.getIntercept()); // System.out.println("Slope: " + regression.getSlope()); // System.out.println("R^2: " + regression.getRSquare()); 
//LinearRegression.calculateLinearRegression(marketValueGoalsDataset); }
From source file:dase.timeseries.analysis.GrangerTest.java
/** * Returns p-value for Granger causality test. * * @param y/*from ww w. j ava 2 s . c om*/ * - predictable variable * @param x * - predictor * @param L * - lag, should be 1 or greater. * @return p-value of Granger causality */ public static double granger(double[] y, double[] x, int L) { OLSMultipleLinearRegression h0 = new OLSMultipleLinearRegression(); OLSMultipleLinearRegression h1 = new OLSMultipleLinearRegression(); double[][] laggedY = createLaggedSide(L, y); double[][] laggedXY = createLaggedSide(L, x, y); int n = laggedY.length; h0.newSampleData(strip(L, y), laggedY); h1.newSampleData(strip(L, y), laggedXY); double rs0[] = h0.estimateResiduals(); double rs1[] = h1.estimateResiduals(); double RSS0 = sqrSum(rs0); double RSS1 = sqrSum(rs1); double ftest = ((RSS0 - RSS1) / L) / (RSS1 / (n - 2 * L - 1)); System.out.println(RSS0 + " " + RSS1); System.out.println("F-test " + ftest); FDistribution fDist = new FDistribution(L, n - 2 * L - 1); double pValue = 1.0 - fDist.cumulativeProbability(ftest); System.out.println("P-value " + pValue); return pValue; }
From source file:modelcreation.ModelCreation.java
/**
 * Runs a 10-fold cross validation of a two-regressor OLS model and returns
 * the average of the per-fold scores reported by {@code evaluateModel}.
 *
 * <p>The observation indices are shuffled once. For fold {@code i} the
 * shuffled positions {@code [i * foldSize, (i + 1) * foldSize)} form the test
 * set and every other position goes to the training set, where
 * {@code foldSize = y.length / 10}. When {@code y.length} is not a multiple
 * of 10 the trailing remainder is therefore never tested.
 *
 * @param x regressor rows; only columns 0 and 1 of each row are used
 * @param y responses, one per row of {@code x}
 * @return the mean of the ten fold scores
 */
public static double apply10FoldCrossValidation(double[][] x, double[] y) {
    final int folds = 10;
    final int foldSize = y.length / folds;
    final int trainingSize = y.length - foldSize;

    final ArrayList<Integer> shuffled = new ArrayList<Integer>(y.length);
    for (int i = 0; i < y.length; i++) {
        shuffled.add(i);
    }
    Collections.shuffle(shuffled);

    double errorSum = 0;
    for (int fold = 0; fold < folds; fold++) {
        System.out.println("-------------Fold " + fold + "--------------");
        final double[][] testX = new double[foldSize][2];
        final double[] testY = new double[foldSize];
        final double[][] trainX = new double[trainingSize][2];
        final double[] trainY = new double[trainingSize];

        // Walks the shuffled order exactly once per fold; scoping the cursor
        // to the fold removes the need to reset it by hand.
        int cursor = 0;
        final int testStart = fold * foldSize;

        // Everything before the test window is training data.
        for (int j = 0; j < testStart; j++) {
            final int source = shuffled.get(cursor++);
            trainX[j][0] = x[source][0];
            trainX[j][1] = x[source][1];
            trainY[j] = y[source];
        }
        // The test window itself.
        for (int j = 0; j < foldSize; j++) {
            final int source = shuffled.get(cursor++);
            testX[j][0] = x[source][0];
            testX[j][1] = x[source][1];
            testY[j] = y[source];
        }
        // Everything after the test window is training data again.
        for (int j = testStart; j < trainingSize; j++) {
            final int source = shuffled.get(cursor++);
            trainX[j][0] = x[source][0];
            trainX[j][1] = x[source][1];
            trainY[j] = y[source];
        }

        final OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
        regression.newSampleData(trainY, trainX);
        // NOTE(review): the no-intercept flag is set AFTER the sample is
        // loaded, so the design matrix was probably already built with an
        // intercept column. The order is kept as-is because moving the call
        // would change the coefficient layout that evaluateModel receives -
        // confirm which model evaluateModel expects before reordering.
        regression.setNoIntercept(true);
        errorSum += evaluateModel(regression, testX, testY);
    }
    return errorSum / folds;
}
From source file:net.gtl.movieanalytics.model.LinearRegression.java
private double[] estimateParameter(double[][] x, double[] y) { //printTestData(x, y); OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression(); ols.newSampleData(y, x); return ols.estimateRegressionParameters(); }
From source file:com.insightml.models.regression.OLS.java
/**
 * Trains a linear model by ordinary least squares.
 *
 * @param features     regressor matrix handed to {@code newSampleData}
 * @param expected     response vector handed to {@code newSampleData}
 * @param featureNames names passed through unchanged to the resulting model
 * @return a {@code LinearRegressionModel} built from the estimated
 *         regression parameters and {@code featureNames}
 */
@Override
public IModel<Sample, Double> train(final double[][] features, final double[] expected,
        final String[] featureNames) {
    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(expected, features);
    final double[] coefficients = ols.estimateRegressionParameters();
    return new LinearRegressionModel(coefficients, featureNames);
}
From source file:lu.lippmann.cdb.lab.regression.Regression.java
/** * Constructor.// ww w .j a va 2 s .co m */ public Regression(final Instances ds, final int idx) throws Exception { this.newds = WekaDataProcessingUtil.buildDataSetSortedByAttribute(ds, idx); //System.out.println("Regression -> "+newds.toSummaryString()); final int N = this.newds.numInstances(); final int M = this.newds.numAttributes(); final double[][] x = new double[N][M - 1]; final double[] y = new double[N]; for (int i = 0; i < N; i++) { y[i] = this.newds.instance(i).value(0); } for (int i = 0; i < N; i++) { for (int j = 1; j < M; j++) { x[i][j - 1] = this.newds.instance(i).value(j); } } final OLSMultipleLinearRegression reg = new OLSMultipleLinearRegression(); //reg.setNoIntercept(true); reg.newSampleData(y, x); this.r2 = reg.calculateRSquared(); //this.r2=-1d; this.coe = reg.estimateRegressionParameters(); this.estims = calculateEstimations(x, y, coe); }
From source file:com.davidbracewell.ml.regression.LeastSquaresLearner.java
@Override protected void trainAll(List<Instance> trainingData) { OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); double[] y = new double[trainingData.size()]; double[][] x = new double[trainingData.size()][]; int i = 0;//from www . ja va2 s . c o m for (Instance datum : trainingData) { y[i] = datum.getTargetValue(); x[i] = datum.toArray(); i++; } regression.newSampleData(y, x); double[] params = regression.estimateRegressionParameters(); model.bias = params[0]; double[] weights = new double[params.length - 1]; System.arraycopy(params, 1, weights, 0, params.length - 1); model.weights = new DenseVector(weights); }
From source file:model.Modelo.java
public Equacao ajustarModelo(Local local, ArrayList<ArvoreAjuste> arvoresAjuste) throws Exception { DecimalFormat df4casas = new DecimalFormat("##,###,###,##0.0000"); JEP myParser = new JEP(); //http://www.singularsys.com/jep/doc/html/index.html myParser.addStandardFunctions();/*from www. ja v a2 s.c o m*/ myParser.addStandardConstants(); double resultadoTermo = 0.0; int qtdeVariaveis = 0; int idMetodoCalculo = 1; //Equao ArrayList<Termo> termos = getTermos(); // ArrayList<ArvoreAjuste> arvoresAjuste = new ArrayList<ArvoreAjuste>(); // arvoresAjuste = getArvoresAjuste(); double[] qtdeObs = new double[arvoresAjuste.size()]; double[] qtdeEst = new double[arvoresAjuste.size()]; int iArvoreAjuste = 0; double[][] valorEntrada = new double[arvoresAjuste.size()][termos.size()]; int iTermo = 0; for (ArvoreAjuste arvoreAjuste : arvoresAjuste) { for (Termo termo : termos) { for (VariavelArvoreAjuste variavelArvoreAjuste : arvoreAjuste.variaveisArvoreAjuste) { String sigla = variavelArvoreAjuste.getVariavel().getSigla(); Double valor = variavelArvoreAjuste.getValor(); myParser.addVariable(sigla, valor); } myParser.parseExpression(termo.getExpressao()); resultadoTermo = myParser.getValue(); valorEntrada[iArvoreAjuste][iTermo] = resultadoTermo; iTermo++; } qtdeObs[iArvoreAjuste] = arvoreAjuste.getQtdeObs(idVariavelInteresse); iArvoreAjuste++; iTermo = 0; } OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); regression.newSampleData(qtdeObs, valorEntrada); double[] valorCoeficiente = regression.estimateRegressionParameters(); //Monta equacao a partir dos coeficientes calculados String valorCoeficienteFormatado = df4casas.format(valorCoeficiente[0]); String expressaoEquacao = Double.toString(valorCoeficiente[0]); String expressaoEquacaoFormatada = valorCoeficienteFormatado; iTermo = 1; for (Termo termo : termos) { valorCoeficienteFormatado = df4casas.format(valorCoeficiente[iTermo]); if (valorCoeficiente[iTermo] < 0) { expressaoEquacao = expressaoEquacao + 
valorCoeficiente[iTermo] + "*" + termo.getExpressao(); expressaoEquacaoFormatada = expressaoEquacaoFormatada + valorCoeficienteFormatado + "*" + termo.getExpressao(); } else { expressaoEquacao = expressaoEquacao + "+" + valorCoeficiente[iTermo] + "*" + termo.getExpressao(); expressaoEquacaoFormatada = expressaoEquacaoFormatada + "+" + valorCoeficienteFormatado + "*" + termo.getExpressao(); } iTermo++; } //Cadastra equao ajustada Equacao equacao = new Equacao(); equacao.setIdModelo(this.id); equacao.setExpressao(expressaoEquacao); equacao.setExpressaoFormatada(expressaoEquacaoFormatada); equacao.setIdVariavelInteresse(idVariavelInteresse); ArrayList<Variavel> variaveisEquacao = new ArrayList<Variavel>(); variaveisEquacao = equacao.extraiVariaveis(); equacao.setVariaveisEquacao(variaveisEquacao); EquacaoDao equacaoDao = new EquacaoDao(); int idEquacao = equacaoDao.cadastrar(equacao); equacao.setId(idEquacao); EquacaoLocalDao equacaoLocalDao = new EquacaoLocalDao(); EquacaoLocal equacaoLocal = new EquacaoLocal(); equacaoLocal.setIdLocal(local.getId()); equacaoLocal.setIdEquacao(idEquacao); equacaoLocal.setIdVariavelInteresse(idVariavelInteresse); equacaoLocalDao.cadastrar(equacaoLocal); //Aplica equacaoModelo em todas as ArvoresAjuste para calcular valorEstimado myParser.parseExpression(expressaoEquacao); iArvoreAjuste = 0; for (ArvoreAjuste arvoreAjuste : arvoresAjuste) { arvoreAjuste.variaveisArvoreAjuste = arvoreAjuste.getVariaveisArvoreAjuste(); qtdeVariaveis = arvoreAjuste.variaveisArvoreAjuste.size(); for (VariavelArvoreAjuste variavelArvoreAjuste : arvoreAjuste.variaveisArvoreAjuste) { String sigla = variavelArvoreAjuste.getVariavel().getSigla(); Double valor = variavelArvoreAjuste.getValor(); myParser.addVariable(sigla, valor); } ArvoreAjusteDao arvoreAjusteDao = new ArvoreAjusteDao(); arvoreAjuste.setQtdeEst(myParser.getValue(), idVariavelInteresse, 1); //Equacao arvoreAjusteDao.updateQtdeEst(arvoreAjuste, idVariavelInteresse, 1); //Equacao 
qtdeEst[iArvoreAjuste] = arvoreAjuste.getQtdeEst(idVariavelInteresse, 1); //Equacao); iArvoreAjuste++; } EstatisticaAjuste estatisticaAjuste = new EstatisticaAjuste(); estatisticaAjuste.setIdModelo(id); estatisticaAjuste.setIdLocal(local.getId()); estatisticaAjuste.setIdVariavelInteresse(idVariavelInteresse); estatisticaAjuste.setIdMetodoCalculo(1); //Equao estatisticaAjuste.calcularEstatisticasAjuste(qtdeObs, qtdeEst, qtdeVariaveis); return equacao; }
From source file:hms.hwestra.interactionrebuttal2.InteractionRebuttal2.java
private void iterativelyIncreaseNumberOfPCsInCellCountPredictionModel(String pcFile, String cellcountFile, String pheno) throws IOException { DoubleMatrixDataset<String, String> pcs = new DoubleMatrixDataset<String, String>(pcFile); // samples on rows, pcs on cols? DoubleMatrixDataset<String, String> cellcounts = new DoubleMatrixDataset<String, String>(cellcountFile); // samples on rows, celltype on cols Integer phenoId = cellcounts.hashCols.get(pheno); boolean[] includeRow = new boolean[pcs.nrRows]; int shared = 0; for (int i = 0; i < pcs.nrRows; i++) { String sample = pcs.rowObjects.get(i); if (cellcounts.hashRows.containsKey(sample)) { shared++;/*ww w .j a va 2 s . c om*/ includeRow[i] = true; } } // order the samples of the cell count in the order of the pcs double[] olsY = new double[shared]; //Ordinary least squares: cell count int ctr = 0; for (int i = 0; i < pcs.nrRows; i++) { String sample = pcs.rowObjects.get(i); Integer sampleId = cellcounts.hashRows.get(sample); if (sampleId != null) { olsY[ctr] = cellcounts.rawData[sampleId][phenoId]; ctr++; } } org.apache.commons.math3.distribution.FDistribution fDist = null; cern.jet.random.tdouble.engine.DoubleRandomEngine randomEngine = null; cern.jet.random.tdouble.StudentT tDistColt = null; OLSMultipleLinearRegression previousFullModel = null; for (int col = 0; col < pcs.nrCols; col++) { OLSMultipleLinearRegression regressionFullModel = new OLSMultipleLinearRegression(); OLSMultipleLinearRegression regressionOrigModel = new OLSMultipleLinearRegression(); int nrPcs = col + 1; double[][] olsX = new double[shared][nrPcs]; double[][] olsXN = new double[shared][1]; for (int inc = 0; inc < col + 1; inc++) { ctr = 0; for (int i = 0; i < pcs.nrRows; i++) { if (includeRow[i]) { olsX[ctr][inc] = pcs.rawData[i][inc]; ctr++; } } } double[] pc = new double[shared]; ctr = 0; for (int i = 0; i < pcs.nrRows; i++) { if (includeRow[i]) { pc[ctr] = pcs.rawData[i][col]; olsXN[ctr][0] = pcs.rawData[i][0]; ctr++; } } double corr = 
JSci.maths.ArrayMath.correlation(pc, olsY); Correlation.correlationToZScore(olsY.length); double z = Correlation.convertCorrelationToZScore(olsY.length, corr); double p = ZScores.zToP(z); regressionFullModel.newSampleData(olsY, olsX); regressionOrigModel.newSampleData(olsY, olsXN); double rsquaredadj = regressionFullModel.calculateAdjustedRSquared(); double rsquared = regressionFullModel.calculateRSquared(); double rse = regressionOrigModel.estimateRegressionStandardError(); double rsefull = regressionFullModel.estimateRegressionStandardError(); double rss1 = regressionOrigModel.calculateResidualSumOfSquares(); double rss2 = regressionFullModel.calculateResidualSumOfSquares(); double F = ((rss1 - rss2) / (3 - 2)) / (rss2 / (olsY.length - 3)); int numParams1 = 1; // regressor + intercept int numParams2 = nrPcs; // regressors + intercept if (nrPcs > 1) { double F2 = ((rss1 - rss2) / (numParams2 - numParams1)) / (rss2 / (olsY.length - numParams2)); double rss3 = previousFullModel.calculateResidualSumOfSquares(); int numParams3 = nrPcs - 1; double FPrevious = ((rss3 - rss2) / (numParams2 - numParams3)) / (rss2 / (olsY.length - numParams2)); // pf(f, m1$df.residual-m2$df.residual, m2$df.residual, lower.tail = FALSE) // (double numeratorDegreesOfFreedom, double denominatorDegreesOfFreedom) fDist = new org.apache.commons.math3.distribution.FDistribution((numParams2 - numParams1), olsY.length - numParams2); FDistribution fDistPrev = new FDistribution((numParams2 - numParams3), olsY.length - numParams2); double anovaFTestP = -1; double anovaFTestP2 = -1; try { anovaFTestP = 1 - fDist.cumulativeProbability(F2); anovaFTestP2 = 1 - fDist.cumulativeProbability(FPrevious); if (anovaFTestP < 1E-160) { anovaFTestP = 1E-16; } if (anovaFTestP2 < 1E-160) { anovaFTestP2 = 1E-16; } } catch (Exception err) { } System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams2 + "\t" + F2 + "\t" + FPrevious + "\t" + anovaFTestP + "\t" + anovaFTestP2); } 
else { System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams1); } previousFullModel = regressionFullModel; } ArrayList<String> colNames = new ArrayList<String>(); colNames.add("CellCount"); double[][] data = new double[shared][pcs.nrCols + 1]; for (int i = 0; i < olsY.length; i++) { data[i][0] = olsY[i]; } ArrayList<String> rowNames = new ArrayList<String>(); for (int col = 0; col < pcs.nrCols; col++) { ctr = 0; colNames.add(pcs.colObjects.get(col)); for (int row = 0; row < pcs.nrRows; row++) { if (includeRow[row]) { data[ctr][col + 1] = pcs.rawData[row][col]; ctr++; } } } for (int row = 0; row < pcs.nrRows; row++) { if (includeRow[row]) { rowNames.add("Sample_" + pcs.rowObjects.get(row)); } } DoubleMatrixDataset<String, String> dsout = new DoubleMatrixDataset<String, String>(); dsout.rawData = data; dsout.rowObjects = rowNames; dsout.colObjects = colNames; dsout.recalculateHashMaps(); dsout.save(pcFile + "-mergedWCellCount.txt"); }
From source file:com.mebigfatguy.damus.main.DamusCalculator.java
private boolean calcLinearRegression() { try {// w ww .j ava2s .c o m Context context = Context.instance(); PredictionModel model = context.getPredictionModel(); TrainingData data = context.getTrainingData(); int numMetrics = model.getNumMetrics(); int numResults = model.getNumResults(); int numItems = data.getNumItems(); OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression(); double[][] xx = new double[numItems][]; for (int i = 0; i < numItems; i++) { double[] x = new double[numMetrics]; for (int m = 0; m < numMetrics; m++) { Metric metric = model.getMetric(m); double value; switch (metric.getType()) { case Percent: value = ((Number) data.getItem(i).getValue(metric)).doubleValue(); break; case Real: value = ((BigDecimal) data.getItem(i).getValue(metric)).doubleValue(); break; case YesNo: value = ((Boolean) data.getItem(i).getValue(metric)).booleanValue() ? 1.0 : 0.0; break; default: value = 0.0; break; } x[m] = value; } xx[i] = x; } for (int r = 0; r < numResults; r++) { Metric metric = model.getResult(r); double[] y = new double[numItems]; for (int i = 0; i < numItems; i++) { double value; switch (metric.getType()) { case Percent: value = ((Number) data.getItem(i).getValue(metric)).doubleValue(); break; case Real: value = ((BigDecimal) data.getItem(i).getValue(metric)).doubleValue(); break; case YesNo: value = ((Boolean) data.getItem(i).getValue(metric)).booleanValue() ? 1.0 : 0.0; break; default: value = 0.0; break; } y[i] = value; } regression.newSampleData(y, xx); double result = regression.estimateRegressandVariance(); trainingItem.setValue(metric, new BigDecimal(result)); } return true; } catch (Exception e) { return false; } }