Example usage for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression newSampleData

List of usage examples for org.apache.commons.math3.stat.regression OLSMultipleLinearRegression newSampleData

Introduction

On this page you can find example usages of org.apache.commons.math3.stat.regression OLSMultipleLinearRegression newSampleData.

Prototype

public void newSampleData(double[] y, double[][] x) throws MathIllegalArgumentException 

Source Link

Document

Loads model x and y sample data, overriding any previous sample.

Usage

From source file:modelcreation.ModelCreation.java

/**
 * Entry point: loads the sample, prints the cross-validated error and then
 * prints the statistics of a regression fitted on the complete sample.
 *
 * @param args the command line arguments (not used)
 */
public static void main(String[] args) {
    // The value returned by writeDataIntoFile() is used as the number of
    // observations: it sizes the two-column regressor matrix and the response vector.
    final int sampleCount = writeDataIntoFile();
    final double[][] x = new double[sampleCount][2];
    final double[] y = new double[sampleCount];
    readDataFromFile(x, y);

    final double averageError = apply10FoldCrossValidation(x, y);
    System.out.println("Average mean squared error: " + averageError);

    // Fit on the whole sample and print its summary statistics.
    // NOTE(review): setNoIntercept(true) runs after newSampleData(...). Commons Math
    // consults that flag while the design matrix is being built, so confirm whether
    // the call order here is intended.
    final OLSMultipleLinearRegression fullSampleModel = new OLSMultipleLinearRegression();
    fullSampleModel.newSampleData(y, x);
    fullSampleModel.setNoIntercept(true);
    printRegressionStatistics(fullSampleModel);
}

From source file:dase.timeseries.analysis.GrangerTest.java

/**
 * Returns p-value for Granger causality test.
 *
 * @param y/*from   ww w.  j ava  2 s .  c  om*/
 *            - predictable variable
 * @param x
 *            - predictor
 * @param L
 *            - lag, should be 1 or greater.
 * @return p-value of Granger causality
 */
public static double granger(double[] y, double[] x, int L) {
    OLSMultipleLinearRegression h0 = new OLSMultipleLinearRegression();
    OLSMultipleLinearRegression h1 = new OLSMultipleLinearRegression();

    double[][] laggedY = createLaggedSide(L, y);

    double[][] laggedXY = createLaggedSide(L, x, y);

    int n = laggedY.length;

    h0.newSampleData(strip(L, y), laggedY);
    h1.newSampleData(strip(L, y), laggedXY);

    double rs0[] = h0.estimateResiduals();
    double rs1[] = h1.estimateResiduals();

    double RSS0 = sqrSum(rs0);
    double RSS1 = sqrSum(rs1);

    double ftest = ((RSS0 - RSS1) / L) / (RSS1 / (n - 2 * L - 1));

    System.out.println(RSS0 + " " + RSS1);
    System.out.println("F-test " + ftest);

    FDistribution fDist = new FDistribution(L, n - 2 * L - 1);

    double pValue = 1.0 - fDist.cumulativeProbability(ftest);
    System.out.println("P-value " + pValue);
    return pValue;
}

From source file:modelcreation.ModelCreation.java

/**
 * Estimates the prediction error of the two-regressor OLS model with 10-fold
 * cross-validation.
 *
 * <p>The observation indices are shuffled once. Fold {@code i} uses the shuffled
 * positions {@code [i * subSize, (i + 1) * subSize)} as its test set, with
 * {@code subSize = y.length / 10}, and every other position as training data, so
 * any remainder of the integer division always stays in the training set. Only
 * columns 0 and 1 of {@code x} are used. A header line is printed for each fold.
 *
 * @param x regressor rows; each row must have at least two columns
 * @param y observed responses, one per row of {@code x}
 * @return the arithmetic mean of the ten values returned by {@code evaluateModel}
 */
public static double apply10FoldCrossValidation(double[][] x, double[] y) {
    final int foldCount = 10;
    final int subSize = y.length / foldCount;

    ArrayList<Integer> indices = new ArrayList<Integer>(y.length);
    for (int i = 0; i < y.length; i++) {
        indices.add(i);
    }
    Collections.shuffle(indices);

    double[] meanSquaredErrors = new double[foldCount];
    for (int fold = 0; fold < foldCount; fold++) {
        System.out.println("-------------Fold " + fold + "--------------");
        double[][] subXTest = new double[subSize][2];
        double[] subYTest = new double[subSize];
        double[][] subXTraining = new double[y.length - subSize][2];
        double[] subYTraining = new double[y.length - subSize];

        // Single pass over the shuffled order: positions inside the test window go
        // to the test arrays, all others are appended to the training arrays.
        final int testStart = fold * subSize;
        final int testEnd = testStart + subSize;
        int trainingRow = 0;
        for (int position = 0; position < y.length; position++) {
            final int index = indices.get(position);
            if (position >= testStart && position < testEnd) {
                final int testRow = position - testStart;
                subXTest[testRow][0] = x[index][0];
                subXTest[testRow][1] = x[index][1];
                subYTest[testRow] = y[index];
            } else {
                subXTraining[trainingRow][0] = x[index][0];
                subXTraining[trainingRow][1] = x[index][1];
                subYTraining[trainingRow] = y[index];
                trainingRow++;
            }
        }

        // NOTE(review): setNoIntercept(true) runs after newSampleData(...). Commons Math
        // consults that flag while the design matrix is being built, so confirm whether
        // the call order here is intended. It is kept as-is because evaluateModel may
        // rely on the current parameter layout.
        OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
        regression.newSampleData(subYTraining, subXTraining);
        regression.setNoIntercept(true);
        meanSquaredErrors[fold] = evaluateModel(regression, subXTest, subYTest);
    }

    double sum = 0;
    for (double foldError : meanSquaredErrors) {
        sum += foldError;
    }
    return sum / meanSquaredErrors.length;
}

From source file:net.gtl.movieanalytics.model.LinearRegression.java

/**
 * Fits an ordinary least squares model to the given sample.
 *
 * @param x regressor rows, loaded as the x sample
 * @param y observed responses, loaded as the y sample
 * @return the array returned by {@code estimateRegressionParameters()}
 */
private double[] estimateParameter(double[][] x, double[] y) {
    final OLSMultipleLinearRegression model = new OLSMultipleLinearRegression();
    model.newSampleData(y, x);
    final double[] coefficients = model.estimateRegressionParameters();
    return coefficients;
}

From source file:com.insightml.models.regression.OLS.java

/**
 * Fits an ordinary least squares regression of {@code expected} on
 * {@code features} and wraps the estimated parameters in a
 * {@code LinearRegressionModel}.
 *
 * @param features     regressor rows, loaded as the x sample
 * @param expected     observed responses, loaded as the y sample
 * @param featureNames names handed to the resulting model unchanged
 * @return a model built from the estimated regression parameters
 */
@Override
public IModel<Sample, Double> train(final double[][] features, final double[] expected,
        final String[] featureNames) {
    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(expected, features);
    final double[] coefficients = ols.estimateRegressionParameters();
    return new LinearRegressionModel(coefficients, featureNames);
}

From source file:lu.lippmann.cdb.lab.regression.Regression.java

/**
 * Constructor.
 *
 * <p>Works on the data set returned by
 * {@code WekaDataProcessingUtil.buildDataSetSortedByAttribute(ds, idx)}: attribute 0
 * of every instance is the response and attributes 1..M-1 are the regressors. An
 * OLS model is fitted and its R-squared, its regression parameters and the result of
 * {@code calculateEstimations} are stored in {@code r2}, {@code coe} and
 * {@code estims}.
 *
 * @param ds  source data set
 * @param idx attribute index passed to {@code buildDataSetSortedByAttribute}
 * @throws Exception declared by the original signature
 */
public Regression(final Instances ds, final int idx) throws Exception {
    this.newds = WekaDataProcessingUtil.buildDataSetSortedByAttribute(ds, idx);

    final int rowCount = this.newds.numInstances();
    final int regressorCount = this.newds.numAttributes() - 1;

    final double[][] x = new double[rowCount][regressorCount];
    final double[] y = new double[rowCount];
    for (int row = 0; row < rowCount; row++) {
        y[row] = this.newds.instance(row).value(0);
        for (int col = 0; col < regressorCount; col++) {
            // regressor column col comes from attribute col + 1
            x[row][col] = this.newds.instance(row).value(col + 1);
        }
    }

    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(y, x);

    this.r2 = ols.calculateRSquared();
    this.coe = ols.estimateRegressionParameters();
    this.estims = calculateEstimations(x, y, coe);
}

From source file:com.davidbracewell.ml.regression.LeastSquaresLearner.java

/**
 * Fits an ordinary least squares model to the training data and stores the result
 * in {@code model}: the first estimated parameter becomes the bias and the
 * remaining ones become the weight vector.
 *
 * @param trainingData instances supplying the target value and the feature array
 */
@Override
protected void trainAll(List<Instance> trainingData) {
    final int sampleCount = trainingData.size();
    final double[] targets = new double[sampleCount];
    final double[][] features = new double[sampleCount][];

    int row = 0;
    for (Instance datum : trainingData) {
        targets[row] = datum.getTargetValue();
        features[row] = datum.toArray();
        row++;
    }

    final OLSMultipleLinearRegression ols = new OLSMultipleLinearRegression();
    ols.newSampleData(targets, features);
    final double[] coefficients = ols.estimateRegressionParameters();

    // coefficients[0] is kept separately as the bias; the tail is the weight vector.
    model.bias = coefficients[0];
    final double[] weights = new double[coefficients.length - 1];
    for (int k = 0; k < weights.length; k++) {
        weights[k] = coefficients[k + 1];
    }
    model.weights = new DenseVector(weights);
}

From source file:model.Modelo.java

/**
 * Fits this model to the given adjustment trees by ordinary least squares and
 * registers the resulting equation.
 *
 * <p>Steps performed, in order:
 * <ol>
 *   <li>For every tree and every term returned by {@code getTermos()}, the tree's
 *       variables are registered in a JEP parser, the term expression is evaluated
 *       and the value stored as one regressor; the observed quantity of the tree is
 *       the response.</li>
 *   <li>An {@code OLSMultipleLinearRegression} is fitted. Coefficient 0 becomes the
 *       leading constant of the equation and coefficient {@code k} multiplies the
 *       k-th term expression.</li>
 *   <li>The equation (raw and 4-decimal formatted text) is stored through
 *       {@code EquacaoDao} and linked to {@code local} through
 *       {@code EquacaoLocalDao}.</li>
 *   <li>The equation is evaluated for every tree; the estimate is set on the tree and
 *       written through {@code ArvoreAjusteDao}.</li>
 *   <li>{@code EstatisticaAjuste.calcularEstatisticasAjuste} is called with the
 *       observed and estimated values.</li>
 * </ol>
 *
 * @param local         location whose id is attached to the stored equation and statistics
 * @param arvoresAjuste adjustment trees supplying variables and observed quantities
 * @return the fitted equation, with the id returned by {@code EquacaoDao.cadastrar}
 * @throws Exception declared by the original signature
 */
public Equacao ajustarModelo(Local local, ArrayList<ArvoreAjuste> arvoresAjuste) throws Exception {

    // Formatter used for the human-readable version of the equation (4 decimals).
    DecimalFormat df4casas = new DecimalFormat("##,###,###,##0.0000");

    JEP myParser = new JEP(); //http://www.singularsys.com/jep/doc/html/index.html

    myParser.addStandardFunctions();
    myParser.addStandardConstants();

    double resultadoTermo = 0.0;
    int qtdeVariaveis = 0;
    int idMetodoCalculo = 1; // 1 = equation; not read again in this method

    ArrayList<Termo> termos = getTermos();

    //      ArrayList<ArvoreAjuste> arvoresAjuste = new ArrayList<ArvoreAjuste>();
    //      arvoresAjuste = getArvoresAjuste();

    // Response (observed) and, later, estimated quantity per tree.
    double[] qtdeObs = new double[arvoresAjuste.size()];
    double[] qtdeEst = new double[arvoresAjuste.size()];
    int iArvoreAjuste = 0;

    // Design matrix: one row per tree, one column per term.
    double[][] valorEntrada = new double[arvoresAjuste.size()][termos.size()];
    int iTermo = 0;

    for (ArvoreAjuste arvoreAjuste : arvoresAjuste) {

        for (Termo termo : termos) {

            // NOTE(review): the field variaveisArvoreAjuste is read directly here but is
            // only reassigned from getVariaveisArvoreAjuste() in the later loop; confirm
            // the caller has already populated it.
            for (VariavelArvoreAjuste variavelArvoreAjuste : arvoreAjuste.variaveisArvoreAjuste) {
                String sigla = variavelArvoreAjuste.getVariavel().getSigla();
                Double valor = variavelArvoreAjuste.getValor();
                myParser.addVariable(sigla, valor);
            }
            // Evaluate the term expression with this tree's variable values.
            myParser.parseExpression(termo.getExpressao());
            resultadoTermo = myParser.getValue();
            valorEntrada[iArvoreAjuste][iTermo] = resultadoTermo;

            iTermo++;

        }

        qtdeObs[iArvoreAjuste] = arvoreAjuste.getQtdeObs(idVariavelInteresse);

        iArvoreAjuste++;
        iTermo = 0;
    }

    OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
    regression.newSampleData(qtdeObs, valorEntrada);
    double[] valorCoeficiente = regression.estimateRegressionParameters();

    // Build the equation from the estimated coefficients.
    String valorCoeficienteFormatado = df4casas.format(valorCoeficiente[0]);

    String expressaoEquacao = Double.toString(valorCoeficiente[0]);
    String expressaoEquacaoFormatada = valorCoeficienteFormatado;
    // Coefficient 0 was used above; coefficients 1..n pair with the terms in order.
    iTermo = 1;
    for (Termo termo : termos) {
        valorCoeficienteFormatado = df4casas.format(valorCoeficiente[iTermo]);
        if (valorCoeficiente[iTermo] < 0) {
            // A negative coefficient already carries its own sign, so no "+" is inserted.
            expressaoEquacao = expressaoEquacao + valorCoeficiente[iTermo] + "*" + termo.getExpressao();
            expressaoEquacaoFormatada = expressaoEquacaoFormatada + valorCoeficienteFormatado + "*"
                    + termo.getExpressao();
        } else {
            expressaoEquacao = expressaoEquacao + "+" + valorCoeficiente[iTermo] + "*" + termo.getExpressao();
            expressaoEquacaoFormatada = expressaoEquacaoFormatada + "+" + valorCoeficienteFormatado + "*"
                    + termo.getExpressao();
        }
        iTermo++;
    }
    // Register the fitted equation.
    Equacao equacao = new Equacao();
    equacao.setIdModelo(this.id);
    equacao.setExpressao(expressaoEquacao);
    equacao.setExpressaoFormatada(expressaoEquacaoFormatada);
    equacao.setIdVariavelInteresse(idVariavelInteresse);

    // The freshly created list is immediately replaced by the result of extraiVariaveis().
    ArrayList<Variavel> variaveisEquacao = new ArrayList<Variavel>();
    variaveisEquacao = equacao.extraiVariaveis();
    equacao.setVariaveisEquacao(variaveisEquacao);
    EquacaoDao equacaoDao = new EquacaoDao();
    int idEquacao = equacaoDao.cadastrar(equacao);
    equacao.setId(idEquacao);

    // Link the stored equation to the given location.
    EquacaoLocalDao equacaoLocalDao = new EquacaoLocalDao();
    EquacaoLocal equacaoLocal = new EquacaoLocal();
    equacaoLocal.setIdLocal(local.getId());
    equacaoLocal.setIdEquacao(idEquacao);
    equacaoLocal.setIdVariavelInteresse(idVariavelInteresse);
    equacaoLocalDao.cadastrar(equacaoLocal);

    // Apply the fitted equation to every adjustment tree to compute its estimated value.
    myParser.parseExpression(expressaoEquacao);

    iArvoreAjuste = 0;
    for (ArvoreAjuste arvoreAjuste : arvoresAjuste) {

        arvoreAjuste.variaveisArvoreAjuste = arvoreAjuste.getVariaveisArvoreAjuste();
        // Overwritten on every iteration, so the value of the last tree is what
        // reaches calcularEstatisticasAjuste below.
        qtdeVariaveis = arvoreAjuste.variaveisArvoreAjuste.size();

        for (VariavelArvoreAjuste variavelArvoreAjuste : arvoreAjuste.variaveisArvoreAjuste) {
            String sigla = variavelArvoreAjuste.getVariavel().getSigla();
            Double valor = variavelArvoreAjuste.getValor();
            myParser.addVariable(sigla, valor);
        }

        ArvoreAjusteDao arvoreAjusteDao = new ArvoreAjusteDao();
        arvoreAjuste.setQtdeEst(myParser.getValue(), idVariavelInteresse, 1); // 1 = equation
        arvoreAjusteDao.updateQtdeEst(arvoreAjuste, idVariavelInteresse, 1); // 1 = equation

        qtdeEst[iArvoreAjuste] = arvoreAjuste.getQtdeEst(idVariavelInteresse, 1); // 1 = equation
        iArvoreAjuste++;
    }

    // Goodness-of-fit statistics for observed versus estimated values.
    EstatisticaAjuste estatisticaAjuste = new EstatisticaAjuste();
    estatisticaAjuste.setIdModelo(id);
    estatisticaAjuste.setIdLocal(local.getId());
    estatisticaAjuste.setIdVariavelInteresse(idVariavelInteresse);
    estatisticaAjuste.setIdMetodoCalculo(1); // 1 = equation

    estatisticaAjuste.calcularEstatisticasAjuste(qtdeObs, qtdeEst, qtdeVariaveis);

    return equacao;
}

From source file:hms.hwestra.interactionrebuttal2.InteractionRebuttal2.java

/**
 * Regresses one cell-count phenotype on a growing set of principal components
 * (PC1, then PC1..PC2, and so on), prints one tab-separated line of statistics per
 * model to standard output, and finally saves a merged matrix of the phenotype and
 * all PCs to {@code pcFile + "-mergedWCellCount.txt"}.
 *
 * <p>Only samples present in both files are used, in the row order of the PC file.
 * For every model with more than one PC the line contains: number of PCs,
 * correlation of the newest PC with the phenotype, its z-score and p-value,
 * R-squared of the full model, {@code numParams2}, the F statistic against the
 * PC1-only model, the F statistic against the previous (one PC fewer) model, and the
 * two corresponding upper-tail F-test p-values. For the first model the line ends
 * after R-squared and {@code numParams1}.
 *
 * @param pcFile        matrix file with samples on rows and PCs on columns
 * @param cellcountFile matrix file with samples on rows and cell types on columns
 * @param pheno         name of the cell-count column to predict
 * @throws IOException              declared for loading and saving the matrix files
 * @throws IllegalArgumentException if {@code pheno} is not a column of the cell-count file
 */
private void iterativelyIncreaseNumberOfPCsInCellCountPredictionModel(String pcFile, String cellcountFile,
        String pheno) throws IOException {

    DoubleMatrixDataset<String, String> pcs = new DoubleMatrixDataset<String, String>(pcFile); // samples on rows, pcs on cols?
    DoubleMatrixDataset<String, String> cellcounts = new DoubleMatrixDataset<String, String>(cellcountFile); // samples on rows, celltype on cols

    Integer phenoId = cellcounts.hashCols.get(pheno);
    if (phenoId == null) {
        // Fail with a clear message instead of a NullPointerException on unboxing below.
        throw new IllegalArgumentException("Phenotype column not found in " + cellcountFile + ": " + pheno);
    }

    // Mark the PC rows whose sample also has a cell count.
    boolean[] includeRow = new boolean[pcs.nrRows];
    int shared = 0;
    for (int i = 0; i < pcs.nrRows; i++) {
        String sample = pcs.rowObjects.get(i);
        if (cellcounts.hashRows.containsKey(sample)) {
            shared++;
            includeRow[i] = true;
        }
    }

    // order the samples of the cell count in the order of the pcs
    double[] olsY = new double[shared]; //Ordinary least squares: cell count
    int ctr = 0;
    for (int i = 0; i < pcs.nrRows; i++) {
        String sample = pcs.rowObjects.get(i);
        Integer sampleId = cellcounts.hashRows.get(sample);
        if (sampleId != null) {
            olsY[ctr] = cellcounts.rawData[sampleId][phenoId];
            ctr++;
        }
    }

    OLSMultipleLinearRegression previousFullModel = null;

    for (int col = 0; col < pcs.nrCols; col++) {
        OLSMultipleLinearRegression regressionFullModel = new OLSMultipleLinearRegression();
        OLSMultipleLinearRegression regressionOrigModel = new OLSMultipleLinearRegression();

        int nrPcs = col + 1;
        // Full model: PCs 0..col. Original model: always the first PC only.
        double[][] olsX = new double[shared][nrPcs];
        double[][] olsXN = new double[shared][1];
        for (int inc = 0; inc < nrPcs; inc++) {
            ctr = 0;
            for (int i = 0; i < pcs.nrRows; i++) {
                if (includeRow[i]) {
                    olsX[ctr][inc] = pcs.rawData[i][inc];
                    ctr++;
                }
            }
        }

        double[] pc = new double[shared];
        ctr = 0;
        for (int i = 0; i < pcs.nrRows; i++) {
            if (includeRow[i]) {
                pc[ctr] = pcs.rawData[i][col];
                olsXN[ctr][0] = pcs.rawData[i][0];
                ctr++;
            }
        }

        double corr = JSci.maths.ArrayMath.correlation(pc, olsY);
        // Kept from the original; presumably prepares the table used by
        // convertCorrelationToZScore (confirm against the Correlation class).
        Correlation.correlationToZScore(olsY.length);
        double z = Correlation.convertCorrelationToZScore(olsY.length, corr);
        double p = ZScores.zToP(z);

        regressionFullModel.newSampleData(olsY, olsX);
        regressionOrigModel.newSampleData(olsY, olsXN);

        double rsquared = regressionFullModel.calculateRSquared();

        double rss1 = regressionOrigModel.calculateResidualSumOfSquares();
        double rss2 = regressionFullModel.calculateResidualSumOfSquares();

        // NOTE(review): newSampleData is called without setNoIntercept(true), so Commons
        // Math fits an intercept as well, yet these counts only cover the regressors.
        // The differences used as numerator df are unaffected; confirm the residual df
        // (olsY.length - numParams2) is what is wanted.
        int numParams1 = 1;
        int numParams2 = nrPcs;
        if (nrPcs > 1) {

            double F2 = ((rss1 - rss2) / (numParams2 - numParams1)) / (rss2 / (olsY.length - numParams2));

            double rss3 = previousFullModel.calculateResidualSumOfSquares();
            int numParams3 = nrPcs - 1;
            double FPrevious = ((rss3 - rss2) / (numParams2 - numParams3))
                    / (rss2 / (olsY.length - numParams2));

            // pf(f, m1$df.residual-m2$df.residual, m2$df.residual, lower.tail = FALSE)
            // (double numeratorDegreesOfFreedom, double denominatorDegreesOfFreedom)
            FDistribution fDist = new FDistribution((numParams2 - numParams1), olsY.length - numParams2);
            FDistribution fDistPrev = new FDistribution((numParams2 - numParams3), olsY.length - numParams2);

            double anovaFTestP = -1;
            double anovaFTestP2 = -1;
            try {
                anovaFTestP = 1 - fDist.cumulativeProbability(F2);
                // FPrevious has (numParams2 - numParams3) numerator degrees of freedom,
                // so it must be evaluated against fDistPrev, not fDist.
                anovaFTestP2 = 1 - fDistPrev.cumulativeProbability(FPrevious);
                // p-values below 1E-160 are reported as 1E-16.
                if (anovaFTestP < 1E-160) {
                    anovaFTestP = 1E-16;
                }

                if (anovaFTestP2 < 1E-160) {
                    anovaFTestP2 = 1E-16;
                }
            } catch (RuntimeException err) {
                // Best effort: keep the -1 placeholders in the output, but say why.
                System.err.println("Could not evaluate F-test p-values for " + nrPcs + " PCs: " + err);
            }

            System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams2
                    + "\t" + F2 + "\t" + FPrevious + "\t" + anovaFTestP + "\t" + anovaFTestP2);
        } else {
            System.out.println(nrPcs + "\t" + corr + "\t" + z + "\t" + p + "\t" + rsquared + "\t" + numParams1);
        }

        previousFullModel = regressionFullModel;

    }

    // Merged output matrix: column 0 is the cell count, columns 1.. are the PCs.
    ArrayList<String> colNames = new ArrayList<String>();
    colNames.add("CellCount");
    double[][] data = new double[shared][pcs.nrCols + 1];
    for (int i = 0; i < olsY.length; i++) {
        data[i][0] = olsY[i];
    }

    ArrayList<String> rowNames = new ArrayList<String>();
    for (int col = 0; col < pcs.nrCols; col++) {
        ctr = 0;
        colNames.add(pcs.colObjects.get(col));
        for (int row = 0; row < pcs.nrRows; row++) {
            if (includeRow[row]) {
                data[ctr][col + 1] = pcs.rawData[row][col];
                ctr++;
            }

        }
    }

    for (int row = 0; row < pcs.nrRows; row++) {
        if (includeRow[row]) {
            rowNames.add("Sample_" + pcs.rowObjects.get(row));
        }
    }

    DoubleMatrixDataset<String, String> dsout = new DoubleMatrixDataset<String, String>();
    dsout.rawData = data;
    dsout.rowObjects = rowNames;
    dsout.colObjects = colNames;
    dsout.recalculateHashMaps();
    dsout.save(pcFile + "-mergedWCellCount.txt");

}

From source file:com.mebigfatguy.damus.main.DamusCalculator.java

/**
 * Runs an ordinary least squares regression for every result metric of the current
 * prediction model, using all training items as observations and the model's metrics
 * as regressors, and stores the regressand variance reported by the regression on
 * {@code trainingItem} for that result metric.
 *
 * @return {@code true} when every regression completed, {@code false} if any
 *         exception was thrown along the way
 */
private boolean calcLinearRegression() {
    try {
        Context context = Context.instance();
        PredictionModel model = context.getPredictionModel();
        TrainingData data = context.getTrainingData();

        int numMetrics = model.getNumMetrics();
        int numResults = model.getNumResults();
        int numItems = data.getNumItems();

        // Regressor matrix: one row per training item, one column per metric.
        double[][] xx = new double[numItems][];
        for (int i = 0; i < numItems; i++) {
            double[] x = new double[numMetrics];
            for (int m = 0; m < numMetrics; m++) {
                x[m] = metricValueAsDouble(data, i, model.getMetric(m));
            }
            xx[i] = x;
        }

        OLSMultipleLinearRegression regression = new OLSMultipleLinearRegression();
        for (int r = 0; r < numResults; r++) {
            Metric metric = model.getResult(r);
            double[] y = new double[numItems];
            for (int i = 0; i < numItems; i++) {
                y[i] = metricValueAsDouble(data, i, metric);
            }

            regression.newSampleData(y, xx);
            double result = regression.estimateRegressandVariance();
            // valueOf uses the canonical decimal text of the double instead of its exact
            // binary expansion, which new BigDecimal(double) would store.
            trainingItem.setValue(metric, BigDecimal.valueOf(result));
        }
        return true;
    } catch (Exception e) {
        // Any failure (bad cast, singular matrix, non-finite result, ...) is reported
        // to the caller as an unsuccessful calculation.
        return false;
    }
}

/**
 * Converts the value a training item holds for a metric into a double: Percent and
 * Real values use their numeric value, YesNo maps to 1.0 / 0.0, and any other metric
 * type yields 0.0 without reading the item.
 */
private double metricValueAsDouble(TrainingData data, int itemIndex, Metric metric) {
    switch (metric.getType()) {
    case Percent:
        return ((Number) data.getItem(itemIndex).getValue(metric)).doubleValue();

    case Real:
        return ((BigDecimal) data.getItem(itemIndex).getValue(metric)).doubleValue();

    case YesNo:
        return ((Boolean) data.getItem(itemIndex).getValue(metric)).booleanValue() ? 1.0 : 0.0;

    default:
        return 0.0;
    }
}