List of usage examples for org.apache.commons.math.stat.descriptive.DescriptiveStatistics.getN()

public long getN()

Returns the number of values currently stored by the DescriptiveStatistics instance, as a long. When a finite window size is set, the count never exceeds the window size.
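Before the project examples below, a minimal self-contained sketch (written for this page, not taken from any of the projects) of what getN() reports:

import org.apache.commons.math.stat.descriptive.DescriptiveStatistics;

public class GetNExample {
    public static void main(String[] args) {
        DescriptiveStatistics stats = new DescriptiveStatistics();
        stats.addValue(1.0);
        stats.addValue(2.0);
        stats.addValue(3.0);
        // getN() returns the number of stored values as a long
        System.out.println("N = " + stats.getN());       // prints: N = 3
        System.out.println("Mean = " + stats.getMean()); // prints: Mean = 2.0
    }
}

Because getN() returns a long, several of the examples below cast it to int (for example, to size an array) or use it in modular arithmetic for progress reporting.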
From source file: com.mozilla.socorro.RawDumpSizeScan.java
public static void main(String[] args) throws ParseException {
    String startDateStr = args[0];
    String endDateStr = args[1];

    // Set both start/end time and start/stop row
    Calendar startCal = Calendar.getInstance();
    Calendar endCal = Calendar.getInstance();
    SimpleDateFormat sdf = new SimpleDateFormat("yyyyMMdd");
    if (!StringUtils.isBlank(startDateStr)) {
        startCal.setTime(sdf.parse(startDateStr));
    }
    if (!StringUtils.isBlank(endDateStr)) {
        endCal.setTime(sdf.parse(endDateStr));
    }

    DescriptiveStatistics stats = new DescriptiveStatistics();
    long numNullRawBytes = 0L;
    HTable table = null;
    Map<String, Integer> rowValueSizeMap = new HashMap<String, Integer>();
    try {
        table = new HTable(TABLE_NAME_CRASH_REPORTS);
        Scan[] scans = generateScans(startCal, endCal);
        for (Scan s : scans) {
            ResultScanner rs = table.getScanner(s);
            Iterator<Result> iter = rs.iterator();
            while (iter.hasNext()) {
                Result r = iter.next();
                ImmutableBytesWritable rawBytes = r.getBytes();
                //length = r.getValue(RAW_DATA_BYTES, DUMP_BYTES);
                if (rawBytes != null) {
                    int length = rawBytes.getLength();
                    if (length > 20971520) { // flag rows larger than 20 MB
                        rowValueSizeMap.put(new String(r.getRow()), length);
                    }
                    stats.addValue(length);
                } else {
                    numNullRawBytes++;
                }
                // getN() is a running count of added values; report progress every 10,000 rows
                if (stats.getN() % 10000 == 0) {
                    System.out.println("Processed " + stats.getN());
                    System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(),
                            stats.getMax(), stats.getMean()));
                    System.out.println(
                            String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                                    stats.getPercentile(25.0d), stats.getPercentile(50.0d),
                                    stats.getPercentile(75.0d)));
                    System.out.println("Number of large entries: " + rowValueSizeMap.size());
                }
            }
            rs.close();
        }
        System.out.println("Finished Processing!");
        System.out.println(String.format("Min: %.02f Max: %.02f Mean: %.02f", stats.getMin(), stats.getMax(),
                stats.getMean()));
        System.out.println(String.format("1st Quartile: %.02f 2nd Quartile: %.02f 3rd Quartile: %.02f",
                stats.getPercentile(25.0d), stats.getPercentile(50.0d), stats.getPercentile(75.0d)));
        for (Map.Entry<String, Integer> entry : rowValueSizeMap.entrySet()) {
            System.out.println(String.format("RowId: %s => Length: %d", entry.getKey(), entry.getValue()));
        }
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        if (table != null) {
            try {
                table.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file: cs.cirg.cida.analysis.MannWhitneyUTest.java
@Override
public DataTable performTest(String... variableNames) {
    if (this.getExperiments().size() < 2) {
        return null;
    }
    IExperiment experiment1 = this.getExperiments().get(0);
    IExperiment experiment2 = this.getExperiments().get(1);
    DescriptiveStatistics stats1 = experiment1.getBottomRowStatistics(variableNames[0]);
    DescriptiveStatistics stats2 = experiment2.getBottomRowStatistics(variableNames[0]);

    // getN() returns a long, so a cast is needed to size the sample arrays.
    double[] xA = new double[(int) stats1.getN()];
    double[] xB = new double[(int) stats2.getN()];
    // Fill each sample in its own loop so unequal sample sizes cannot overrun the shorter array.
    for (int i = 0; i < xA.length; i++) {
        xA[i] = stats1.getElement(i);
    }
    for (int i = 0; i < xB.length; i++) {
        xB[i] = stats2.getElement(i);
    }

    MannWhitneyTest mannWhitneyTest = new MannWhitneyTest(xA, xB, alternativeHypothesis, 0.0, false);

    results = new StandardDataTable<StringType>();
    ArrayList<StringType> row = new ArrayList<StringType>();
    row.add(new StringType("N_A"));
    row.add(new StringType(stats1.getN() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("N_B"));
    row.add(new StringType(stats2.getN() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("P value"));
    row.add(new StringType(mannWhitneyTest.exactSP() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("Approx P value"));
    row.add(new StringType(mannWhitneyTest.approxSP() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("Ranks Sum A"));
    row.add(new StringType(mannWhitneyTest.getRankSumA() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("Ranks Sum B"));
    row.add(new StringType(mannWhitneyTest.getRankSumB() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("U"));
    row.add(new StringType(mannWhitneyTest.getStatistic() + ""));
    results.addRow(row);

    row = new ArrayList<StringType>();
    row.add(new StringType("Z"));
    row.add(new StringType(mannWhitneyTest.getZ() + ""));
    results.addRow(row);

    results.setColumnName(0, "Statistic");
    results.setColumnName(1, "Value");
    return this.getResults();
}
From source file: cal.binBased.BinMSnSpectrum.java
public double[] getBin_spectrum(int shift) {
    ArrayList<Double> bin_spec_al = new ArrayList<Double>();
    double binSize = (fragment_tolerance * 2), upperLimit = max_value + 0.00001;
    for (double lowerLimit = min_value; lowerLimit < upperLimit; lowerLimit = lowerLimit + binSize) {
        double tmp_intensity_bin = 0;
        DescriptiveStatistics obj = new DescriptiveStatistics();
        for (Peak p : peakList) {
            double mz = p.getMz() + shift;
            if (mz >= lowerLimit && mz < lowerLimit + binSize) {
                obj.addValue(p.intensity);
            }
        }
        // Only compute an intensity if the bin received at least one peak.
        if (obj.getN() > 0) {
            if (intensities_sum_or_mean_or_median == 0) {
                tmp_intensity_bin = obj.getSum();
            } else if (intensities_sum_or_mean_or_median == 1) {
                tmp_intensity_bin = obj.getMean();
            } else if (intensities_sum_or_mean_or_median == 2) {
                tmp_intensity_bin = obj.getPercentile(50);
            }
        }
        // store every bin of the spectrum
        bin_spec_al.add(tmp_intensity_bin);
    }
    // convert the ArrayList to a double array
    bin_size = bin_spec_al.size();
    double[] bin_spectrum = new double[bin_spec_al.size()];
    for (int i = 0; i < bin_spec_al.size(); i++) {
        bin_spectrum[i] = bin_spec_al.get(i);
    }
    return bin_spectrum;
}
From source file: cal.binBased.BinMSnSpectrum.java
private ArrayList<double[]> prepareBinSpectra() {
    // first prepare the bin spectra to be filled with zeros
    int size = (2 * correctionFactor) + 1;
    ArrayList<double[]> shiftedSpectra = new ArrayList<double[]>(size);
    for (int i = 0; i < size; i++) {
        double[] shiftedSpectrum = new double[bin_size];
        shiftedSpectra.add(shiftedSpectrum);
    }
    // now fill each bin spectrum with the correct mz values
    double binSize = (fragment_tolerance * 2), upperLimit = max_value + 0.00001;
    int current_index = 0;
    for (double lowerLimit = min_value + correctionFactor; lowerLimit < upperLimit - correctionFactor;
            lowerLimit = lowerLimit + binSize) {
        double tmp_intensity_bin = 0;
        DescriptiveStatistics obj = new DescriptiveStatistics();
        for (Peak p : peakList) {
            double mz = p.getMz();
            if (mz >= lowerLimit && mz < lowerLimit + binSize) {
                obj.addValue(p.intensity);
            }
        }
        // Only compute an intensity if the bin received at least one peak.
        if (obj.getN() > 0) {
            if (intensities_sum_or_mean_or_median == 0) {
                tmp_intensity_bin = obj.getSum();
            } else if (intensities_sum_or_mean_or_median == 1) {
                tmp_intensity_bin = obj.getMean();
            } else if (intensities_sum_or_mean_or_median == 2) {
                tmp_intensity_bin = obj.getPercentile(50);
            }
        }
        // write the bin intensity into every shifted spectrum
        int filling_index = current_index;
        for (double[] shifted : shiftedSpectra) {
            shifted[filling_index] = tmp_intensity_bin;
            filling_index++;
        }
        current_index++;
    }
    return shiftedSpectra;
}
From source file: edu.usc.goffish.gopher.sample.N_Hop_Stat_Collector.java
@Override
public void compute(List<SubGraphMessage> subGraphMessages) {
    /*
     * We do this in the following steps:
     * 1. Calculate stats for each subgraph.
     * 2. Calculate aggregate stats for the partition.
     *    In this case a single sub-graph will do the aggregation.
     * 3. Aggregate partition-level stats and combine at the smallest partition.
     */
    if (superStep == 0) {
        SubGraphMessage msg = subGraphMessages.get(0);
        String data = new String(msg.getData());
        String[] dataSplit = data.split("#");
        N = Integer.parseInt(dataSplit[0]);
        String[] vps = dataSplit[1].split(",");
        for (String vp : vps) {
            vantagePoints.add(vp.trim());
        }
        try {
            Iterable<? extends ISubgraphInstance> subgraphInstances = subgraph.getInstances(Long.MIN_VALUE,
                    Long.MAX_VALUE, PropertySet.EmptyPropertySet, subgraph.getEdgeProperties(), false);
            // sliceManager.readInstances(subgraph, Long.MIN_VALUE, Long.MAX_VALUE,
            //         PropertySet.EmptyPropertySet, subgraph.getEdgeProperties());
            for (ISubgraphInstance instance : subgraphInstances) {
                Map<String, DescriptiveStatistics> statsMap = new HashMap<String, DescriptiveStatistics>();
                for (TemplateEdge edge : subgraph.edges()) {
                    ISubgraphObjectProperties edgeProps = instance.getPropertiesForEdge(edge.getId());
                    Integer isExist = (Integer) edgeProps.getValue(IS_EXIST_PROP);
                    if (isExist == 1) {
                        String[] vantageIps = ((String) edgeProps.getValue(VANTAGE_IP_PROP)).split(",");
                        String[] latencies = ((String) edgeProps.getValue(LATENCY_PROP)).split(",");
                        String[] hops = ((String) edgeProps.getValue(HOP_PROP)).split(",");
                        Integer[] vantangeIdx = vantageIpIndex(vantageIps);
                        if (vantangeIdx == null) {
                            continue;
                        }
                        for (int i : vantangeIdx) {
                            String vantage = vantageIps[i];
                            String latency = latencies[i];
                            String hop = hops[i];
                            double latency_num = Double.parseDouble(latency);
                            int hop_num = Integer.parseInt(hop);
                            if (latency_num >= 0 && hop_num == N) {
                                if (statsMap.containsKey(vantage)) {
                                    statsMap.get(vantage).addValue(latency_num);
                                } else {
                                    DescriptiveStatistics statistics = new DescriptiveStatistics();
                                    statistics.addValue(latency_num);
                                    statsMap.put(vantage, statistics);
                                }
                            }
                        }
                    }
                }
                int c = 0;
                StringBuffer msgBuffer = new StringBuffer();
                for (String v : statsMap.keySet()) {
                    c++;
                    DescriptiveStatistics statistics = statsMap.get(v);
                    String m = createMessageString(v, instance.getTimestampStart(), instance.getTimestampEnd(),
                            statistics.getStandardDeviation(), statistics.getMean(), statistics.getN());
                    if (c == statsMap.keySet().size()) {
                        msgBuffer.append(m);
                    } else {
                        msgBuffer.append(m).append("|");
                    }
                }
                SubGraphMessage subMsg = new SubGraphMessage(msgBuffer.toString().getBytes());
                sentMessage(partition.getId(), subMsg);
            }
        } catch (IOException e) {
            e.printStackTrace();
            throw new RuntimeException(e);
        }
    } else if (superStep == 1) {
        // Here every sub-graph will receive messages from its own partition.
        // Each message belongs to a given time span.
        Map<String, List<String[]>> vantageGroup = new HashMap<String, List<String[]>>();
        for (SubGraphMessage subGraphMessage : subGraphMessages) {
            String msgData = new String(subGraphMessage.getData());
            // "|" must be escaped: String.split() takes a regex, and a bare "|" splits on every character.
            String[] dataParts = msgData.split("\\|");
            for (String data : dataParts) {
                String[] vantageParts = data.split(",");
                // Group by vantage point and startTime
                if (vantageGroup.containsKey(vantageParts[0] + "|" + vantageParts[1])) {
                    vantageGroup.get(vantageParts[0] + "|" + vantageParts[1]).add(vantageParts);
                } else {
                    ArrayList<String[]> arrayList = new ArrayList<String[]>();
                    arrayList.add(vantageParts);
                    vantageGroup.put(vantageParts[0] + "|" + vantageParts[1], arrayList);
                }
            }
        }
        for (String key : vantageGroup.keySet()) {
            if (!acquireLock(key)) {
                continue;
            }
            List<String[]> data = vantageGroup.get(key);
            double totalN = 0;
            double totalAvgVal = 0;
            double totalVar = 0;
            for (String[] d : data) {
                // weighted average of the per-subgraph means; d[5] is the count reported by getN()
                double mean = Double.parseDouble(d[4]);
                long sN = Long.parseLong(d[5]);
                totalN += sN;
                totalAvgVal += mean * sN;
                double sd = Double.parseDouble(d[3]);
                totalVar += ((double) sd * sd) / ((double) sN);
            }
            double avg = totalAvgVal / totalN;
            double newSD = Math.sqrt(totalVar);
            // create message, sent to all the partitions except me
            String msg = key + "," + newSD + "," + avg + "," + totalN;
            for (int pid : partitions) {
                sentMessage(pid, new SubGraphMessage(msg.getBytes()));
            }
        }
    } else if (superStep >= 2) {
        if (partition.getId() == Collections.min(partitions)) {
            Map<String, List<String[]>> group = new HashMap<String, List<String[]>>();
            for (SubGraphMessage msg : subGraphMessages) {
                String data = new String(msg.getData());
                String[] dataParts = data.split(",");
                if (group.containsKey(dataParts[0])) {
                    group.get(dataParts[0]).add(dataParts);
                } else {
                    List<String[]> list = new ArrayList<String[]>();
                    list.add(dataParts);
                    group.put(dataParts[0], list);
                }
            }
            if (!acquireLock("" + partition.getId())) {
                voteToHalt();
                return;
            }
            PrintWriter writer;
            try {
                writer = new PrintWriter(new FileWriter("TimeSeriesStats.csv"));
            } catch (IOException e) {
                e.printStackTrace();
                throw new RuntimeException(e);
            }
            for (String key : group.keySet()) {
                List<String[]> data = group.get(key);
                double totalN = 0;
                double totalAvgVal = 0;
                double totalVar = 0;
                for (String[] d : data) {
                    // message layout: key, newSD, avg, totalN
                    double mean = Double.parseDouble(d[2]);
                    long sN = Long.parseLong(d[3]);
                    totalN += sN;
                    totalAvgVal += mean * sN;
                    double sd = Double.parseDouble(d[1]);
                    totalVar += ((double) sd * sd) / ((double) sN);
                }
                double avg = totalAvgVal / totalN;
                double newSD = Math.sqrt(totalVar);
                // again, "|" must be escaped in the split regex
                String vantage = key.split("\\|")[0];
                String timeStamp = key.split("\\|")[1];
                log(writer, vantage, timeStamp, avg, newSD);
            }
            writer.flush();
            voteToHalt();
        }
    }
}
From source file: guineu.modules.dataanalysis.foldChanges.FoldTestTask.java
public double Foldtest(int mol) throws IllegalArgumentException, MathException {
    DescriptiveStatistics stats1 = new DescriptiveStatistics();
    DescriptiveStatistics stats2 = new DescriptiveStatistics();
    String parameter1 = "";
    try {
        // Determine groups for selected raw data files
        List<String> availableParameterValues = dataset.getParameterAvailableValues(parameter);
        int numberOfGroups = availableParameterValues.size();
        if (numberOfGroups > 1) {
            parameter1 = availableParameterValues.get(0);
            String parameter2 = availableParameterValues.get(1);
            for (String sampleName : dataset.getAllColumnNames()) {
                if (dataset.getParametersValue(sampleName, parameter) != null
                        && dataset.getParametersValue(sampleName, parameter).equals(parameter1)) {
                    stats1.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                } else if (dataset.getParametersValue(sampleName, parameter) != null
                        && dataset.getParametersValue(sampleName, parameter).equals(parameter2)) {
                    stats2.addValue((Double) this.dataset.getRow(mol).getPeak(sampleName));
                }
            }
        } else {
            return -1;
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
    // Only compute the fold change when both groups contain at least one value.
    if (stats1.getN() > 0 && stats2.getN() > 0) {
        /*double[] sortValues1 = stats1.getSortedValues();
        double[] sortValues2 = stats2.getSortedValues();
        return sortValues1[((int) stats1.getN() / 2)] / sortValues2[((int) stats2.getN() / 2)];*/
        return stats1.getMean() / stats2.getMean();
    } else {
        return 0;
    }
}
From source file: com.joliciel.talismane.other.corpus.CorpusStatistics.java
@Override
public void onNextParseConfiguration(ParseConfiguration parseConfiguration, Writer writer) {
    sentenceCount++;
    sentenceLengthStats.addValue(parseConfiguration.getPosTagSequence().size());

    for (PosTaggedToken posTaggedToken : parseConfiguration.getPosTagSequence()) {
        if (posTaggedToken.getTag().equals(PosTag.ROOT_POS_TAG))
            continue;
        Token token = posTaggedToken.getToken();
        String word = token.getOriginalText();
        words.add(word);
        if (referenceWords != null) {
            if (!referenceWords.contains(word))
                unknownTokenCount++;
        }
        if (alphanumeric.matcher(token.getOriginalText()).find()) {
            String lowercase = word.toLowerCase(TalismaneSession.getLocale());
            lowerCaseWords.add(lowercase);
            alphanumericCount++;
            if (referenceLowercaseWords != null) {
                if (!referenceLowercaseWords.contains(lowercase))
                    unknownAlphanumericCount++;
            }
        }
        tokenCount++;

        Integer countObj = posTagCounts.get(posTaggedToken.getTag().getCode());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        posTagCounts.put(posTaggedToken.getTag().getCode(), count);
    }

    int maxDepth = 0;
    DescriptiveStatistics avgSyntaxDepthForSentenceStats = new DescriptiveStatistics();
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        Integer countObj = depLabelCounts.get(arc.getLabel());
        int count = countObj == null ? 0 : countObj.intValue();
        count++;
        depLabelCounts.put(arc.getLabel(), count);
        totalDepCount++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0)) {
            // do nothing for unattached stuff (e.g. punctuation)
        } else if (arc.getLabel().equals("ponct")) {
            // do nothing for punctuation
        } else {
            int depth = 0;
            DependencyArc theArc = arc;
            while (theArc != null && !theArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)) {
                theArc = parseConfiguration.getGoverningDependency(theArc.getHead());
                depth++;
            }
            if (depth > maxDepth)
                maxDepth = depth;
            syntaxDepthStats.addValue(depth);
            avgSyntaxDepthForSentenceStats.addValue(depth);
            int distance = Math
                    .abs(arc.getHead().getToken().getIndex() - arc.getDependent().getToken().getIndex());
            syntaxDistanceStats.addValue(distance);
        }
        maxSyntaxDepthStats.addValue(maxDepth);
        // guard against taking the mean of an empty per-sentence sample
        if (avgSyntaxDepthForSentenceStats.getN() > 0)
            avgSyntaxDepthStats.addValue(avgSyntaxDepthForSentenceStats.getMean());
    }

    // We cheat a little bit by only allowing each arc to count once.
    // There could be a situation where there are two independent non-projective arcs
    // crossing the same mother arc, but we prefer here to underestimate,
    // as this phenomenon is quite rare.
    Set<DependencyArc> nonProjectiveArcs = new HashSet<DependencyArc>();
    int i = 0;
    for (DependencyArc arc : parseConfiguration.getDependencies()) {
        i++;
        if (arc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                && (arc.getLabel() == null || arc.getLabel().length() == 0))
            continue;
        if (nonProjectiveArcs.contains(arc))
            continue;
        int headIndex = arc.getHead().getToken().getIndex();
        int depIndex = arc.getDependent().getToken().getIndex();
        int startIndex = headIndex < depIndex ? headIndex : depIndex;
        int endIndex = headIndex >= depIndex ? headIndex : depIndex;
        int j = 0;
        for (DependencyArc otherArc : parseConfiguration.getDependencies()) {
            j++;
            if (j <= i)
                continue;
            if (otherArc.getHead().getTag().equals(PosTag.ROOT_POS_TAG)
                    && (otherArc.getLabel() == null || otherArc.getLabel().length() == 0))
                continue;
            if (nonProjectiveArcs.contains(otherArc))
                continue;
            int headIndex2 = otherArc.getHead().getToken().getIndex();
            int depIndex2 = otherArc.getDependent().getToken().getIndex();
            int startIndex2 = headIndex2 < depIndex2 ? headIndex2 : depIndex2;
            int endIndex2 = headIndex2 >= depIndex2 ? headIndex2 : depIndex2;
            // two arcs are non-projective if their spans cross
            boolean nonProjective = false;
            if (startIndex2 < startIndex && endIndex2 > startIndex && endIndex2 < endIndex) {
                nonProjective = true;
            } else if (startIndex2 > startIndex && startIndex2 < endIndex && endIndex2 > endIndex) {
                nonProjective = true;
            }
            if (nonProjective) {
                nonProjectiveArcs.add(arc);
                nonProjectiveArcs.add(otherArc);
                nonProjectiveCount++;
                LOG.debug("Non-projective arcs in sentence: " + parseConfiguration.getSentence().getText());
                LOG.debug(arc.toString());
                LOG.debug(otherArc.toString());
                break;
            }
        }
    }
}
From source file: net.sf.mzmine.modules.peaklistmethods.dataanalysis.heatmaps.HeatMapTask.java
private double[][] groupingDataset(UserParameter selectedParameter, String referenceGroup) {
    // Collect all data files
    Vector<RawDataFile> allDataFiles = new Vector<RawDataFile>();
    DescriptiveStatistics meanControlStats = new DescriptiveStatistics();
    DescriptiveStatistics meanGroupStats = new DescriptiveStatistics();
    allDataFiles.addAll(Arrays.asList(peakList.getRawDataFiles()));

    // Determine the reference group and non-reference group (the rest of
    // the samples) for raw data files
    List<RawDataFile> referenceDataFiles = new ArrayList<RawDataFile>();
    List<RawDataFile> nonReferenceDataFiles = new ArrayList<RawDataFile>();
    List<String> groups = new ArrayList<String>();
    MZmineProject project = MZmineCore.getCurrentProject();

    for (RawDataFile rawDataFile : allDataFiles) {
        Object paramValue = project.getParameterValue(selectedParameter, rawDataFile);
        if (!groups.contains(String.valueOf(paramValue))) {
            groups.add(String.valueOf(paramValue));
        }
        if (String.valueOf(paramValue).equals(referenceGroup)) {
            referenceDataFiles.add(rawDataFile);
        } else {
            nonReferenceDataFiles.add(rawDataFile);
        }
    }

    int numRows = 0;
    for (int row = 0; row < peakList.getNumberOfRows(); row++) {
        if (!onlyIdentified || (onlyIdentified && peakList.getRow(row).getPeakIdentities().length > 0)) {
            numRows++;
        }
    }

    // Create a new aligned peak list with all the samples if the reference
    // group has to be shown, or with only the non-reference group if not.
    double[][] dataMatrix = new double[groups.size() - 1][numRows];
    pValueMatrix = new String[groups.size() - 1][numRows];

    // data files that should be in the heat map
    List<RawDataFile> shownDataFiles = nonReferenceDataFiles;

    for (int row = 0, rowIndex = 0; row < peakList.getNumberOfRows(); row++) {
        PeakListRow rowPeak = peakList.getRow(row);
        if (!onlyIdentified || (onlyIdentified && rowPeak.getPeakIdentities().length > 0)) {

            // Average area or height of the reference group
            meanControlStats.clear();
            for (int column = 0; column < referenceDataFiles.size(); column++) {
                if (rowPeak.getPeak(referenceDataFiles.get(column)) != null) {
                    if (area) {
                        meanControlStats.addValue(rowPeak.getPeak(referenceDataFiles.get(column)).getArea());
                    } else {
                        meanControlStats.addValue(rowPeak.getPeak(referenceDataFiles.get(column)).getHeight());
                    }
                }
            }

            // Divide the area or height of each peak by the average of the
            // area or height of the reference peaks in each row
            int columnIndex = 0;
            for (int column = 0; column < groups.size(); column++) {
                String group = groups.get(column);
                meanGroupStats.clear();
                if (!group.equals(referenceGroup)) {
                    for (int dataColumn = 0; dataColumn < shownDataFiles.size(); dataColumn++) {
                        Object paramValue = project.getParameterValue(selectedParameter,
                                shownDataFiles.get(dataColumn));
                        if (rowPeak.getPeak(shownDataFiles.get(dataColumn)) != null
                                && String.valueOf(paramValue).equals(group)) {
                            Feature peak = rowPeak.getPeak(shownDataFiles.get(dataColumn));
                            if (!Double.isInfinite(peak.getArea()) && !Double.isNaN(peak.getArea())) {
                                if (area) {
                                    meanGroupStats.addValue(peak.getArea());
                                } else {
                                    meanGroupStats.addValue(peak.getHeight());
                                }
                            }
                        }
                    }

                    double value = meanGroupStats.getMean() / meanControlStats.getMean();
                    // a p-value is only meaningful when both groups have more than one observation
                    if (meanGroupStats.getN() > 1 && meanControlStats.getN() > 1) {
                        pValueMatrix[columnIndex][rowIndex] = this.getPvalue(meanGroupStats, meanControlStats);
                    } else {
                        pValueMatrix[columnIndex][rowIndex] = "";
                    }
                    if (log) {
                        value = Math.log(value);
                    }
                    dataMatrix[columnIndex++][rowIndex] = value;
                }
            }
            rowIndex++;
        }
    }

    // Scale the data by dividing the peak area/height by the standard
    // deviation of each column
    if (scale) {
        scale(dataMatrix);
    }

    // Create two arrays: row and column names
    rowNames = new String[dataMatrix[0].length];
    colNames = new String[groups.size() - 1];
    int columnIndex = 0;
    for (String group : groups) {
        if (!group.equals(referenceGroup)) {
            colNames[columnIndex++] = group;
        }
    }
    for (int row = 0, rowIndex = 0; row < peakList.getNumberOfRows(); row++) {
        if (!onlyIdentified || (onlyIdentified && peakList.getRow(row).getPeakIdentities().length > 0)) {
            if (peakList.getRow(row).getPeakIdentities() != null
                    && peakList.getRow(row).getPeakIdentities().length > 0) {
                rowNames[rowIndex++] = peakList.getRow(row).getPreferredPeakIdentity().getName();
            } else {
                rowNames[rowIndex++] = "Unknown";
            }
        }
    }

    return dataMatrix;
}
From source file: datafu.hourglass.jobs.StagedOutputJob.java
/**
 * Writes Hadoop counters and other task statistics to a file in the file system.
 *
 * @param fs the file system to write the counters file to
 * @throws IOException
 */
private void writeCounters(final FileSystem fs) throws IOException {
    final Path actualOutputPath = FileOutputFormat.getOutputPath(this);

    SimpleDateFormat timestampFormat = new SimpleDateFormat("yyyyMMddHHmmss");
    String suffix = timestampFormat.format(new Date());

    if (_countersParentPath != null) {
        if (!fs.exists(_countersParentPath)) {
            _log.info("Creating counter parent path " + _countersParentPath);
            fs.mkdirs(_countersParentPath, FsPermission.valueOf("-rwxrwxr-x"));
        }
        // make the name as unique as possible in this case because this may be a directory
        // where other counter files will be dropped
        _countersPath = new Path(_countersParentPath, ".counters." + suffix);
    } else {
        _countersPath = new Path(actualOutputPath, ".counters." + suffix);
    }

    _log.info(String.format("Writing counters to %s", _countersPath));
    FSDataOutputStream counterStream = fs.create(_countersPath);
    BufferedOutputStream buffer = new BufferedOutputStream(counterStream, 256 * 1024);
    OutputStreamWriter writer = new OutputStreamWriter(buffer);

    for (String groupName : getCounters().getGroupNames()) {
        for (Counter counter : getCounters().getGroup(groupName)) {
            writeAndLog(writer, String.format("%s=%d", counter.getName(), counter.getValue()));
        }
    }

    JobID jobID = this.getJobID();
    org.apache.hadoop.mapred.JobID oldJobId = new org.apache.hadoop.mapred.JobID(jobID.getJtIdentifier(),
            jobID.getId());

    long minStart = Long.MAX_VALUE;
    long maxFinish = 0;
    long setupStart = Long.MAX_VALUE;
    long cleanupFinish = 0;
    DescriptiveStatistics mapStats = new DescriptiveStatistics();
    DescriptiveStatistics reduceStats = new DescriptiveStatistics();
    boolean success = true;

    JobClient jobClient = new JobClient(this.conf);
    Map<String, String> taskIdToType = new HashMap<String, String>();

    TaskReport[] setupReports = jobClient.getSetupTaskReports(oldJobId);
    if (setupReports.length > 0) {
        _log.info("Processing setup reports");
        for (TaskReport report : jobClient.getSetupTaskReports(oldJobId)) {
            taskIdToType.put(report.getTaskID().toString(), "SETUP");
            if (report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start time");
                continue;
            }
            setupStart = Math.min(setupStart, report.getStartTime());
        }
    } else {
        _log.error("No setup reports");
    }

    TaskReport[] mapReports = jobClient.getMapTaskReports(oldJobId);
    if (mapReports.length > 0) {
        _log.info("Processing map reports");
        for (TaskReport report : mapReports) {
            taskIdToType.put(report.getTaskID().toString(), "MAP");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            minStart = Math.min(minStart, report.getStartTime());
            mapStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No map reports");
    }

    TaskReport[] reduceReports = jobClient.getReduceTaskReports(oldJobId);
    if (reduceReports.length > 0) {
        _log.info("Processing reduce reports");
        for (TaskReport report : reduceReports) {
            taskIdToType.put(report.getTaskID().toString(), "REDUCE");
            if (report.getFinishTime() == 0 || report.getStartTime() == 0) {
                _log.warn("Skipping report with zero start or finish time");
                continue;
            }
            maxFinish = Math.max(maxFinish, report.getFinishTime());
            reduceStats.addValue(report.getFinishTime() - report.getStartTime());
        }
    } else {
        _log.error("No reduce reports");
    }

    TaskReport[] cleanupReports = jobClient.getCleanupTaskReports(oldJobId);
    if (cleanupReports.length > 0) {
        _log.info("Processing cleanup reports");
        for (TaskReport report : cleanupReports) {
            taskIdToType.put(report.getTaskID().toString(), "CLEANUP");
            if (report.getFinishTime() == 0) {
                _log.warn("Skipping report with finish time of zero");
                continue;
            }
            cleanupFinish = Math.max(cleanupFinish, report.getFinishTime());
        }
    } else {
        _log.error("No cleanup reports");
    }

    if (minStart == Long.MAX_VALUE) {
        _log.error("Could not determine map-reduce start time");
        success = false;
    }
    if (maxFinish == 0) {
        _log.error("Could not determine map-reduce finish time");
        success = false;
    }
    if (setupStart == Long.MAX_VALUE) {
        _log.error("Could not determine setup start time");
        success = false;
    }
    if (cleanupFinish == 0) {
        _log.error("Could not determine cleanup finish time");
        success = false;
    }

    // Collect statistics on successful/failed/killed task attempts, categorized by setup/map/reduce/cleanup.
    // Unfortunately the job client doesn't have an easier way to get these statistics.
    Map<String, Integer> attemptStats = new HashMap<String, Integer>();
    _log.info("Processing task attempts");
    for (TaskCompletionEvent event : getTaskCompletionEvents(jobClient, oldJobId)) {
        String type = taskIdToType.get(event.getTaskAttemptId().getTaskID().toString());
        String status = event.getTaskStatus().toString();
        String key = String.format("%s_%s_ATTEMPTS", status, type);
        if (!attemptStats.containsKey(key)) {
            attemptStats.put(key, 0);
        }
        attemptStats.put(key, attemptStats.get(key) + 1);
    }

    if (success) {
        writeAndLog(writer, String.format("SETUP_START_TIME_MS=%d", setupStart));
        writeAndLog(writer, String.format("CLEANUP_FINISH_TIME_MS=%d", cleanupFinish));
        writeAndLog(writer, String.format("COMPLETE_WALL_CLOCK_TIME_MS=%d", cleanupFinish - setupStart));
        writeAndLog(writer, String.format("MAP_REDUCE_START_TIME_MS=%d", minStart));
        writeAndLog(writer, String.format("MAP_REDUCE_FINISH_TIME_MS=%d", maxFinish));
        writeAndLog(writer, String.format("MAP_REDUCE_WALL_CLOCK_TIME_MS=%d", maxFinish - minStart));
        // getN() reports how many task durations were recorded
        writeAndLog(writer, String.format("MAP_TOTAL_TASKS=%d", (long) mapStats.getN()));
        writeAndLog(writer, String.format("MAP_MAX_TIME_MS=%d", (long) mapStats.getMax()));
        writeAndLog(writer, String.format("MAP_MIN_TIME_MS=%d", (long) mapStats.getMin()));
        writeAndLog(writer, String.format("MAP_AVG_TIME_MS=%d", (long) mapStats.getMean()));
        writeAndLog(writer, String.format("MAP_STD_TIME_MS=%d", (long) mapStats.getStandardDeviation()));
        writeAndLog(writer, String.format("MAP_SUM_TIME_MS=%d", (long) mapStats.getSum()));
        writeAndLog(writer, String.format("REDUCE_TOTAL_TASKS=%d", (long) reduceStats.getN()));
        writeAndLog(writer, String.format("REDUCE_MAX_TIME_MS=%d", (long) reduceStats.getMax()));
        writeAndLog(writer, String.format("REDUCE_MIN_TIME_MS=%d", (long) reduceStats.getMin()));
        writeAndLog(writer, String.format("REDUCE_AVG_TIME_MS=%d", (long) reduceStats.getMean()));
        writeAndLog(writer, String.format("REDUCE_STD_TIME_MS=%d", (long) reduceStats.getStandardDeviation()));
        writeAndLog(writer, String.format("REDUCE_SUM_TIME_MS=%d", (long) reduceStats.getSum()));
        writeAndLog(writer, String.format("MAP_REDUCE_SUM_TIME_MS=%d",
                (long) mapStats.getSum() + (long) reduceStats.getSum()));
        for (Map.Entry<String, Integer> attemptStat : attemptStats.entrySet()) {
            writeAndLog(writer, String.format("%s=%d", attemptStat.getKey(), attemptStat.getValue()));
        }
    }

    writer.close();
    buffer.close();
    counterStream.close();
}
From source file: de.tudarmstadt.ukp.experiments.argumentation.sequence.feature.coreference.CoreferenceFeatures.java
@Override
protected List<Feature> extract(JCas jCas, Sentence sentence, String sentencePrefix)
        throws TextClassificationException {
    List<List<CoreferenceLink>> coreferenceChains = extractCoreferenceChains(jCas);

    FrequencyDistribution<String> featuresAcrossAllChains = new FrequencyDistribution<>();
    DescriptiveStatistics chainLength = new DescriptiveStatistics();
    DescriptiveStatistics distanceToPreviousSentence = new DescriptiveStatistics();
    DescriptiveStatistics distanceToNextSentence = new DescriptiveStatistics();
    DescriptiveStatistics interSentencesCorLinks = new DescriptiveStatistics();

    for (List<CoreferenceLink> chain : coreferenceChains) {
        SortedMap<Integer, List<CoreferenceLink>> sentencesAndLinks =
                extractSentencesAndLinksFromChain(chain, jCas);
        int currentSentencePos = getCurrentSentencePos(jCas, sentence);

        log.debug(sentencesAndLinks.keySet() + ", current " + currentSentencePos);

        // is the sentence in a chain that spans more sentences?
        boolean partOfChain = sentencesAndLinks.containsKey(currentSentencePos) && sentencesAndLinks.size() > 1;

        // is part of a chain?
        if (partOfChain) {
            log.debug(chainToString(chain));
            featuresAcrossAllChains.inc(FN_PART_OF_CHAIN);

            // starts the chain?
            if (sentencesAndLinks.firstKey().equals(currentSentencePos)) {
                featuresAcrossAllChains.inc(FN_STARTS_THE_CHAIN);
            } else if (sentencesAndLinks.lastKey().equals(currentSentencePos)) {
                // ends the chain?
                featuresAcrossAllChains.inc(FN_ENDS_THE_CHAIN);
            } else {
                // in the middle of the chain?
                featuresAcrossAllChains.inc(FN_IN_THE_MIDDLE_OF_CHAIN);
            }

            // length of the chain
            chainLength.addValue(sentencesAndLinks.size());

            List<CoreferenceLink> currentSentenceLinks = sentencesAndLinks.get(currentSentencePos);
            CoreferenceLink currentSentenceFirstLink = currentSentenceLinks.get(0);
            CoreferenceLink currentSentenceLastLink = currentSentenceLinks.get(currentSentenceLinks.size() - 1);

            // transition from the previous link, i.e. NOMINAL -> PRONOMINAL
            if (!sentencesAndLinks.firstKey().equals(currentSentencePos)) {
                // find the previous sentence
                List<CoreferenceLink> previousSentenceLinks = null;
                int prevSentNo = currentSentencePos;
                while (previousSentenceLinks == null && prevSentNo >= 0) {
                    prevSentNo--;
                    if (sentencesAndLinks.containsKey(prevSentNo)) {
                        previousSentenceLinks = sentencesAndLinks.get(prevSentNo);
                    }
                }
                if (previousSentenceLinks == null) {
                    throw new IllegalStateException("Oops :))");
                }

                // distance to previous sentence
                distanceToPreviousSentence.addValue(currentSentencePos - prevSentNo);

                // get the last link from the previous sentence
                CoreferenceLink prevSentenceLastLink = previousSentenceLinks
                        .get(previousSentenceLinks.size() - 1);

                // add type - type transition
                String prevSentenceLastLinkReferenceType = prevSentenceLastLink.getReferenceType();
                String currentSentenceFirstLinkReferenceType = currentSentenceFirstLink.getReferenceType();
                String transitionType = prevSentenceLastLinkReferenceType + GLUE
                        + currentSentenceFirstLinkReferenceType;
                featuresAcrossAllChains.addSample(FN_TRANSITION_IN_TYPE_TYPE + transitionType, 1);

                // add token - type transition
                String glueCoreferenceCurrentSentence = glueCoreferenceLinkTokens(currentSentenceFirstLink);
                String typeToken = prevSentenceLastLinkReferenceType + GLUE + glueCoreferenceCurrentSentence;
                featuresAcrossAllChains.addSample(FN_TRANSITION_IN_TYPE_TOKEN + typeToken, 1);

                // add type - token transition
                String glueCoreferencePrevSentence = glueCoreferenceLinkTokens(prevSentenceLastLink);
                String tokenType = glueCoreferencePrevSentence + GLUE + currentSentenceFirstLinkReferenceType;
                featuresAcrossAllChains.addSample(FN_TRANSITION_IN_TOKEN_TYPE + tokenType, 1);

                // add token - token transition
                String tokenToken = glueCoreferencePrevSentence + GLUE + glueCoreferenceCurrentSentence;
                featuresAcrossAllChains.addSample(FN_TRANSITION_IN_TOKEN_TOKEN + tokenToken, 1);

                // exact matching token-token reference?
                if (glueCoreferencePrevSentence.equals(glueCoreferenceCurrentSentence)) {
                    featuresAcrossAllChains.addSample(FN_TRANSITION_IN_TOKEN_TOKEN_MATCH, 1);
                }
            }

            // transition to the next link, i.e. NOMINAL -> PRONOMINAL
            if (!sentencesAndLinks.lastKey().equals(currentSentencePos)) {
                // find the next sentence
                List<CoreferenceLink> nextSentenceLinks = null;
                int nextSentNo = currentSentencePos;
                while (nextSentenceLinks == null && nextSentNo <= sentencesAndLinks.lastKey()) {
                    nextSentNo++;
                    if (sentencesAndLinks.containsKey(nextSentNo)) {
                        nextSentenceLinks = sentencesAndLinks.get(nextSentNo);
                    }
                }
                if (nextSentenceLinks == null) {
                    throw new IllegalStateException("Oops :))");
                }

                // distance to next sentence
                distanceToNextSentence.addValue(nextSentNo - currentSentencePos);

                // get the first link from the next sentence
                CoreferenceLink nextSentenceFirstLink = nextSentenceLinks.get(0);

                // add type - type transition
                String currentSentenceLastLinkReferenceType = currentSentenceLastLink.getReferenceType();
                String nextSentenceFirstLinkReferenceType = nextSentenceFirstLink.getReferenceType();
                String transitionType = currentSentenceLastLinkReferenceType + GLUE
                        + nextSentenceFirstLinkReferenceType;
                featuresAcrossAllChains.addSample(FN_TRANSITION_OUT_TYPE_TYPE + transitionType, 1);

                // add token - type transition
                String glueCoreferenceCurrentSent = glueCoreferenceLinkTokens(currentSentenceLastLink);
                String typeToken = glueCoreferenceCurrentSent + GLUE + nextSentenceFirstLinkReferenceType;
                featuresAcrossAllChains.addSample(FN_TRANSITION_OUT_TOKEN_TYPE + typeToken, 1);

                // add type - token transition
                String glueCoreferenceNextSent = glueCoreferenceLinkTokens(nextSentenceFirstLink);
                String tokenType = currentSentenceLastLinkReferenceType + GLUE + glueCoreferenceNextSent;
                featuresAcrossAllChains.addSample(FN_TRANSITION_OUT_TYPE_TOKEN + tokenType, 1);

                // add token - token transition
                String tokenToken = glueCoreferenceCurrentSent + GLUE + glueCoreferenceNextSent;
                featuresAcrossAllChains.addSample(FN_TRANSITION_OUT_TOKEN_TOKEN + tokenToken, 1);

                // exact matching token-token reference?
                if (glueCoreferenceNextSent.equals(glueCoreferenceCurrentSent)) {
                    featuresAcrossAllChains.addSample(FN_TRANSITION_OUT_TOKEN_TOKEN_MATCH, 1);
                }
            }
        }

        // number of inter-sentence coreference links
        if (sentencesAndLinks.containsKey(currentSentencePos)) {
            int coreferenceLinks = sentencesAndLinks.get(currentSentencePos).size();
            interSentencesCorLinks.addValue(coreferenceLinks);
        }

        /*
        List<Integer> positions = positionsOfSentenceInCurrentChain(chain, sentence);
        // ok, we're in a chain
        if (!positions.isEmpty()) {
            log.debug(printChain(chain));
            log.debug(sentence.getCoveredText());
            log.debug(positions);
            Integer lastPosition = positions.get(positions.size() - 1);
            Integer firstPosition = positions.get(0);
            if (lastPosition == positions.size() - 1) {
                log.debug("Last sentence of chain");
            }
            log.debug("-----");
        }
        */
    }

    List<Feature> result = new ArrayList<>();

    log.debug(featuresAcrossAllChains);
    // getN() > 0 guards against calling getMin()/getMax()/getMean() on empty statistics,
    // which would yield NaN features
    if (distanceToNextSentence.getN() > 0) {
        log.debug("Next: " + distanceToNextSentence);
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_DIST_TO_NEXT_MIN,
                distanceToNextSentence.getMin()));
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_DIST_TO_NEXT_MAX,
                distanceToNextSentence.getMax()));
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_DIST_TO_NEXT_AVG,
                distanceToNextSentence.getMean()));
    }
    if (distanceToPreviousSentence.getN() > 0) {
        log.debug("Prev: " + distanceToPreviousSentence);
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_DIST_TO_PREV_MIN,
                distanceToPreviousSentence.getMin()));
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_DIST_TO_PREV_MAX,
                distanceToPreviousSentence.getMax()));
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_DIST_TO_PREV_AVG,
                distanceToPreviousSentence.getMean()));
    }
    if (interSentencesCorLinks.getN() > 0) {
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_INTER_SENT_COR_MIN,
                interSentencesCorLinks.getMin()));
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_INTER_SENT_COR_MAX,
                interSentencesCorLinks.getMax()));
        result.add(new Feature(sentencePrefix + FEATURE_NAME + FN_INTER_SENT_COR_AVG,
                interSentencesCorLinks.getMean()));
    }

    log.debug("----");
    for (String feat : featuresAcrossAllChains.getKeys()) {
        // binary
        result.add(new Feature(sentencePrefix + FEATURE_NAME + feat, 1));
    }
    return result;
}