List of usage examples for org.apache.commons.lang ArrayUtils toString
public static String toString(Object array)
Outputs an array as a String, treating null as an empty array.
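Before the full project examples below, here is a minimal, self-contained sketch of the call. The class name ArrayUtilsToStringDemo and the sample values are illustrative only; the expected output strings reflect commons-lang 2.x's default brace-and-comma style and should be verified against the version in use.

import org.apache.commons.lang.ArrayUtils;

public class ArrayUtilsToStringDemo {
    public static void main(String[] args) {
        int[] values = { 1, 2, 3 };
        // Should print "{1,2,3}": elements wrapped in braces and comma-separated.
        System.out.println(ArrayUtils.toString(values));
        // A null reference is treated as an empty array and should print "{}".
        System.out.println(ArrayUtils.toString((Object) null));
    }
}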
From source file:org.bdval.DiscoverAndValidate.java
public static void main(final String[] args) throws JSAPException, IllegalAccessException, InstantiationException {
    final String version = VersionUtils.getImplementationVersion(DiscoverAndValidate.class);
    LOG.info(DiscoverAndValidate.class.getName() + " Implementation-Version: " + version);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Running with: " + ArrayUtils.toString(args));
    }
    final String mode = CLI.getOption(args, "-m", CLI.getOption(args, "--mode", null));
    if (modelConditionModes.contains(mode)) {
        final ProcessModelConditions pmcTool = new ProcessModelConditions();
        pmcTool.process(args);
    } else {
        final DiscoverAndValidate davTool = new DiscoverAndValidate();
        davTool.process(args);
    }
}
From source file:org.bdval.DiscoverWithCoxRegression.java
@Override
public void process(final DAVOptions options) {
    super.process(options);
    double minPvalue = 1.0;
    int minPvalueProbleIndex = 0;
    for (final ClassificationTask task : options.classificationTasks) {
        for (final GeneList geneList : options.geneLists) {
            try {
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Discover markers with Cox-regression-Test " + task);
                }
                options.normalizeFeatures = false;
                options.scaleFeatures = false;
                final Table processedTable = processTable(geneList, options.inputTable, options,
                        MicroarrayTrainEvaluate.calculateLabelValueGroups(task));
                final ArrayTable.ColumnDescription labelColumn = processedTable.getColumnValues(0);
                if (LOG.isDebugEnabled()) {
                    LOG.debug(ArrayUtils.toString(labelColumn.getStrings()));
                }
                assert labelColumn.type == String.class : "label must have type String";
                final ScoredTranscriptBoundedSizeQueue selectedProbesets =
                        new ScoredTranscriptBoundedSizeQueue(maxProbesToReport);
                for (int featureIndex = 1; featureIndex < processedTable.getColumnNumber(); featureIndex++) {
                    final int probesetIndex = featureIndex - 1;
                    final ArrayTable.ColumnDescription cd = processedTable.getColumnValues(featureIndex);
                    assert cd.type == double.class : "features must have type double";
                    final double[] values = cd.getDoubles();
                    final double pValue = coxTest(values, labelColumn.getStrings());
                    if (pValue < minPvalue) {
                        minPvalue = pValue;
                        minPvalueProbleIndex = probesetIndex;
                    }
                    if (pValue <= alpha) {
                        // The queue keeps items with larger score. Transform the pValue accordingly.
                        selectedProbesets.enqueue(new TranscriptScore(1 - pValue, probesetIndex));
                    }
                }
                if (LOG.isDebugEnabled()) {
                    LOG.debug("Selected " + selectedProbesets.size()
                            + " probesets at significance level = " + alpha);
                }
                if (selectedProbesets.isEmpty()) {
                    LOG.warn("no probeset pass test - adding the probeset with min PValue");
                    selectedProbesets.enqueue(new TranscriptScore(1 - minPvalue, minPvalueProbleIndex));
                }
                while (!selectedProbesets.isEmpty()) {
                    final TranscriptScore feature = selectedProbesets.dequeue();
                    final TranscriptScore probeset = new TranscriptScore(feature.score, feature.transcriptIndex);
                    reporter.reportFeature(0, -(feature.score - 1), probeset, options);
                }
            } catch (Exception e) {
                LOG.fatal("Caught exception during Cox-Regression", e);
                System.exit(10);
            }
        }
    }
}
From source file:org.bdval.DistributionDifferenceByFeatureMode.java
/**
 * Interpret the command line arguments.
 *
 * @param jsap the JSAP command line parser
 * @param result the results of command line parsing
 * @param options the DAVOptions
 */
@Override
public void interpretArguments(final JSAP jsap, final JSAPResult result, final DAVOptions options) {
    checkArgumentsSound(jsap, result, false);
    setupPathwayOptions(result, options);
    setupRservePort(result, options);
    setupClassifier(result, options);
    setExceptionOnCheckPostFilteringFail(true);
    final String maqciiPropertiesFile = verifyFilenameOption(result, "maqcii-properties-file");
    final String modelConditionsFile = verifyFilenameOption(result, "model-conditions-file");
    final String modelsDir = verifyDirectoryOption(result, "models-dir");
    final String keepModelSetStr = result.getString("model-list");
    final String excludeModelSetStr = result.getString("model-exclude-list");
    extendedOutput = result.getBoolean("extended-output");
    mergeClasses = result.getBoolean("merge-classes");
    maxNumClasses = result.getInt("max-num-classes");
    if (mergeClasses) {
        maxNumClasses = 1;
    }
    propertiesTrainingLabel = result.getString("properties-training-label");
    propertiesValidationLabel = result.getString("properties-validation-label");
    evalDatasetRoot = result.getString("eval-dataset-root");
    if (!evalDatasetRoot.equals("-")) {
        if (!isValidDirectory(evalDatasetRoot)) {
            LOG.fatal("eval-dataset-root must either be '-' or a valid directory");
            System.exit(10);
        }
    } else {
        evalDatasetRoot = null;
    }
    LOG.info("Creating the signal quality calculator...");
    signalQualityCalcObj = createCalculator(result);
    if (signalQualityCalcObj == null) {
        System.exit(10);
    }
    // Populate keepModelSet (empty set for all)
    Set<String> keepModelSet = modelList(keepModelSetStr, "all");
    excludeModelSet = modelList(excludeModelSetStr, "none");
    // Only retain model conditions in keepModelSet
    LOG.info("Reading the model conditions file...");
    modelIdToModelConditionsMap =
            ProcessModelConditionsMode.readModelConditionsFile(modelConditionsFile, keepModelSet);
    LOG.info(String.format("... found model conditions for %d models",
            modelIdToModelConditionsMap.keySet().size()));
    // Only save model prefixes that we have model conditions for
    keepModelSet = modelIdToModelConditionsMap.keySet();
    if (keepModelSet.size() == 0) {
        LOG.fatal("No models to load");
        System.exit(10);
    }
    LOG.info("Scanning the models directory for models to keep...");
    modelIdToModelPrefixMap = scanModelsDirectory(modelsDir, keepModelSet);
    if (modelIdToModelPrefixMap.keySet().size() == 0) {
        LOG.fatal("No models loaded.");
        System.exit(10);
    }
    keepModelSet = modelIdToModelPrefixMap.keySet();
    // Reduce model conditions to the models we actually have
    reduceMap(modelIdToModelConditionsMap, keepModelSet);
    LOG.info(String.format("... Finished scanning models directory. Found %d models.",
            modelIdToModelPrefixMap.keySet().size()));
    final Set<String> datasetNames = extractDatasetNamesFromModelConditions(modelIdToModelConditionsMap);
    LOG.info(String.format("Models exist in %d datasets", datasetNames.size()));
    LOG.debug(String.format("modelIdToModelConditionsMap[%d]=%s",
            modelIdToModelConditionsMap.keySet().size(),
            ArrayUtils.toString(modelIdToModelConditionsMap.keySet())));
    LOG.debug(String.format("modelIdToModelPrefixMap[%d]=%s",
            modelIdToModelPrefixMap.keySet().size(),
            ArrayUtils.toString(modelIdToModelPrefixMap.keySet())));
    LOG.info(String.format("datasetNames[%d]=%s", datasetNames.size(), ArrayUtils.toString(datasetNames)));
    LOG.info("Reading the maqcii properties file...");
    datasetName2DetailsMap = readMaqciiProperties(maqciiPropertiesFile, datasetNames);
    if (datasetName2DetailsMap == null) {
        System.exit(10);
    }
    LOG.debug(String.format("datasetName2DetailsMap[%d]=%s",
            datasetName2DetailsMap.keySet().size(), ArrayUtils.toString(datasetName2DetailsMap)));
}
From source file:org.bdval.DistributionDifferenceByFeatureMode.java
/**
 * Perform a signal quality assessment on a single set of files (one model).
 *
 * @param options the options to run with
 * @param modelId the model id being processed
 * @param modelFilenamePrefix the model filename prefix
 * @param datasetDetailsMap the map of dataset details (filenames, etc.)
 * @throws IOException error reading or writing
 */
public void loadFilesAndCalculateQuality(final DAVOptions options, final String modelId,
        final String modelFilenamePrefix, final Map<String, String> datasetDetailsMap) throws IOException {
    final String trainingDatasetFilename = datasetDetailsMap.get("training.dataset-file");
    final String validationDatasetFilename = datasetDetailsMap.get("validation.dataset-file");
    final String trainingSamplesFilename = datasetDetailsMap.get("training.test-samples");
    final String validationSamplesFilename = datasetDetailsMap.get("validation.test-samples");
    final String trainingTrueLabelsFilename = datasetDetailsMap.get("training.true-labels");
    final String validationTrueLabelsFilename = datasetDetailsMap.get("validation.true-labels");
    final String tasksFilename = datasetDetailsMap.get("tasks-file");
    LOG.info("Running loadFilesAndCalculateQuality for:");
    LOG.info(" modelId=" + modelId);
    LOG.info(" modelFilenamePrefix=" + modelFilenamePrefix);
    LOG.info(" trainingDatasetFilename=" + trainingDatasetFilename);
    LOG.info(" validationDatasetFilename=" + validationDatasetFilename);
    LOG.info(" trainingSamplesFilename=" + trainingSamplesFilename);
    LOG.info(" validationSamplesFilename=" + validationSamplesFilename);
    LOG.info(" trainingTrueLabelsFilename=" + trainingTrueLabelsFilename);
    LOG.info(" validationTrueLabelsFilename=" + validationTrueLabelsFilename);
    LOG.info(" tasksFilename=" + tasksFilename);
    // Load the tasks and CIDs files using the official way...
    final ClassificationTask tClassTasks = loadCachedTaskAndConditions(tasksFilename, trainingTrueLabelsFilename);
    final ClassificationTask vClassTasks = loadCachedTaskAndConditions(tasksFilename, validationTrueLabelsFilename);
    final ConditionIdentifiers tConditionIdentifiers = tClassTasks.getConditionsIdentifiers();
    final ConditionIdentifiers vConditionIdentifiers = vClassTasks.getConditionsIdentifiers();
    final String[] allClasses = tClassTasks.getConditionNames();
    final String[] vClasses = vClassTasks.getConditionNames();
    assert Arrays.equals(allClasses, vClasses);
    final StringBuilder classMapcomment = new StringBuilder();
    if (mergeClasses) {
        classMapcomment.append(String.format("## All classes merge to %s",
                BaseSignalQualityCalculator.CLASS_TRANSLATION[0]));
    } else {
        for (int i = 0; i < allClasses.length; i++) {
            if (classMapcomment.length() > 0) {
                classMapcomment.append('\n');
            }
            classMapcomment.append(String.format("## Class %s becomes %s",
                    allClasses[i], BaseSignalQualityCalculator.CLASS_TRANSLATION[i]));
        }
    }
    signalQualityCalcObj.setClassMapComment(classMapcomment.toString());
    final ObjectSet<String> trainingSampleIds = loadSampleIds(trainingSamplesFilename);
    final ObjectSet<String> validationSampleIds = loadSampleIds(validationSamplesFilename);
    try {
        final BDVModel model = new BDVModel(modelFilenamePrefix);
        final boolean scaleFeaturesFromCommandLine = options.scaleFeatures;
        model.load(options);
        // Force scaleFeature to respect the command line option (default is true)
        options.scaleFeatures = scaleFeaturesFromCommandLine;
        assert model.getGeneList() != null : " gene list must not be null";
        final List<Set<String>> trainingLabelValueGroups = new ArrayList<Set<String>>();
        options.inputTable = readMemoryCachedInputFile(trainingDatasetFilename);
        final Table trainingTable = model.loadTestSet(this, options, model.getGeneList(),
                trainingLabelValueGroups, trainingSampleIds);
        final int trainingFilteredNumberOfSamples = trainingTable.getRowNumber();
        LOG.info("Training dataset has " + trainingFilteredNumberOfSamples + " samples.");
        if (trainingFilteredNumberOfSamples != trainingSampleIds.size()) {
            signalQualityCalcObj.writeData(String.format(
                    "# error with model-id=%s - number of samples "
                            + "doesn't match. trainingTable has %d but trainingSampleIds has %d",
                    modelId, trainingFilteredNumberOfSamples, trainingSampleIds.size()));
            return;
        }
        final List<Set<String>> validationLabelValueGroups = new ArrayList<Set<String>>();
        options.inputTable = readMemoryCachedInputFile(validationDatasetFilename);
        final Table validationTable = model.loadTestSet(this, options, model.getGeneList(),
                validationLabelValueGroups, validationSampleIds);
        final int validationFilteredNumberOfSamples = validationTable.getRowNumber();
        LOG.info("Validation dataset has " + validationFilteredNumberOfSamples + " samples.");
        if (validationFilteredNumberOfSamples != validationSampleIds.size()) {
            signalQualityCalcObj.writeData(String.format(
                    "# error with model-id=%s - number of samples "
                            + "doesn't match. validationTable has %d but validationSampleIds has %d",
                    modelId, validationFilteredNumberOfSamples, validationSampleIds.size()));
            return;
        }
        final Map<String, Map<String, double[]>> classToDataMapMap =
                new Object2ObjectOpenHashMap<String, Map<String, double[]>>();
        System.out.printf("There are %d classes, %s%n", allClasses.length, ArrayUtils.toString(allClasses));
        if (mergeClasses) {
            classToDataMapMap.put("merged-training",
                    retrieveDataAsMap(trainingTable, tConditionIdentifiers, null));
            System.out.println("Loading filtered validation data");
            classToDataMapMap.put("merged-validation",
                    retrieveDataAsMap(validationTable, vConditionIdentifiers, null));
            System.out.printf("Loaded data for model=%s merged classes%n", modelId);
            signalQualityCalcObj.calculatePValues(model, modelId, new String[] { "merged" }, classToDataMapMap);
        } else {
            for (final String classId : allClasses) {
                // For each CLASS
                classToDataMapMap.put(classId + "-training",
                        retrieveDataAsMap(trainingTable, tConditionIdentifiers, classId));
                classToDataMapMap.put(classId + "-validation",
                        retrieveDataAsMap(validationTable, vConditionIdentifiers, classId));
                System.out.printf("Loaded data for model=%s/class=%s%n", modelId, classId);
            }
            signalQualityCalcObj.calculatePValues(model, modelId, allClasses, classToDataMapMap);
        }
    } catch (IOException e) {
        LOG.error("Error loading model " + modelFilenamePrefix, e);
        System.exit(10);
    } catch (ClassNotFoundException e) {
        LOG.fatal("Error loading model " + modelFilenamePrefix, e);
        System.exit(10);
    } catch (ColumnTypeException e) {
        LOG.fatal("Error processing input file ", e);
        System.exit(10);
    } catch (TypeMismatchException e) {
        LOG.fatal("Error processing input file ", e);
        System.exit(10);
    } catch (InvalidColumnException e) {
        LOG.fatal("Error processing input file ", e);
        System.exit(10);
    } catch (SyntaxErrorException e) {
        LOG.fatal("Error reading dataset file ", e);
        System.exit(10);
    } catch (UnsupportedFormatException e) {
        LOG.fatal("Error reading dataset file ", e);
        System.exit(10);
    }
}
From source file:org.bdval.modelconditions.ProcessModelConditions.java
public static void main(final String[] args) throws JSAPException, IllegalAccessException, InstantiationException {
    final String version = VersionUtils.getImplementationVersion(ProcessModelConditions.class);
    LOG.info(ProcessModelConditions.class.getName() + " Implementation-Version: " + version);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Running with: " + ArrayUtils.toString(args));
    }
    final ProcessModelConditions pmcTools = new ProcessModelConditions();
    pmcTools.process(args);
}
From source file:org.bdval.Predict.java
private double convertToNumeric(final String[] symbolicClassLabel, final String label) {
    if (LOG.isTraceEnabled()) {
        LOG.trace("Converting Symbolic: " + ArrayUtils.toString(symbolicClassLabel) + " Label: " + label);
    }
    if (label.equals(symbolicClassLabel[0])) {
        return 0;
    } else if (label.equals(symbolicClassLabel[1])) {
        return 1;
    } else {
        // if true labels were not provided, simply return NaN.
        if (sample2TrueLabelMap == null) {
            return Double.NaN;
        }
        LOG.fatal("Label is not recognized: " + label);
        System.exit(10);
        return -1;
    }
}
From source file:org.bdval.Predict.java
/**
 * Read the true labels file if specified.
 *
 * @param trueLabelCidFilename
 * @param printStats
 */
private static Map<String, String> readSampleToTrueLabelsMap(final String trueLabelCidFilename,
        final boolean printStats) {
    if (printStats && trueLabelCidFilename == null) {
        System.err.println("True labels must be provided for "
                + "statistics to be evaluated (--print-stats option).");
        System.exit(1);
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Reading true labels from: " + trueLabelCidFilename);
    }
    Map<String, String> sample2TrueLabelMap = null;
    if (trueLabelCidFilename != null) {
        FastBufferedReader labelReader = null;
        try {
            sample2TrueLabelMap = new Object2ObjectOpenHashMap<String, String>();
            labelReader = new FastBufferedReader(new FileReader(trueLabelCidFilename));
            final LineIterator lit = new LineIterator(labelReader);
            int lineNumber = 0;
            MutableString line;
            while (lit.hasNext()) {
                line = lit.next();
                if (lineNumber++ == 0 || line.startsWith("#")) {
                    // Skip the first line or comment line
                    continue;
                }
                final String[] tokens = line.toString().split("[\t]");
                final LabelSample s = new LabelSample();
                s.label = tokens[0];
                s.sampleId = tokens[1];
                if (LOG.isTraceEnabled()) {
                    LOG.trace("Tokens: " + ArrayUtils.toString(tokens));
                    LOG.trace("Label: " + s.label + " SampleId: " + s.sampleId);
                }
                sample2TrueLabelMap.put(s.sampleId.intern(), s.label);
            }
        } catch (IOException e) {
            LOG.fatal("Cannot read true labels in cids format from file: " + trueLabelCidFilename, e);
            System.exit(10);
        } finally {
            IOUtils.closeQuietly(labelReader);
        }
    }
    return sample2TrueLabelMap;
}
From source file:org.bdval.RserveStatusMode.java
/**
 * Interpret the command line arguments.
 *
 * @param jsap the JSAP command line parser
 * @param result the results of command line parsing
 * @param options the DAVOptions
 */
@Override
public void interpretArguments(final JSAP jsap, final JSAPResult result, final DAVOptions options) {
    final List<String> argList = new LinkedList<String>();
    argList.add("--validate");
    boolean hasSpecificArgs = false;
    if (result.contains("host")) {
        argList.add("--host");
        argList.add(result.getInetAddress("host").getHostName());
        hasSpecificArgs = true;
    }
    if (result.contains("port")) {
        argList.add("--port");
        argList.add(Integer.toString(result.getInt("port")));
        hasSpecificArgs = true;
    }
    final String user = result.getString("username");
    if (StringUtils.isNotBlank(user)) {
        argList.add("--username");
        argList.add(user);
        hasSpecificArgs = true;
    }
    final String password = result.getString("password");
    if (StringUtils.isNotBlank(password)) {
        argList.add("--password");
        argList.add(password);
        hasSpecificArgs = true;
    }
    if (hasSpecificArgs && result.userSpecified("configuration")) {
        LOG.error("configuration option cannot be used with other options");
        printHelp(jsap);
        return;
    }
    if (!hasSpecificArgs) {
        final String configuration = result.getString("configuration");
        if (StringUtils.isNotBlank(configuration)) {
            argList.add("--configuration");
            argList.add(configuration);
        }
    }
    final String[] args = argList.toArray(new String[argList.size()]);
    if (LOG.isDebugEnabled()) {
        LOG.debug("Calling RUtils with: " + ArrayUtils.toString(args));
    }
    try {
        RUtils.main(args);
    } catch (ParseException e) {
        LOG.error("Error parsing options", e);
    } catch (RserveException e) {
        LOG.error("Rserve connection error", e);
    } catch (ConfigurationException e) {
        LOG.error("Rserve configuration error", e);
    }
}
From source file:org.bdval.signalquality.KolmogorovSmirnovTest.java
/**
 * Calculates the Kolmogorov-Smirnov test for the given double[]s x and y.
 *
 * @param x the first double[]
 * @param y the second double[]
 * @return the result values
 */
public static KolmogorovSmirnovTestResult calculate(final double[] x, final double[] y) {
    try {
        final RScript rscript = RScript.createFromResource("rscripts/KS_test.R");
        rscript.setInput("x", x);
        rscript.setInput("y", y);
        rscript.setOutput("p_value", RDataObjectType.Double);
        rscript.setOutput("test_statistic", RDataObjectType.Double);
        rscript.execute();
        return new KolmogorovSmirnovTestResult(rscript.getOutputDouble("p_value"),
                rscript.getOutputDouble("test_statistic"));
    } catch (Exception e) {
        LOG.warn(String.format("Cannot calculate KolmogorovSmirnovTest for x=%s, y=%s",
                ArrayUtils.toString(x), ArrayUtils.toString(y)), e);
        return null;
    }
}
From source file:org.bdval.signalquality.SimpleSignalQualityCalculator.java
/**
 * Compute the signal quality between the two sets of data.
 * Input maps have a key of the probe id, such as "AA799301_Probe1".
 * Map value per key is a double[] of values.
 * <p/>
 * One idea Fabien had is to write a TSV file that contains
 * ModelID Feature p-Value
 * for all models and then do various post processing on that one
 * file using a separate tool instead of doing the entire calculation here?
 *
 * @param model the model we are writing
 * @param modelId the model id we are calculating the signal quality for
 * @param classToDataMapMap map of classes + "-training"/"-validation" to the
 * map of feature to raw data.
 */
@Override
public void calculatePValues(final BDVModel model, final String modelId, final String[] allClasses,
        final Map<String, Map<String, double[]>> classToDataMapMap) {
    // Call calculate in AbstractSignalQualityCalculator first
    super.calculatePValues(model, modelId, allClasses, classToDataMapMap);
    // System.out.println("(simple) Calculating signal quality for model " + modelId);
    // Calculate pValues and write them
    // Acquire the ENTIRE feature set from both training and validation
    allFeaturesSet.clear();
    for (final String sampleClass : allClasses) {
        final Map<String, double[]> trainingDataMap = classToDataMapMap.get(sampleClass + "-training");
        final Map<String, double[]> validationDataMap = classToDataMapMap.get(sampleClass + "-validation");
        allFeaturesSet.addAll(trainingDataMap.keySet());
        allFeaturesSet.addAll(validationDataMap.keySet());
    }
    for (final String featureId : allFeaturesSet) {
        double[] trainingData = null;
        double[] validationData = null;
        try {
            data.clear();
            data.put("model-id", modelId);
            data.put("feature", featureId);
            for (int classIndex = 0; classIndex < allClasses.length; classIndex++) {
                final String classId = allClasses[classIndex];
                final String classIdAppend = "[" + CLASS_TRANSLATION[classIndex] + "]";
                trainingData = classToDataMapMap.get(classId + "-training").get(featureId);
                if (trainingData == null) {
                    trainingData = ArrayUtils.EMPTY_DOUBLE_ARRAY;
                }
                validationData = classToDataMapMap.get(classId + "-validation").get(featureId);
                if (validationData == null) {
                    validationData = ArrayUtils.EMPTY_DOUBLE_ARRAY;
                }
                if (trainingData.length == 0 || validationData.length == 0) {
                    if (trainingData.length == 0) {
                        writeData(String.format("# modelId=%s, featureId=%s, class=%s has no training data.",
                                modelId, featureId, classId));
                    }
                    if (validationData.length == 0) {
                        writeData(String.format("# modelId=%s, featureId=%s, class=%s has no validation data.",
                                modelId, featureId, classId));
                    }
                    continue;
                }
                // System.out.printf("Training data size = %s Validation data size = %d%n",
                //         trainingData.length, validationData.length);
                if (dataToPvalueScript == null) {
                    dataToPvalueScript = RScript.createFromResource("rscripts/data_to_pvalue.R");
                }
                dataToPvalueScript.setInput("x", trainingData);
                dataToPvalueScript.setInput("y", validationData);
                dataToPvalueScript.setOutput("p_value", RDataObjectType.Double);
                dataToPvalueScript.setOutput("test_statistic", RDataObjectType.Double);
                dataToPvalueScript.setOutput("sum_rank_features", RDataObjectType.DoubleArray);
                dataToPvalueScript.execute();
                final double pvalue = dataToPvalueScript.getOutputDouble("p_value");
                final double testStatistic = dataToPvalueScript.getOutputDouble("test_statistic");
                final double[] sumRankFeatures = dataToPvalueScript.getOutputDoubleArray("sum_rank_features");
                data.put("p-value" + classIdAppend, pvalue);
                data.put("test-statistics" + classIdAppend, testStatistic);
                data.put("t1" + classIdAppend, sumRankFeatures[0]);
                data.put("t2" + classIdAppend, sumRankFeatures[1]);
                if (model != null) {
                    data.put("mean" + classIdAppend, model.getTrainingSetMeanValue(featureId));
                    data.put("range" + classIdAppend, model.getTrainingSetRangeValue(featureId));
                } else {
                    data.put("mean" + classIdAppend, -1d);
                    data.put("range" + classIdAppend, -1d);
                }
                data.put("training-values" + classIdAppend, trainingData);
                data.put("validation-values" + classIdAppend, validationData);
            }
            writeData(data);
        } catch (Exception e) {
            LOG.error("Could not calculate KS-test. Error data written to comment in output file");
            writeData(String.format("# ERROR WITH %s.%s.trainingData=%s", modelId, featureId,
                    ArrayUtils.toString(trainingData)));
            writeData(String.format("# ERROR WITH %s.%s.validationData=%s", modelId, featureId,
                    ArrayUtils.toString(validationData)));
        }
    }
}