Example usage for org.apache.commons.lang StringUtils strip

List of usage examples for org.apache.commons.lang StringUtils strip

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils strip.

Prototype

public static String strip(String str) 

Source Link

Document

Strips whitespace from the start and end of a String.

Usage

From source file:ubic.gemma.datastructure.matrix.ExpressionDataWriterUtils.java

/**
 * Builds a name for a factor value in which hyphens and whitespace are replaced with underscores.
 * <p>
 * The text is taken from the first source that applies: the stripped values of the characteristics, otherwise
 * the measurement value, otherwise the factor value's own value if it is not blank. When there is more than one
 * characteristic, " | " is appended after every characteristic value, including the last one.
 *
 * @param factorValue the factor value to build a name for
 * @return the trimmed text with every hyphen and whitespace character replaced by an underscore; empty when no
 *         source contributes any text
 */
public static String constructFactorValueName(FactorValue factorValue) {

    StringBuilder name = new StringBuilder();

    if (!factorValue.getCharacteristics().isEmpty()) {
        boolean severalCharacteristics = factorValue.getCharacteristics().size() > 1;
        for (Characteristic characteristic : factorValue.getCharacteristics()) {
            name.append(StringUtils.strip(characteristic.getValue()));
            if (severalCharacteristics) {
                name.append(" | ");
            }
        }
    } else if (factorValue.getMeasurement() != null) {
        name.append(factorValue.getMeasurement().getValue());
    } else if (StringUtils.isNotBlank(factorValue.getValue())) {
        name.append(StringUtils.strip(factorValue.getValue()));
    }

    // Trim first so that only interior whitespace is turned into underscores.
    return name.toString().trim().replace('-', '_').replaceAll("\\s", "_");
}

From source file:ubic.gemma.genome.gene.service.GeneSearchServiceImpl.java

/**
 * Search for multiple genes at once. This attempts to limit the number of genes per query to only one.
 * /*from ww w  .ja va2 s  .  c  o m*/
 * @param query A list of gene names (symbols), one per line.
 * @param taxonId
 * @return map with each gene-query as a key and a collection of the search-results as the value
 * @throws IOException
 */
@Override
public Map<String, Collection<GeneValueObject>> searchMultipleGenesGetMap(String query, Long taxonId)
        throws IOException {
    Taxon taxon = taxonService.load(taxonId);
    BufferedReader reader = new BufferedReader(new StringReader(query));
    String line = null;
    int genesAdded = 0;

    Map<String, Collection<GeneValueObject>> queryToGenes = new HashMap<String, Collection<GeneValueObject>>();
    while ((line = reader.readLine()) != null) {

        line = StringUtils.strip(line);
        queryToGenes.put(line, new HashSet<GeneValueObject>());
    }

    reader = new BufferedReader(new StringReader(query));
    while ((line = reader.readLine()) != null) {
        if (StringUtils.isBlank(line))
            continue;
        if (genesAdded >= MAX_GENES_PER_QUERY) {
            log.warn("Too many genes, stopping");
            break;
        }
        line = StringUtils.strip(line);
        SearchSettings settings = SearchSettingsImpl.geneSearch(line, taxon);
        List<SearchResult> geneSearchResults = searchService.search(settings).get(Gene.class); // drops
        // predicted gene
        // results

        // FIXME inform the user (on the client!) if there are some that don't have results.
        if (geneSearchResults == null || geneSearchResults.isEmpty()) {
            log.warn("No gene results for gene with id: " + line);
        } else if (geneSearchResults.size() == 1) { // Just one result so add it
            Gene g = (Gene) geneSearchResults.iterator().next().getResultObject();
            queryToGenes.get(line).add(new GeneValueObject(g));
            genesAdded++;
        } else { // Many results need to find best if possible
            Collection<Gene> notExactMatch = new HashSet<Gene>();
            Collection<GeneValueObject> sameTaxonMatch = new HashSet<GeneValueObject>();

            Boolean foundMatch = false;

            // Usually if there is more than 1 results the search term was a official symbol and picked up matches
            // like grin1, grin2, grin3, grin (given the search term was grin)
            for (SearchResult sr : geneSearchResults) {
                Gene srGene = (Gene) sr.getResultObject();
                if (srGene.getOfficialSymbol().equalsIgnoreCase(line)) {
                    queryToGenes.get(line).add(new GeneValueObject(srGene));
                    genesAdded++;
                    foundMatch = true;
                    break; // found so return
                } else if (srGene.getTaxon().equals(taxon)) {
                    sameTaxonMatch.add(new GeneValueObject(srGene));
                } else
                    notExactMatch.add(srGene);
            }

            // if no exact match found add all of them of the same taxon and toss a warning
            if (!foundMatch) {

                if (!sameTaxonMatch.isEmpty()) {

                    queryToGenes.get(line).addAll(sameTaxonMatch);

                    log.warn(sameTaxonMatch.size() + " genes found for query id = " + line
                            + ". Genes found are: " + sameTaxonMatch + ". Adding All");
                } else {
                    log.warn(notExactMatch.size() + " genes found for query id = " + line
                            + ". Genes found are: " + notExactMatch + ". Adding None");
                }
            }
        }
    }

    return queryToGenes;
}

From source file:ubic.gemma.loader.entrez.pubmed.PubMedXMLParser.java

/**
 * Populates a bibliographic reference from the child elements of a book record. The elements handled are
 * ArticleTitle (the chapter title), Book, AuthorList, Abstract, PMID and ContributionDate; any other child is
 * ignored.
 *
 * @param bibRef the reference to fill in
 * @param record the node whose child elements are processed
 * @throws IOException if one of the parsing helpers called here throws it
 */
private void processBookRecord(BibliographicReference bibRef, Node record) throws IOException {

    NodeList recordNodes = record.getChildNodes();
    for (int p = 0; p < recordNodes.getLength(); p++) {
        Node item = recordNodes.item(p);
        if (!(item instanceof Element)) {
            continue;
        }

        String name = item.getNodeName();
        if (name.equals("ArticleTitle")) {
            // this is the title of the chapter.
            bibRef.setTitle(StringUtils.strip(XMLUtils.getTextValue((Element) item)));
        } else if (name.equals("Book")) {
            processBookInfo(bibRef, item);
        } else if (name.equals("AuthorList")) {
            bibRef.setAuthorList(extractAuthorList(item.getChildNodes()));
        } else if (name.equals("Abstract")) {
            // Join the text of all AbstractText sections with single spaces. The result is trimmed once, after
            // the loop: trimming after every section would strip the separator and glue the sections together.
            StringBuilder abstractText = new StringBuilder();
            NodeList abstractTextSections = item.getChildNodes();
            for (int q = 0; q < abstractTextSections.getLength(); q++) {
                Node section = abstractTextSections.item(q);
                if (section instanceof Element && section.getNodeName().equals("AbstractText")) {
                    abstractText.append(XMLUtils.getTextValue((Element) section)).append(' ');
                }
            }
            bibRef.setAbstractText(abstractText.toString().trim());
        } else if (name.equals("PMID")) {
            processAccession(bibRef, item);
        } else if (name.equals("ContributionDate")) {
            /*
             * Unusual, but happens for books that are updated with new sections. We use this instead of the
             * publication date.
             */
            extractBookPublicationYear(bibRef, item);
        }
    }

}

From source file:ubic.gemma.loader.expression.geo.GeoFamilyParser.java

/**
 * Parses a line of the form {@code #key = value} into its key and value.
 *
 * @param line the line to parse; must start with '#'
 * @return a single-entry map from the stripped key to the stripped value, or null (after logging a warning) if
 *         the line contains no '='
 * @throws IllegalArgumentException if the line does not start with '#'
 */
private Map<String, String> extractKeyValue(String line) {
    if (!line.startsWith("#")) {
        throw new IllegalArgumentException("Wrong type of line");
    }

    // Drop the leading '#', then cut at the first '=' only, so the value may itself contain '='.
    String[] parts = line.substring(1).split("=", 2);
    if (parts.length != 2) {
        log.warn("Invalid key-value line, expected an '=' somewhere, got: '" + line + "'");
        return null;
    }

    Map<String, String> keyValue = new HashMap<String, String>();
    keyValue.put(StringUtils.strip(parts[0]), StringUtils.strip(parts[1]));
    return keyValue;
}

From source file:ubic.gemma.loader.expression.geo.GeoFamilyParser.java

/**
 * Extract a value from a line in the format xxxx=value.
 * /*from w  ww .j a v a2s . c  o m*/
 * @param line
 * @return String following the first occurrence of '=', or null if there is no '=' in the String.
 */
private String extractValue(String line) {
    int eqIndex = line.indexOf('=');
    if (eqIndex < 0) {
        return null; // that's okay, there are lines that just indicate the end of sections.
    }

    return StringUtils.strip(line.substring(eqIndex + 1));
}

From source file:ubic.gemma.loader.expression.geo.service.GeoBrowser.java

/**
 * Retrieves and parses tab delimited file from GEO. File contains pageSize
 * GEO records starting from startPage./*from  w w  w  .  ja  v a2  s .c o  m*/
 * 
 * @param startPage
 * @param pageSize
 * @return list of GeoRecords
 * @throws IOException
 * @throws ParseException
 */
public List<GeoRecord> getRecentGeoRecords(int startPage, int pageSize) throws IOException, ParseException {

    if (startPage < 0 || pageSize < 0)
        throw new IllegalArgumentException("Values must be greater than zero ");

    List<GeoRecord> records = new ArrayList<GeoRecord>();
    URL url = null;
    try {
        url = new URL(GEO_BROWSE_URL + startPage + GEO_BROWSE_SUFFIX + pageSize);
    } catch (MalformedURLException e) {
        throw new RuntimeException("Invalid URL " + url, e);
    }

    InputStream is = null;

    try {
        URLConnection conn = url.openConnection();
        conn.connect();
        is = conn.getInputStream();
    } catch (IOException e) {
        log.error(e, e);
        throw e;
    }

    // We are getting a tab delimited file.
    BufferedReader br = new BufferedReader(new InputStreamReader(is));

    // Read columns headers.
    String headerLine = br.readLine();
    String[] headers = StringUtil.csvSplit(headerLine);

    // Map column names to their indices (handy later).
    Map<String, Integer> columnNameToIndex = new HashMap<String, Integer>();
    for (int i = 0; i < headers.length; i++) {
        columnNameToIndex.put(headers[i], i);
    }

    // Read the rest of the file.
    String line = null;
    while ((line = br.readLine()) != null) {
        String[] fields = StringUtil.csvSplit(line);

        GeoRecord geoRecord = new GeoRecord();
        geoRecord.setGeoAccession(fields[columnNameToIndex.get("Accession")]);
        geoRecord.setTitle(StringUtils
                .strip(fields[columnNameToIndex.get("Title")].replaceAll(FLANKING_QUOTES_REGEX, "")));

        String sampleCountS = fields[columnNameToIndex.get("Sample Count")];
        if (StringUtils.isNotBlank(sampleCountS)) {
            try {
                geoRecord.setNumSamples(Integer.parseInt(sampleCountS));
            } catch (NumberFormatException e) {
                throw new RuntimeException("Could not parse sample count: " + sampleCountS);
            }
        } else {
            log.warn("No sample count for " + geoRecord.getGeoAccession());
        }
        geoRecord
                .setContactName(fields[columnNameToIndex.get("Contact")].replaceAll(FLANKING_QUOTES_REGEX, ""));

        String[] taxons = fields[columnNameToIndex.get("Taxonomy")].replaceAll(FLANKING_QUOTES_REGEX, "")
                .split(";");
        geoRecord.getOrganisms().addAll(Arrays.asList(taxons));

        Date date = DateUtils.parseDate(
                fields[columnNameToIndex.get("Release Date")].replaceAll(FLANKING_QUOTES_REGEX, ""),
                DATE_FORMATS);
        geoRecord.setReleaseDate(date);

        geoRecord.setSeriesType(fields[columnNameToIndex.get("Series Type")]);

        records.add(geoRecord);
    }

    is.close();

    if (records.isEmpty()) {
        log.warn("No records obtained");
    }
    return records;

}

From source file:ubic.gemma.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * This method reads the file line e.g. $Run time : Category=EnvironmentalHistory Type=categorical and creates
 * experimental factors from it and adds them to the experimental design.
 * /*ww  w. jav  a2s  .  c o  m*/
 * @param experimentalDesign Experimental design for this expression experiment
 * @param experimentalFactorFileLines List of strings representing lines from input file containing experimental
 *        factors
 * @param headerFields Sample header line split on tab.
 * @param factorValueLines Lines containing biomaterial names and their factor values
 */
private void addExperimentalFactorsToExperimentalDesign(ExperimentalDesign experimentalDesign,
        List<String> experimentalFactorFileLines, String[] headerFields, List<String> factorValueLines) {

    int maxWait = 0;
    while (!mgedOntologyService.isOntologyLoaded()) {
        try {
            Thread.sleep(1000);
            if (maxWait++ > 100) {
                throw new RuntimeException("MGED is not loaded and gave up waiting");
            }
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }

    log.info("Addding experimental factors to experimental design: " + experimentalDesign.getId());

    Collection<OntologyTerm> terms = mgedOntologyService.getMgedTermsByKey("factor");
    if (experimentalDesign.getExperimentalFactors() == null) {
        experimentalDesign.setExperimentalFactors(new HashSet<ExperimentalFactor>());
    }

    Map<String, Set<String>> mapFactorSampleValues = getMapFactorSampleValues(headerFields, factorValueLines);

    for (String experimentalFactorFileLine : experimentalFactorFileLines) {

        // $Run time : Category=EnvironmentalHistory Type=categorical
        String[] experimentalFactorfields = experimentalFactorFileLine.split(":");

        String factorValue = (StringUtils.strip(experimentalFactorfields[0]
                .replaceFirst(Pattern.quote(EXPERIMENTAL_FACTOR_DESCRIPTION_LINE_INDICATOR) + "\\s*", "")))
                        .trim();
        String categoryAndType = StringUtils.strip(experimentalFactorfields[1]);
        String[] categoryAndTypeFields = StringUtils.split(categoryAndType);

        // e.g. Category=EnvironmentalHistory
        String category = categoryAndTypeFields[0];
        // e.g. EnvironmentalHistory
        String categoryValue = StringUtils.split(category, "=")[1];

        ExperimentalFactor experimentalFactorFromFile = ExperimentalFactor.Factory.newInstance();
        experimentalFactorFromFile.setExperimentalDesign(experimentalDesign);
        VocabCharacteristic vc = mgedLookup(categoryValue, terms);

        // e.g. Category=EnvironmentalHistory
        String categoryTypeValue = categoryAndTypeFields[1];
        String factorType = StringUtils.split(categoryTypeValue, "=")[1];

        // vc.setCategory( categoryType );

        experimentalFactorFromFile.setCategory(vc);
        experimentalFactorFromFile.setName(factorValue);
        experimentalFactorFromFile.setDescription(factorValue);
        experimentalFactorFromFile.setType(
                factorType.equalsIgnoreCase("CATEGORICAL") ? FactorType.CATEGORICAL : FactorType.CONTINUOUS);

        addFactorValuesToExperimentalFactor(experimentalFactorFromFile, mapFactorSampleValues, factorType);

        if (!checkForDuplicateExperimentalFactorOnExperimentalDesign(experimentalDesign,
                experimentalFactorFromFile)) {
            // assert experimentalFactorFromFile.getId() != null;
            experimentalDesign.getExperimentalFactors().add(experimentalFactorFromFile);
            // here is was the update
            log.debug("Added experimental factor value " + experimentalFactorFromFile
                    + " to experimental design " + experimentalDesign);

        }
    }

}

From source file:ubic.gemma.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Add the factor values to the biomaterial
 * <p>
 * Each line is split on tabs. The first field identifies the biomaterial; if the second header column is
 * "ExternalID" (in any letter case) the second field is used as its external id and the factor columns start at
 * the third field, otherwise they start at the second. Blank factor values are skipped.
 *
 * @param experiment
 * @param experimentBioMaterials Current expression experiment's biomaterials.
 * @param experimentalDesign experimental design
 * @param factorValueLines Lines from file containing factor values and biomaterial ids
 * @param headerFields
 * @return the biomaterials that had at least one non-blank factor value column in the file
 * @throws IllegalStateException if no biomaterial is found for a line, or no experimental factor has the name
 *         of a column
 * @throws IllegalArgumentException if the same biomaterial occurs on more than one line
 */
private Collection<BioMaterial> addFactorValuesToBioMaterialsInExpressionExperiment(
        ExpressionExperiment experiment, Collection<BioMaterial> experimentBioMaterials,
        ExperimentalDesign experimentalDesign, List<String> factorValueLines, String[] headerFields) {
    log.debug("Adding factors values to biomaterials: " + experimentalDesign.getId());
    Collection<ExperimentalFactor> experimentalFactorsInExperiment = experimentalDesign
            .getExperimentalFactors();
    Collection<BioMaterial> biomaterialsWithFactorValuesInExperiment = new HashSet<BioMaterial>();

    Collection<BioMaterial> seenBioMaterials = new HashSet<BioMaterial>();

    Map<ExperimentalFactor, Collection<BioMaterial>> factorsAssociatedWithBioMaterials = new HashMap<ExperimentalFactor, Collection<BioMaterial>>();

    // The header is the same for every line, so decide once whether there is an external id column.
    // equalsIgnoreCase does not depend on the default locale (toUpperCase() does, e.g. for 'i' in Turkish).
    boolean hasExternalId = headerFields != null && headerFields.length > 1
            && "EXTERNALID".equalsIgnoreCase(headerFields[1]);
    int firstFactorColumn = hasExternalId ? 2 : 1;

    for (String factorValueLine : factorValueLines) {
        String[] factorValueFields = StringUtils.splitPreserveAllTokens(factorValueLine, "\t");

        String externalId = hasExternalId ? factorValueFields[1] : null;
        BioMaterial currentBioMaterial = getBioMaterialFromExpressionExperiment(experiment,
                experimentBioMaterials, factorValueFields[0], externalId);

        if (currentBioMaterial == null) {
            throw new IllegalStateException("No biomaterial for " + factorValueFields[0]);
        }

        if (seenBioMaterials.contains(currentBioMaterial)) {
            throw new IllegalArgumentException(
                    "A biomaterial occurred more than once in the file: " + currentBioMaterial);
        }

        seenBioMaterials.add(currentBioMaterial);

        for (int i = firstFactorColumn; i < factorValueFields.length; i++) {
            ExperimentalFactor currentExperimentalFactor = null;
            String currentExperimentalFactorName = StringUtils.strip(headerFields[i]);

            FactorValue currentFactorValue = null;
            String currentFactorValueValue = StringUtils.strip(factorValueFields[i]);

            if (StringUtils.isBlank(currentFactorValueValue)) {
                // Missing value. Note that catching 'NA' etc. is hard, because they could be valid strings.
                continue;
            }

            // Find the factor named like this column (the last one wins if several share the name).
            for (ExperimentalFactor experimentalFactor : experimentalFactorsInExperiment) {
                if (experimentalFactor.getName().equals(currentExperimentalFactorName)) {
                    currentExperimentalFactor = experimentalFactor;
                }
            }

            if (currentExperimentalFactor == null)
                throw new IllegalStateException("No factor matches column " + currentExperimentalFactorName);

            Collection<FactorValue> factorValuesInCurrentExperimentalFactor = currentExperimentalFactor
                    .getFactorValues();

            // Find the factor value whose text matches this field, ignoring case and surrounding whitespace.
            for (FactorValue factorValue : factorValuesInCurrentExperimentalFactor) {
                if (factorValue.getValue().trim().equalsIgnoreCase(currentFactorValueValue.trim())) {
                    currentFactorValue = factorValue;
                }
            }

            if (currentFactorValue == null) {
                log.error("Current factor value not found " + currentExperimentalFactor
                        + currentFactorValueValue);
            } else {
                if (!checkForDuplicateFactorOnBioMaterial(currentBioMaterial, currentFactorValue)) {
                    currentBioMaterial.getFactorValues().add(currentFactorValue);
                } else {
                    // already got warned.
                }
            }
            log.debug("Added factor value " + currentFactorValue + " to biomaterial " + currentBioMaterial);
            biomaterialsWithFactorValuesInExperiment.add(currentBioMaterial);

            Collection<BioMaterial> bioMaterialsForFactor = factorsAssociatedWithBioMaterials
                    .get(currentExperimentalFactor);
            if (bioMaterialsForFactor == null) {
                bioMaterialsForFactor = new HashSet<BioMaterial>();
                factorsAssociatedWithBioMaterials.put(currentExperimentalFactor, bioMaterialsForFactor);
            }
            bioMaterialsForFactor.add(currentBioMaterial);

        }

    }

    /*
     * Check if every biomaterial got used. Worth a warning, at least.
     */
    for (Map.Entry<ExperimentalFactor, Collection<BioMaterial>> entry : factorsAssociatedWithBioMaterials
            .entrySet()) {
        Collection<BioMaterial> populated = entry.getValue();
        if (!populated.containsAll(experimentBioMaterials)) {
            log.warn(
                    "File did not contain values for all factor - biomaterial combinations: Missing at least one for "
                            + entry.getKey() + " [populated " + populated.size() + "/"
                            + experimentBioMaterials.size() + " ]");
        }
    }

    return biomaterialsWithFactorValuesInExperiment;
}

From source file:ubic.gemma.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Get a map of experimental values keyed on experimental factor name
 * /*from ww  w. j  av a  2 s .  co  m*/
 * @param headerFields
 * @param factorValueLines
 * @return map of experimental factor values keyed on experimental factor
 */
private Map<String, Set<String>> getMapFactorSampleValues(String[] headerFields,
        List<String> factorValueLines) {
    Map<String, Set<String>> factorSampleValues = new HashMap<String, Set<String>>();
    for (String factorValueLine : factorValueLines) {
        String[] factorValueFields = StringUtils.splitPreserveAllTokens(factorValueLine, "\t");

        for (int i = 1; i < headerFields.length; i++) {

            // get the key
            String value = headerFields[i];
            value = StringUtils.strip(value);
            String factorValue = StringUtils.strip(factorValueFields[i]);
            Set<String> listFactorValues = factorSampleValues.get(value);
            if (listFactorValues == null) {
                listFactorValues = new HashSet<String>();
            }
            listFactorValues.add(factorValue);
            factorSampleValues.put(value, listFactorValues);

        }

    }
    return factorSampleValues;

}

From source file:ubic.gemma.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Checks that every experimental factor description line is well formed, then validates the sample header
 * against the factor names that were found.
 * <p>
 * A line is expected to look like {@code #$Run time : Category=EnvironmentalHistory Type=categorical}: splitting
 * it on ':' must yield exactly two fields, and the second field must consist of exactly two whitespace-separated
 * parts.
 *
 * @param experimentalFactorLines The lines in the file corresponding to experimental factors.
 * @param sampleHeaderLine header line, handed to the sample header validation together with the factor names
 * @throws IOException Experimental factor lines were not correctly formatted.
 */
private void validateExperimentalFactorFileContent(List<String> experimentalFactorLines,
        String sampleHeaderLine) throws IOException {
    Set<String> factorNames = new HashSet<String>();

    for (String factorLine : experimentalFactorLines) {
        String[] nameAndDetails = factorLine.split(":");
        if (nameAndDetails.length != 2) {
            throw new IOException(
                    "EF description must have two fields with a single ':' in between (" + factorLine + ")");
        }

        // The factor name is the part before the ':' without the line indicator and surrounding whitespace.
        String indicatorPattern = Pattern.quote(EXPERIMENTAL_FACTOR_DESCRIPTION_LINE_INDICATOR) + "\\s*";
        factorNames.add(StringUtils.strip(nameAndDetails[0].replaceFirst(indicatorPattern, "")));

        String[] details = StringUtils.split(StringUtils.strip(nameAndDetails[1]));
        if (details.length != 2) {
            throw new IOException("EF details should have the format 'Category=CATEGORY Type=TYPE'");
        }
    }

    validateSampleHeaderFileContent(factorNames, experimentalFactorLines.size(), sampleHeaderLine);

}