Example usage for org.apache.commons.lang StringUtils strip

List of usage examples for org.apache.commons.lang StringUtils strip

Introduction

On this page you can find example usages of org.apache.commons.lang StringUtils strip.

Prototype

public static String strip(String str) 

Source Link

Document

Strips whitespace from the start and end of a String.

Usage

From source file:ubic.gemma.loader.expression.simple.ExperimentalDesignImporterImpl.java

/**
 * Checks the tab-delimited sample header line against the known experimental factors.
 * <p>
 * Two checks are made: the header must not have more columns than the number of experimental factors plus
 * {@code NUMBER_OF_EXTRA_COLUMNS_ALLOWED}, and every column heading after the first must either be the literal
 * {@code ExternalID} or be contained in {@code experimentalFactorValueNames}. Headings are stripped of leading
 * and trailing whitespace before they are compared.
 *
 * @param experimentalFactorValueNames names a column heading is allowed to match
 * @param numberOfExperimentalFactors number of experimental factors that were described
 * @param sampleHeaderLine the raw, tab-delimited header line
 * @throws IOException if the header has too many columns or contains an unrecognized heading
 */
private void validateSampleHeaderFileContent(Set<String> experimentalFactorValueNames,
        Integer numberOfExperimentalFactors, String sampleHeaderLine) throws IOException {
    final String[] columns = StringUtils.splitPreserveAllTokens(sampleHeaderLine, "\t");

    // The header may carry the ids and the external id on top of the factor columns.
    final boolean tooManyColumns = columns.length > numberOfExperimentalFactors
            + NUMBER_OF_EXTRA_COLUMNS_ALLOWED;
    if (tooManyColumns) {
        throw new IOException("Expected " + (numberOfExperimentalFactors + NUMBER_OF_EXTRA_COLUMNS_ALLOWED)
                + " columns based on EF descriptions (plus id column), got " + columns.length);
    }

    // Column 0 is never validated (presumably the id column -- confirm against the file format).
    int col = 1;
    while (col < columns.length) {
        final String heading = StringUtils.strip(columns[col]);

        // "ExternalID" is always accepted; anything else has to be a known factor name.
        final boolean accepted = heading.equals("ExternalID")
                || experimentalFactorValueNames.contains(heading);
        if (!accepted) {
            throw new IOException("Expected to find an EF matching the column heading '" + heading + "'");
        }
        col++;
    }
}

From source file:ubic.gemma.loader.genome.FastaParser.java

/**
 * <pre>/*  w  w  w . j  av  a2  s.c o  m*/
 *        Affymetrix targets or collapsed sequence     target:array:probeset;
 *        Affymetrix &quot;style&quot; file            target:probename
 *        Affymetrix probe                             probe:array:probeset:xcoord:ycoord; Interrogation_Position=XXXX; Antisense;
 *        Affymetrix consensus/exemplar                exemplar:array:probeset; gb|accession; gb:accession /DEF=Homo sapiens metalloprotease-like, disintegrin-like, cysteine-rich protein 2 delta (ADAM22) mRNA, alternative splice product, complete cds.  /FEA=mRNA /GEN=ADAM22 /PROD=metalloprotease-like,
 *        Affymetrix-like format                       array:probe or other string containing ':'.
 * </pre>
 * 
 * @param bioSequence
 * @param header
 * @return
 */
private boolean parseAffyHeader(BioSequence bioSequence, String header) {
    // affymetrix format
    String[] split = StringUtils.split(header, ":;");

    String firstTag = StringUtils.removeStart(split[0], ">");
    if (firstTag.equals("probe")) {
        bioSequence.setName(split[1] + ":" + split[2] + ":" + split[3] + ":" + split[4]);
    } else if (firstTag.equals("target")) {
        // split[1] = array name or probe name
        // split[2] = probe name
        if (split.length > 2) {
            bioSequence.setName(split[2]);
        } else {
            bioSequence.setName(split[1]);
        }

    } else if (firstTag.equals("exemplar")) {
        bioSequence.setName(split[1] + ":" + split[2]);
        bioSequence.setDescription(split[3]);
    } else {
        // This is the case if the xxxx:xxxx format is used on non-affy
        bioSequence.setName(StringUtils.removeStart(header, ">"));
        return true;
    }

    for (String string : split) {

        string = StringUtils.strip(string);

        // fill in the sequence database entry
        if (string.startsWith("gb|") || string.startsWith("gb:")) {
            String[] splits = StringUtils.split(string, ":|");
            String genbankAcc = splits[1];
            DatabaseEntry genbank = ExternalDatabaseUtils.getGenbankAccession(genbankAcc);
            bioSequence.setName(genbank.getAccession());
            bioSequence.setSequenceDatabaseEntry(genbank);
            if (log.isDebugEnabled())
                log.debug("Got genbank accession " + genbankAcc + " for " + bioSequence.getName());
            break;
        }

    }
    return true;
}

From source file:ubic.gemma.loader.genome.GffParser.java

/**
 * Builds a {@code Gene} with a single {@code GeneProduct} from one tab-delimited GFF line.
 * <p>
 * Columns read (0-based): 0 = sequence/chromosome name, 2 = feature type, 3 = start, 4 = end, 6 = strand,
 * 8 = attributes. The attributes column is a ';'-separated list of {@code key=value} pairs; the {@code ID}
 * value (with double quotes removed) becomes the gene name, official symbol and product name. {@code ACC}
 * is recognized but currently ignored. Attributes that are empty or contain only whitespace are skipped.
 *
 * @param line one line of the GFF file
 * @return the newly created gene, carrying its product and physical location
 * @throws IllegalStateException if the taxon has not been set
 * @throws IllegalArgumentException if a non-blank attribute does not split into exactly two parts on '='
 */
@Override
public Gene parseOneLine(String line) {

    if (this.taxon == null) {
        throw new IllegalStateException("You must set the taxon first");
    }

    String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');
    Gene newGene = Gene.Factory.newInstance();
    GeneProduct geneProduct = GeneProduct.Factory.newInstance();

    String seqName = fields[0]; // chromosome
    String featureType = fields[2];
    long start = Long.parseLong(fields[3]);
    long end = Long.parseLong(fields[4]);
    int length = (int) (end - start);
    String strand = fields[6];

    String attributes = fields[8];

    newGene.setDescription(featureType);
    geneProduct.setDescription(featureType);

    String[] attFields = StringUtils.splitPreserveAllTokens(attributes, ';');
    for (String rawAttribute : attFields) {

        // Strip BEFORE testing for emptiness: a whitespace-only token (for instance the one left after a
        // trailing "; ") would otherwise reach the '=' split and be rejected as unparseable.
        String f = StringUtils.strip(rawAttribute);
        if (f == null || f.length() == 0) {
            continue;
        }

        log.debug(f);
        String[] subf = StringUtils.split(f, '=');

        if (subf.length != 2) {
            throw new IllegalArgumentException("Couldn't parse '" + f + "'");
        }

        String ti = subf[0];
        String val = subf[1];

        if (ti.equals("ID")) {
            val = val.replaceAll("\"", "");
            newGene.setName(val);
            newGene.setOfficialSymbol(val);
            geneProduct.setName(val);
        } else if (ti.equals("ACC")) {
            // don't know what database!
        }
    }

    Chromosome chromosome = Chromosome.Factory.newInstance();
    chromosome.setName(seqName);
    chromosome.setTaxon(taxon);

    PhysicalLocation location = PhysicalLocation.Factory.newInstance();
    location.setChromosome(chromosome);
    location.setNucleotide(start);
    location.setNucleotideLength(length);
    location.setBin(SequenceBinUtils.binFromRange(location.getNucleotide().intValue(),
            location.getNucleotide().intValue() + location.getNucleotideLength().intValue()));
    location.setStrand(strand);

    geneProduct.setPhysicalLocation(location);
    geneProduct.setGene(newGene);

    newGene.setTaxon(taxon);
    newGene.setPhysicalLocation(location);
    newGene.getProducts().add(geneProduct);

    return newGene;

}

From source file:ubic.gemma.loader.genome.taxon.TaxonParser.java

/**
 * Parses one '|'-delimited line and records the name it carries on the taxon with the given NCBI id.
 * <p>
 * Field 0 is the NCBI id, field 1 the name and field 3 the kind of name. A taxon is created and registered in
 * {@code results} the first time an id is seen. Lines tagged {@code scientific name} set the scientific name,
 * lines tagged {@code genbank common name} set the common name; other tags leave the taxon unchanged. All
 * fields are stripped of surrounding whitespace before use.
 *
 * @param line one line of the names file
 * @return the taxon registered for the id on this line
 */
@Override
public Taxon parseOneLine(String line) {
    String[] fields = StringUtils.splitPreserveAllTokens(line, '|');

    int ncbiid = Integer.parseInt(StringUtils.strip(fields[0]));

    if (!results.containsKey(ncbiid)) {
        Taxon t = Taxon.Factory.newInstance();
        t.setNcbiId(ncbiid);
        t.setIsGenesUsable(false);
        t.setIsSpecies(true);
        results.put(ncbiid, t);
    }

    String tag = StringUtils.strip(fields[3]);
    // Strip the name once so both name kinds are stored without the padding around the '|' delimiter.
    String name = StringUtils.strip(fields[1]);
    if (tag.equals("scientific name")) {
        results.get(ncbiid).setScientificName(name);
    } else if (tag.equals("genbank common name")) {
        results.get(ncbiid).setCommonName(name);
    }

    return results.get(ncbiid);

}

From source file:ubic.gemma.loader.pazar.PazarParser.java

/**
 * Converts one tab-delimited line into a {@code PazarRecord}.
 * <p>
 * Columns read (0-based): 0 = PAZAR TF id (stripped), 1 = TF accession, 2 = species, 3 = PAZAR target gene
 * id, 4 = target gene accession, 6 = project, 7 = PubMed id. Column 8 is currently not read.
 *
 * @param line one line of the file
 * @return the parsed record, or {@code null} if the line is null, empty, the header line (starts with
 *         {@code TF_PAZAR_ID}), or has fewer columns than the parser reads
 */
@Override
public PazarRecord parseOneLine(String line) {
    if (line == null || line.isEmpty()) {
        return null;
    }

    if (line.startsWith("TF_PAZAR_ID")) {
        return null;
    }

    String[] fields = StringUtils.splitPreserveAllTokens(line, '\t');

    // The highest index read below is 7, so a usable line needs at least 8 columns. Shorter lines are
    // skipped like other unusable lines instead of failing with an index error.
    final int requiredColumns = 8;
    if (fields.length < requiredColumns) {
        return null;
    }

    PazarRecord r = new PazarRecord();

    r.setPazarTfId(StringUtils.strip(fields[0]));
    r.setTfAcc(fields[1]);
    r.setSpecies(fields[2]);
    r.setPazarTargetGeneId(fields[3]);
    r.setTargetGeneAcc(fields[4]);
    r.setProject(fields[6]);
    r.setPubMedId(fields[7]);

    return r;

}

From source file:ubic.gemma.ontology.providers.MgedOntologyService.java

/**
 * @param key/* w  ww  .  ja  v a 2s .  c  o  m*/
 * @return
 */
public Collection<OntologyTerm> getMgedTermsByKey(String key) {
    Collection<OntologyTerm> t = keyToTermListCache.get(key);
    if (t == null) {
        URL termListUrl = keyToTermListUrl.get(key);
        if (termListUrl == null) {
            log.warn("Unknown term list key '" + key + "'; returning general term list");
            t = getUsefulMgedTerms();
        } else {
            t = new HashSet<OntologyTerm>();
            try {
                Collection<String> wantedTerms = new ArrayList<String>();
                BufferedReader reader = new BufferedReader(new InputStreamReader(termListUrl.openStream()));
                String line;
                while ((line = reader.readLine()) != null) {
                    if (line.startsWith("#"))
                        continue;
                    wantedTerms.add(StringUtils.strip(line));
                }
                reader.close();

                for (OntologyTerm term : getUsefulMgedTerms()) {
                    if (wantedTerms.contains(term.getTerm()))
                        t.add(term);
                }
            } catch (IOException ioe) {
                log.error("Error reading from term list '" + termListUrl + "'; returning general term list",
                        ioe);
                t = getUsefulMgedTerms();
            }
        }
        t = Collections.unmodifiableCollection(t);
        keyToTermListCache.put(key, t);
    }
    return t;
}

From source file:ubic.gemma.search.GeneSetSearchImpl.java

/**
 * Looks up the GO term for the given id (stripped of surrounding whitespace) and converts it to a gene set.
 *
 * @param goId the GO term id
 * @param taxon taxon to restrict to; when null the gene set holds genes from different taxa
 * @return the gene set for the term, or {@code null} if no term is found for the id
 */
@Override
public GeneSet findByGoId(String goId, Taxon taxon) {
    final String strippedId = StringUtils.strip(goId);
    final OntologyTerm term = GeneOntologyServiceImpl.getTermForId(strippedId);

    if (term != null) {
        // A null taxon is passed through as-is; the result then spans several taxa.
        return goTermToGeneSet(term, taxon);
    }
    return null;
}

From source file:ubic.gemma.search.GeneSetSearchImpl.java

/**
 * Finds GO terms matching the given name (stripped of surrounding whitespace) and converts them to gene sets.
 * <p>
 * Terms whose conversion yields {@code null} (the gene set would be larger than {@code maxGeneSetSize}) are
 * left out and do not count towards {@code maxGoTermsProcessed}. Conversion stops as soon as
 * {@code maxGoTermsProcessed} terms have been converted, so the cap is never exceeded.
 *
 * @param goTermName name to search for
 * @param taxon taxon passed on to the conversion
 * @param maxGoTermsProcessed maximum number of terms to convert, or null for no limit
 * @param maxGeneSetSize size limit passed on to the conversion
 * @return the converted gene sets; possibly empty, never null
 */
@Override
public Collection<GeneSet> findByGoTermName(String goTermName, Taxon taxon, Integer maxGoTermsProcessed,
        Integer maxGeneSetSize) {
    Collection<? extends OntologyResource> matches = this.geneOntologyService
            .findTerm(StringUtils.strip(goTermName));

    Collection<GeneSet> results = new HashSet<GeneSet>();

    // Primitive counter: nothing here needs a boxed Integer.
    int termsProcessed = 0;

    for (OntologyResource t : matches) {
        // Test the cap before converting so that at most maxGoTermsProcessed terms are ever converted.
        if (maxGoTermsProcessed != null && termsProcessed >= maxGoTermsProcessed) {
            break;
        }

        GeneSet converted = goTermToGeneSet(t, taxon, maxGeneSetSize);
        // converted will be null if its size is more than maxGeneSetSize
        if (converted == null) {
            continue;
        }

        results.add(converted);
        termsProcessed++;
    }

    return results;

}

From source file:ubic.gemma.search.GeneSetSearchImpl.java

/**
 * Searches the ontology for phenotypes matching the query (stripped of surrounding whitespace) and converts
 * each hit to a gene set value object. Hits whose conversion yields {@code null} are left out. The number of
 * results is logged, and the conversion time is logged when it exceeds one second.
 *
 * @param phenotypeQuery the phenotype search string
 * @param taxon taxon passed on to the conversion
 * @return the converted gene sets; possibly empty, never null
 */
@Override
public Collection<GeneSetValueObject> findByPhenotypeName(String phenotypeQuery, Taxon taxon) {

    final String strippedQuery = StringUtils.strip(phenotypeQuery);
    Collection<CharacteristicValueObject> phenotypes = phenotypeAssociationManagerService
            .searchOntologyForPhenotypes(strippedQuery, null);

    Collection<GeneSetValueObject> geneSets = new HashSet<GeneSetValueObject>();

    // Only the conversion below is timed, not the ontology search above.
    StopWatch timer = new StopWatch();
    timer.start();
    log.debug(" Converting CharacteristicValueObjects collection(size:" + phenotypes.size()
            + ") into GeneSets for  phenotype query " + phenotypeQuery);

    int added = 0;
    for (CharacteristicValueObject phenotype : phenotypes) {
        GeneSetValueObject geneSet = phenotypeAssociationToGeneSet(phenotype, taxon);
        if (geneSet == null) {
            continue;
        }
        added++;
        geneSets.add(geneSet);
    }
    log.info("added " + added + " results");

    // Report slow conversions (more than one second).
    if (timer.getTime() > 1000) {
        log.info("Converted CharacteristicValueObjects collection(size:" + phenotypes.size()
                + ") into GeneSets for  phenotype query " + phenotypeQuery + " in " + timer.getTime() + "ms");
    }
    return geneSets;

}

From source file:ubic.gemma.search.GeneSetSearchImpl.java

/**
 * Finds gene sets by name, ignoring whitespace at the start and end of the given name.
 *
 * @param name the name to look for
 * @return whatever the gene set service returns for the stripped name
 */
@Override
public Collection<GeneSet> findByName(String name) {
    final String strippedName = StringUtils.strip(name);
    return geneSetService.findByName(strippedName);
}