List of usage examples for org.apache.commons.lang StringUtils strip
public static String strip(String str)
Strips whitespace from the start and end of a String.
From source file:ubic.gemma.loader.expression.simple.ExperimentalDesignImporterImpl.java
/** * Validates that the sample header is correctly formatted. Checks that the experimental factors defined in the * header match those in the experimental factor file lines. * //from www .jav a 2s .com * @param experimentalFactorValueNames * @param numberOfExperimentalFactors * @param sampleHeaderLine * @throws IOException Validation fails. */ private void validateSampleHeaderFileContent(Set<String> experimentalFactorValueNames, Integer numberOfExperimentalFactors, String sampleHeaderLine) throws IOException { String[] headerFields = StringUtils.splitPreserveAllTokens(sampleHeaderLine, "\t"); // we might have the ids, and the external id. if (headerFields.length > numberOfExperimentalFactors + NUMBER_OF_EXTRA_COLUMNS_ALLOWED) { throw new IOException("Expected " + (numberOfExperimentalFactors + NUMBER_OF_EXTRA_COLUMNS_ALLOWED) + " columns based on EF descriptions (plus id column), got " + headerFields.length); } for (int i = 1; i < headerFields.length; i++) { String value = headerFields[i]; value = StringUtils.strip(value); if (value.equals("ExternalID")) { // that's fine. continue; } if (!experimentalFactorValueNames.contains(value)) { throw new IOException("Expected to find an EF matching the column heading '" + value + "'"); } } }
From source file:ubic.gemma.loader.genome.FastaParser.java
/** * <pre>/* w w w . j av a2 s.c o m*/ * Affymetrix targets or collapsed sequence target:array:probeset; * Affymetrix "style" file target:probename * Affymetrix probe probe:array:probeset:xcoord:ycoord; Interrogation_Position=XXXX; Antisense; * Affymetrix consensus/exemplar exemplar:array:probeset; gb|accession; gb:accession /DEF=Homo sapiens metalloprotease-like, disintegrin-like, cysteine-rich protein 2 delta (ADAM22) mRNA, alternative splice product, complete cds. /FEA=mRNA /GEN=ADAM22 /PROD=metalloprotease-like, * Affymetrix-like format array:probe or other string containing ':'. * </pre> * * @param bioSequence * @param header * @return */ private boolean parseAffyHeader(BioSequence bioSequence, String header) { // affymetrix format String[] split = StringUtils.split(header, ":;"); String firstTag = StringUtils.removeStart(split[0], ">"); if (firstTag.equals("probe")) { bioSequence.setName(split[1] + ":" + split[2] + ":" + split[3] + ":" + split[4]); } else if (firstTag.equals("target")) { // split[1] = array name or probe name // split[2] = probe name if (split.length > 2) { bioSequence.setName(split[2]); } else { bioSequence.setName(split[1]); } } else if (firstTag.equals("exemplar")) { bioSequence.setName(split[1] + ":" + split[2]); bioSequence.setDescription(split[3]); } else { // This is the case if the xxxx:xxxx format is used on non-affy bioSequence.setName(StringUtils.removeStart(header, ">")); return true; } for (String string : split) { string = StringUtils.strip(string); // fill in the sequence database entry if (string.startsWith("gb|") || string.startsWith("gb:")) { String[] splits = StringUtils.split(string, ":|"); String genbankAcc = splits[1]; DatabaseEntry genbank = ExternalDatabaseUtils.getGenbankAccession(genbankAcc); bioSequence.setName(genbank.getAccession()); bioSequence.setSequenceDatabaseEntry(genbank); if (log.isDebugEnabled()) log.debug("Got genbank accession " + genbankAcc + " for " + bioSequence.getName()); break; } } return true; }
From source file:ubic.gemma.loader.genome.GffParser.java
@Override public Gene parseOneLine(String line) { if (this.taxon == null) { throw new IllegalStateException("You must set the taxon first"); }//from w w w .j a va 2 s . c o m String[] fields = StringUtils.splitPreserveAllTokens(line, '\t'); Gene newGene = Gene.Factory.newInstance(); GeneProduct geneProduct = GeneProduct.Factory.newInstance(); String seqName = fields[0]; // chromosome // String source = fields[1]; String featureType = fields[2]; long start = Long.parseLong(fields[3]); long end = Long.parseLong(fields[4]); int length = (int) (end - start); String strand = fields[6]; String attributes = fields[8]; newGene.setDescription(featureType); geneProduct.setDescription(featureType); String[] attFields = StringUtils.splitPreserveAllTokens(attributes, ';'); for (int i = 0; i < attFields.length; i++) { String f = attFields[i]; if (f == null || f.length() == 0) { continue; } f = StringUtils.strip(f); log.debug(f); String[] subf = StringUtils.split(f, '='); if (subf.length != 2) { throw new IllegalArgumentException("Couldn't parse '" + f + "'"); } String ti = subf[0]; String val = subf[1]; if (ti.equals("ID")) { val = val.replaceAll("\"", ""); newGene.setName(val); newGene.setOfficialSymbol(val); geneProduct.setName(val); } else if (ti.equals("ACC")) { // don't know what database! 
} } // String comments = fields[9]; Chromosome chromosome = Chromosome.Factory.newInstance(); chromosome.setName(seqName); chromosome.setTaxon(taxon); PhysicalLocation location = PhysicalLocation.Factory.newInstance(); location.setChromosome(chromosome); location.setNucleotide(start); location.setNucleotideLength(length); location.setBin(SequenceBinUtils.binFromRange(location.getNucleotide().intValue(), location.getNucleotide().intValue() + location.getNucleotideLength().intValue())); location.setStrand(strand); geneProduct.setPhysicalLocation(location); geneProduct.setGene(newGene); newGene.setTaxon(taxon); newGene.setPhysicalLocation(location); newGene.getProducts().add(geneProduct); return newGene; }
From source file:ubic.gemma.loader.genome.taxon.TaxonParser.java
@Override public Taxon parseOneLine(String line) { String[] fields = StringUtils.splitPreserveAllTokens(line, '|'); int ncbiid = Integer.parseInt(StringUtils.strip(fields[0])); if (!results.containsKey(ncbiid)) { Taxon t = Taxon.Factory.newInstance(); t.setNcbiId(ncbiid);//from w w w . j ava 2s . c o m t.setIsGenesUsable(false); t.setIsSpecies(true); results.put(ncbiid, t); } String tag = StringUtils.strip(fields[3]); if (tag.equals("scientific name")) { results.get(ncbiid).setScientificName(StringUtils.strip(fields[1])); } else if (tag.equals("genbank common name")) { results.get(ncbiid).setCommonName(fields[1]); } return results.get(ncbiid); }
From source file:ubic.gemma.loader.pazar.PazarParser.java
@Override public PazarRecord parseOneLine(String line) { if (line == null || line.isEmpty()) return null; if (line.startsWith("TF_PAZAR_ID")) return null; String[] fields = StringUtils.splitPreserveAllTokens(line, '\t'); if (fields.length < 2) return null; PazarRecord r = new PazarRecord(); r.setPazarTfId(StringUtils.strip(fields[0])); r.setTfAcc(fields[1]);/*from w ww.j av a 2s .co m*/ r.setSpecies(fields[2]); r.setPazarTargetGeneId(fields[3]); r.setTargetGeneAcc(fields[4]); r.setProject(fields[6]); r.setPubMedId(fields[7]); // r.setMethod(fields[8); return r; }
From source file:ubic.gemma.ontology.providers.MgedOntologyService.java
/** * @param key/* w ww . ja v a 2s . c o m*/ * @return */ public Collection<OntologyTerm> getMgedTermsByKey(String key) { Collection<OntologyTerm> t = keyToTermListCache.get(key); if (t == null) { URL termListUrl = keyToTermListUrl.get(key); if (termListUrl == null) { log.warn("Unknown term list key '" + key + "'; returning general term list"); t = getUsefulMgedTerms(); } else { t = new HashSet<OntologyTerm>(); try { Collection<String> wantedTerms = new ArrayList<String>(); BufferedReader reader = new BufferedReader(new InputStreamReader(termListUrl.openStream())); String line; while ((line = reader.readLine()) != null) { if (line.startsWith("#")) continue; wantedTerms.add(StringUtils.strip(line)); } reader.close(); for (OntologyTerm term : getUsefulMgedTerms()) { if (wantedTerms.contains(term.getTerm())) t.add(term); } } catch (IOException ioe) { log.error("Error reading from term list '" + termListUrl + "'; returning general term list", ioe); t = getUsefulMgedTerms(); } } t = Collections.unmodifiableCollection(t); keyToTermListCache.put(key, t); } return t; }
From source file:ubic.gemma.search.GeneSetSearchImpl.java
@Override public GeneSet findByGoId(String goId, Taxon taxon) { OntologyTerm goTerm = GeneOntologyServiceImpl.getTermForId(StringUtils.strip(goId)); if (goTerm == null) { return null; }/*w w w . ja va2 s. c o m*/ // if taxon is null, this returns a geneset with genes from different taxons return goTermToGeneSet(goTerm, taxon); }
From source file:ubic.gemma.search.GeneSetSearchImpl.java
@Override public Collection<GeneSet> findByGoTermName(String goTermName, Taxon taxon, Integer maxGoTermsProcessed, Integer maxGeneSetSize) { Collection<? extends OntologyResource> matches = this.geneOntologyService .findTerm(StringUtils.strip(goTermName)); Collection<GeneSet> results = new HashSet<GeneSet>(); Integer termsProcessed = 0;/*from w w w . j a v a2 s. co m*/ for (OntologyResource t : matches) { GeneSet converted = goTermToGeneSet(t, taxon, maxGeneSetSize); // converted will be null if its size is more than maxGeneSetSize if (converted != null) { results.add(converted); if (maxGoTermsProcessed != null) { termsProcessed++; if (termsProcessed > maxGoTermsProcessed) { return results; } } } } return results; }
From source file:ubic.gemma.search.GeneSetSearchImpl.java
@Override public Collection<GeneSetValueObject> findByPhenotypeName(String phenotypeQuery, Taxon taxon) { Collection<CharacteristicValueObject> phenotypes = phenotypeAssociationManagerService .searchOntologyForPhenotypes(StringUtils.strip(phenotypeQuery), null); Collection<GeneSetValueObject> results = new HashSet<GeneSetValueObject>(); StopWatch timer = new StopWatch(); timer.start();//from www.j a v a 2 s . c o m log.debug(" Converting CharacteristicValueObjects collection(size:" + phenotypes.size() + ") into GeneSets for phenotype query " + phenotypeQuery); int convertedCount = 0; for (CharacteristicValueObject cvo : phenotypes) { GeneSetValueObject converted = phenotypeAssociationToGeneSet(cvo, taxon); if (converted != null) { convertedCount++; results.add(converted); } } log.info("added " + convertedCount + " results"); if (timer.getTime() > 1000) { log.info("Converted CharacteristicValueObjects collection(size:" + phenotypes.size() + ") into GeneSets for phenotype query " + phenotypeQuery + " in " + timer.getTime() + "ms"); } return results; }
From source file:ubic.gemma.search.GeneSetSearchImpl.java
/**
 * Finds gene sets by name, delegating to the gene set service.
 *
 * @param name the name to look for; surrounding whitespace is stripped before the lookup
 * @return whatever the gene set service returns for the stripped name
 */
@Override
public Collection<GeneSet> findByName(String name) {
    String strippedName = StringUtils.strip(name);
    return geneSetService.findByName(strippedName);
}