List of usage examples for javax.xml.stream.events.Characters.getData()
/** Returns the character data carried by this {@code javax.xml.stream.events.Characters} event. */
public String getData();
From source file:edu.jhu.hlt.concrete.ingesters.webposts.WebPostIngester.java
@Override public Communication fromCharacterBasedFile(final Path path) throws IngestException { if (!Files.exists(path)) throw new IngestException("No file at: " + path.toString()); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(); AnalyticUUIDGenerator g = f.create(); Communication c = new Communication(); c.setUuid(g.next());/*from w w w . j a va 2 s.com*/ c.setType(this.getKind()); c.setMetadata(TooledMetadataConverter.convert(this)); try { ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path); c.setId(ef.getName().split("\\.")[0]); } catch (NoSuchFileException | NotFileException e) { // might throw if path is a directory. throw new IngestException(path.toString() + " is not a file, or is a directory."); } String content; try (InputStream is = Files.newInputStream(path); BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) { content = IOUtils.toString(bin, StandardCharsets.UTF_8); c.setText(content); } catch (IOException e) { throw new IngestException(e); } try (InputStream is = Files.newInputStream(path); BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8); BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) { XMLEventReader rdr = null; try { rdr = inF.createXMLEventReader(reader); // Below method moves the reader // to the headline end element. Section headline = this.handleBeginning(rdr, content, c); headline.setUuid(g.next()); c.addToSectionList(headline); TextSpan sts = headline.getTextSpan(); LOGGER.debug("headline text: {}", c.getText().substring(sts.getStart(), sts.getEnding())); int sectNumber = 1; int subSect = 0; int currOff = -1; // Big amounts of characters. while (rdr.hasNext()) { XMLEvent nextEvent = rdr.nextEvent(); currOff = nextEvent.getLocation().getCharacterOffset(); // First: see if document is going to end. // If yes: exit. if (nextEvent.isEndDocument()) break; // region // enables ingestion of quotes inside a usenet webpost. 
// by Tongfei Chen if (nextEvent.isStartElement() && nextEvent.asStartElement().getName().equals(QName.valueOf("QUOTE"))) { Attribute attrQuote = nextEvent.asStartElement() .getAttributeByName(QName.valueOf("PREVIOUSPOST")); String quote = StringEscapeUtils.escapeXml(attrQuote.getValue()); int location = attrQuote.getLocation().getCharacterOffset() + "<QUOTE PREVIOUSPOST=\"".length(); Section quoteSection = new Section(g.next(), "quote") .setTextSpan(new TextSpan(location, location + quote.length())); c.addToSectionList(quoteSection); } // endregion // Check if start element. if (nextEvent.isCharacters()) { Characters chars = nextEvent.asCharacters(); if (!chars.isWhiteSpace()) { String fpContent = chars.getData(); LOGGER.debug("Character offset: {}", currOff); LOGGER.debug("Character based data: {}", fpContent); SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent); final int tsb = currOff + pads.getKey(); final int tse = currOff + fpContent.replace("\"", """).replace("<", "<") .replace(">", ">").length() - (pads.getValue()); // MAINTAIN CORRECT TEXT SPAN // CANNOT USE StringEscapeUtils.escapeXml because it will escape "'", which // is not escaped in the data // @tongfei LOGGER.debug("Section text: {}", content.substring(tsb, tse)); TextSpan ts = new TextSpan(tsb, tse); String sk; if (subSect == 0) sk = "poster"; else if (subSect == 1) sk = "postdate"; else sk = "post"; Section s = new Section(); s.setKind(sk); s.setTextSpan(ts); s.setUuid(g.next()); List<Integer> intList = new ArrayList<>(); intList.add(sectNumber); intList.add(subSect); s.setNumberList(intList); c.addToSectionList(s); subSect++; } } else if (nextEvent.isEndElement()) { EndElement ee = nextEvent.asEndElement(); currOff = ee.getLocation().getCharacterOffset(); QName name = ee.getName(); String localName = name.getLocalPart(); LOGGER.debug("Hit end element: {}", localName); if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) { LOGGER.debug("Switching to new post."); sectNumber++; 
subSect = 0; } else if (localName.equalsIgnoreCase(TEXT_LOCAL_NAME)) { // done with document. break; } } } return c; } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException | ClassCastException x) { throw new IngestException(x); } finally { if (rdr != null) try { rdr.close(); } catch (XMLStreamException e) { // not likely. LOGGER.info("Error closing XMLReader.", e); } } } catch (IOException e) { throw new IngestException(e); } }
From source file:act.installer.pubchem.PubchemParser.java
/** * Incrementally parses a stream of XML events from a PubChem file, extracting the next available PC-Compound entry * as a Chemical object./*from ww w .j av a2s . c o m*/ * @param eventReader The xml event reader we are parsing the XML from * @return The constructed chemical * @throws XMLStreamException * @throws XPathExpressionException */ public Chemical extractNextChemicalFromXMLStream(XMLEventReader eventReader) throws XMLStreamException, JaxenException { Document bufferDoc = null; Element currentElement = null; StringBuilder textBuffer = null; /* With help from * http://stackoverflow.com/questions/7998733/loading-local-chunks-in-dom-while-parsing-a-large-xml-file-in-sax-java */ while (eventReader.hasNext()) { XMLEvent event = eventReader.nextEvent(); switch (event.getEventType()) { case XMLStreamConstants.START_ELEMENT: String eventName = event.asStartElement().getName().getLocalPart(); if (COMPOUND_DOC_TAG.equals(eventName)) { // Create a new document if we've found the start of a compound object. bufferDoc = documentBuilder.newDocument(); currentElement = bufferDoc.createElement(eventName); bufferDoc.appendChild(currentElement); } else if (currentElement != null) { // Wait until we've found a compound entry to start slurping up data. // Create a new child element and push down the current pointer when we find a new node. Element newElement = bufferDoc.createElement(eventName); currentElement.appendChild(newElement); currentElement = newElement; } // If we aren't in a PC-Compound tree, we just let the elements pass by. break; case XMLStreamConstants.CHARACTERS: if (currentElement == null) { // Ignore this event if we're not in a PC-Compound tree. continue; } Characters chars = event.asCharacters(); // Ignore only whitespace strings, which just inflate the size of the DOM. Text coalescing makes this safe. if (chars.isWhiteSpace()) { continue; } // Rely on the XMLEventStream to coalesce consecutive text events. 
Text textNode = bufferDoc.createTextNode(chars.getData()); currentElement.appendChild(textNode); break; case XMLStreamConstants.END_ELEMENT: if (currentElement == null) { // Ignore this event if we're not in a PC-Compound tree. continue; } eventName = event.asEndElement().getName().getLocalPart(); Node parentNode = currentElement.getParentNode(); if (parentNode instanceof Element) { currentElement = (Element) parentNode; } else if (parentNode instanceof Document && eventName.equals(COMPOUND_DOC_TAG)) { // We're back at the top of the node stack! Convert the buffered document into a Chemical. PubchemEntry entry = extractPCCompoundFeatures(bufferDoc); if (entry != null) { return entry.asChemical(); } else { // Skip this entry if we can't process it correctly by resetting the world and continuing on. bufferDoc = null; currentElement = null; } } else { // This should not happen, but is here as a sanity check. throw new RuntimeException(String.format("Parent of XML element %s is of type %d, not Element", currentElement.getTagName(), parentNode.getNodeType())); } break; // TODO: do we care about attributes or other XML structures? } } // Return null when we run out of chemicals, just like readLine(). return null; }
From source file:edu.jhu.hlt.concrete.ingesters.bolt.BoltForumPostIngester.java
@Override public Communication fromCharacterBasedFile(final Path path) throws IngestException { if (!Files.exists(path)) throw new IngestException("No file at: " + path.toString()); AnalyticUUIDGeneratorFactory f = new AnalyticUUIDGeneratorFactory(); AnalyticUUIDGenerator gen = f.create(); Communication c = new Communication(); c.setUuid(gen.next());//from ww w .j av a 2s. c om c.setType(this.getKind()); c.setMetadata(TooledMetadataConverter.convert(this)); try { ExistingNonDirectoryFile ef = new ExistingNonDirectoryFile(path); c.setId(ef.getName().split("\\.")[0]); } catch (NoSuchFileException | NotFileException e) { // might throw if path is a directory. throw new IngestException(path.toString() + " is not a file, or is a directory."); } String content; try (InputStream is = Files.newInputStream(path); BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8);) { content = IOUtils.toString(bin, StandardCharsets.UTF_8); c.setText(content); } catch (IOException e) { throw new IngestException(e); } try (InputStream is = Files.newInputStream(path); BufferedInputStream bin = new BufferedInputStream(is, 1024 * 8 * 8); BufferedReader reader = new BufferedReader(new InputStreamReader(bin, StandardCharsets.UTF_8));) { XMLEventReader rdr = null; try { rdr = inF.createXMLEventReader(reader); // Below method moves the reader // to the first post element. Section headline = handleHeadline(rdr, content); headline.setUuid(gen.next()); c.addToSectionList(headline); int start = headline.getTextSpan().getStart(); int ending = headline.getTextSpan().getEnding(); if (ending < start) ending = start; // @tongfei: handle empty headlines String htxt = c.getText().substring(start, ending); LOGGER.debug("headline text: {}", htxt); // Section indices. int sectNumber = 1; int subSect = 0; // Move iterator to post start element. this.iterateToPosts(rdr); // Offset pointer. int currOff = -1; SectionFactory sf = new SectionFactory(gen); // First post element. 
while (rdr.hasNext()) { XMLEvent nextEvent = rdr.nextEvent(); currOff = nextEvent.getLocation().getCharacterOffset(); if (currOff > 0) { int currOffPlus = currOff + 20; int currOffLess = currOff - 20; LOGGER.debug("Offset: {}", currOff); if (currOffPlus < content.length()) LOGGER.debug("Surrounding text: {}", content.substring(currOffLess, currOffPlus)); } // First: see if document is going to end. // If yes: exit. if (nextEvent.isEndDocument()) break; // XMLEvent peeker = rdr.peek(); // Check if start element. if (nextEvent.isStartElement()) { StartElement se = nextEvent.asStartElement(); QName name = se.getName(); final String localName = name.getLocalPart(); LOGGER.debug("Hit start element: {}", localName); //region // Add sections for authors and datetimes for each bolt post // by Tongfei Chen Attribute attrAuthor = se.getAttributeByName(QName.valueOf("author")); Attribute attrDateTime = se.getAttributeByName(QName.valueOf("datetime")); if (attrAuthor != null && attrDateTime != null) { int loc = attrAuthor.getLocation().getCharacterOffset(); int sectAuthorBeginningOffset = loc + "<post author=\"".length(); Section sectAuthor = sf.fromTextSpan(new TextSpan(sectAuthorBeginningOffset, sectAuthorBeginningOffset + attrAuthor.getValue().length()), "author"); c.addToSectionList(sectAuthor); int sectDateTimeBeginningOffset = sectAuthorBeginningOffset + attrAuthor.getValue().length() + " datetime=".length(); Section sectDateTime = sf.fromTextSpan( new TextSpan(sectDateTimeBeginningOffset, sectDateTimeBeginningOffset + attrDateTime.getValue().length()), "datetime"); c.addToSectionList(sectDateTime); } //endregion // Move past quotes, images, and links. 
if (localName.equals(QUOTE_LOCAL_NAME)) { this.handleQuote(rdr); } else if (localName.equals(IMG_LOCAL_NAME)) { this.handleImg(rdr); } else if (localName.equals(LINK_LOCAL_NAME)) { this.handleLink(rdr); } // not a start element } else if (nextEvent.isCharacters()) { Characters chars = nextEvent.asCharacters(); int coff = chars.getLocation().getCharacterOffset(); if (!chars.isWhiteSpace()) { // content to be captured String fpContent = chars.getData(); LOGGER.debug("Character offset: {}", coff); LOGGER.debug("Character based data: {}", fpContent); // LOGGER.debug("Character data via offset diff: {}", content.substring(coff - fpContent.length(), coff)); SimpleImmutableEntry<Integer, Integer> pads = trimSpacing(fpContent); final int tsb = currOff + pads.getKey(); final int tse = currOff + fpContent.length() - pads.getValue(); final String subs = content.substring(tsb, tse); if (subs.replaceAll("\\p{Zs}", "").replaceAll("\\n", "").isEmpty()) { LOGGER.info("Found empty section: skipping."); continue; } LOGGER.debug("Section text: {}", subs); TextSpan ts = new TextSpan(tsb, tse); Section s = sf.fromTextSpan(ts, "post"); List<Integer> intList = new ArrayList<>(); intList.add(sectNumber); intList.add(subSect); s.setNumberList(intList); c.addToSectionList(s); subSect++; } } else if (nextEvent.isEndElement()) { EndElement ee = nextEvent.asEndElement(); currOff = ee.getLocation().getCharacterOffset(); QName name = ee.getName(); String localName = name.getLocalPart(); LOGGER.debug("Hit end element: {}", localName); if (localName.equalsIgnoreCase(POST_LOCAL_NAME)) { sectNumber++; subSect = 0; } } } return c; } catch (XMLStreamException | ConcreteException | StringIndexOutOfBoundsException x) { throw new IngestException(x); } finally { if (rdr != null) try { rdr.close(); } catch (XMLStreamException e) { // not likely. LOGGER.info("Error closing XMLReader.", e); } } } catch (IOException e) { throw new IngestException(e); } }
From source file:ca.uhn.fhir.jpa.dao.BaseHapiFhirDao.java
private static String parseNarrativeTextIntoWords(IBaseResource theResource) { StringBuilder b = new StringBuilder(); if (theResource instanceof IResource) { IResource resource = (IResource) theResource; List<XMLEvent> xmlEvents = resource.getText().getDiv().getValue(); if (xmlEvents != null) { for (XMLEvent next : xmlEvents) { if (next.isCharacters()) { Characters characters = next.asCharacters(); b.append(characters.getData()).append(" "); }// w w w .ja va 2 s .c om } } } else if (theResource instanceof IDomainResource) { IDomainResource resource = (IDomainResource) theResource; try { String divAsString = resource.getText().getDivAsString(); XhtmlDt xhtml = new XhtmlDt(divAsString); List<XMLEvent> xmlEvents = xhtml.getValue(); if (xmlEvents != null) { for (XMLEvent next : xmlEvents) { if (next.isCharacters()) { Characters characters = next.asCharacters(); b.append(characters.getData()).append(" "); } } } } catch (Exception e) { throw new DataFormatException("Unable to convert DIV to string", e); } } return b.toString(); }
From source file:com.evolveum.polygon.connector.hcm.DocumentProcessing.java
public Map<String, Object> parseXMLData(HcmConnectorConfiguration conf, ResultsHandler handler, Map<String, Object> schemaAttributeMap, Filter query) { XMLInputFactory factory = XMLInputFactory.newInstance(); try {//from w ww . jav a2s. co m String uidAttributeName = conf.getUidAttribute(); String primariId = conf.getPrimaryId(); String startName = ""; String value = null; StringBuilder assignmentXMLBuilder = null; List<String> builderList = new ArrayList<String>(); Integer nOfIterations = 0; Boolean isSubjectToQuery = false; Boolean isAssigment = false; Boolean evaluateAttr = true; Boolean specificAttributeQuery = false; XMLEventReader eventReader = factory.createXMLEventReader(new FileReader(conf.getFilePath())); List<String> dictionary = populateDictionary(FIRSTFLAG); if (!attrsToGet.isEmpty()) { attrsToGet.add(uidAttributeName); attrsToGet.add(primariId); specificAttributeQuery = true; evaluateAttr = false; LOGGER.ok("The uid and primary id were added to the queried attribute list"); schemaAttributeMap = modifySchemaAttributeMap(schemaAttributeMap); } while (eventReader.hasNext()) { XMLEvent event = eventReader.nextEvent(); Integer code = event.getEventType(); if (code == XMLStreamConstants.START_ELEMENT) { StartElement startElement = event.asStartElement(); startName = startElement.getName().getLocalPart(); if (!evaluateAttr && attrsToGet.contains(startName)) { evaluateAttr = true; } if (!elementIsEmployeeData) { if (startName.equals(EMPLOYEES)) { if (dictionary.contains(nOfIterations.toString())) { LOGGER.ok("The defined number of iterations has been hit: {0}", nOfIterations.toString()); break; } else { startName = ""; elementIsEmployeeData = true; nOfIterations++; } } } else if (evaluateAttr) { if (!isAssigment) { if (!ASSIGNMENTTAG.equals(startName)) { } else { assignmentXMLBuilder = new StringBuilder(); isAssigment = true; } } else { builderList = processAssignment(startName, null, START, builderList); } if (multiValuedAttributesList.contains(startName)) { 
elementIsMultiValued = true; } } } else if (elementIsEmployeeData) { if (code == XMLStreamConstants.CHARACTERS && evaluateAttr) { Characters characters = event.asCharacters(); if (!characters.isWhiteSpace()) { StringBuilder valueBuilder; if (value != null) { valueBuilder = new StringBuilder(value).append("") .append(characters.getData().toString()); } else { valueBuilder = new StringBuilder(characters.getData().toString()); } value = valueBuilder.toString(); // value = StringEscapeUtils.escapeXml10(value); // LOGGER.info("The attribute value for: {0} is // {1}", startName, value); } } else if (code == XMLStreamConstants.END_ELEMENT) { EndElement endElement = event.asEndElement(); String endName = endElement.getName().getLocalPart(); isSubjectToQuery = checkFilter(endName, value, query, uidAttributeName); if (!isSubjectToQuery) { attributeMap.clear(); elementIsEmployeeData = false; value = null; endName = EMPLOYEES; } if (endName.equals(EMPLOYEES)) { attributeMap = handleEmployeeData(attributeMap, schemaAttributeMap, handler, uidAttributeName, primariId); elementIsEmployeeData = false; } else if (evaluateAttr) { if (endName.equals(startName)) { if (value != null) { if (!isAssigment) { if (!elementIsMultiValued) { attributeMap.put(startName, value); } else { multiValuedAttributeBuffer.put(startName, value); } } else { value = StringEscapeUtils.escapeXml10(value); builderList = processAssignment(endName, value, VALUE, builderList); builderList = processAssignment(endName, null, END, builderList); } // LOGGER.info("Attribute name: {0} and the // Attribute value: {1}", endName, value); value = null; } } else { if (endName.equals(ASSIGNMENTTAG)) { builderList = processAssignment(endName, null, CLOSE, builderList); // if (assigmentIsActive) { for (String records : builderList) { assignmentXMLBuilder.append(records); } attributeMap.put(ASSIGNMENTTAG, assignmentXMLBuilder.toString()); // } else { // } builderList = new ArrayList<String>(); // assigmentIsActive = false; 
isAssigment = false; } else if (multiValuedAttributesList.contains(endName)) { processMultiValuedAttributes(multiValuedAttributeBuffer); } } } if (specificAttributeQuery && evaluateAttr) { evaluateAttr = false; } } } else if (code == XMLStreamConstants.END_DOCUMENT) { handleBufferedData(uidAttributeName, primariId, handler); } } } catch (FileNotFoundException e) { StringBuilder errorBuilder = new StringBuilder("File not found at the specified path.") .append(e.getLocalizedMessage()); LOGGER.error("File not found at the specified path: {0}", e); throw new ConnectorIOException(errorBuilder.toString()); } catch (XMLStreamException e) { LOGGER.error("Unexpected processing error while parsing the .xml document : {0}", e); StringBuilder errorBuilder = new StringBuilder( "Unexpected processing error while parsing the .xml document. ") .append(e.getLocalizedMessage()); throw new ConnectorIOException(errorBuilder.toString()); } return attributeMap; }
From source file:edu.monash.merc.system.parser.xml.HPAWSXmlParser.java
public List<HPAEntryBean> parseHPAXml(String fileName, XMLInputFactory2 factory2) { xmlif2 = factory2;//from ww w . j ava 2s .com logger.info("Starting to parse " + fileName); List<HPAEntryBean> hpaEntryBeans = new ArrayList<HPAEntryBean>(); XMLEventReader2 xmlEventReader = null; try { xmlEventReader = (XMLEventReader2) xmlif2.createXMLEventReader(new FileInputStream(fileName)); QName entryQN = new QName(ELE_ENTRY); QName versionQN = new QName(ATTR_VERSION); QName urlQN = new QName(ATTR_URL); QName nameQN = new QName(ELE_NAME); QName identiferQN = new QName(ELE_IDENTIFIER); QName idQN = new QName(ATTR_ID); QName xrefQN = new QName(ELE_XREF); QName dbQN = new QName(ATTR_DB); QName tissueExpQN = new QName(ELE_TISSUE_EXPRESSION); QName typeQN = new QName(ATTR_TYPE); QName verificationQN = new QName(ELE_VERIFICATION); QName dataQN = new QName(ELE_DATA); QName tissueQN = new QName(ELE_TISSUE); QName statusQN = new QName(ATTR_STATUS); QName cellTypeQN = new QName(ELE_CELLTYPE); QName levelQN = new QName(ELE_LEVEL); QName antibodyQN = new QName(ELE_ANTIBODY); String version = null; String url = null; String geneName = null; String geneAccession = null; String dbNameForIdentifier = null; String xrefAc = null; String xrefDb = null; boolean tissueExpressionPresent = false; boolean antibodyPresent = false; String tissueStatus = null; String tissue = null; String cellType = null; String levelType = null; String level = null; String verificationType = null; String verification = null; HPAEntryBean hpaEntryBean = null; GeneBean geneBean = null; List<DbSourceAcEntryBean> dbSourceAcEntryBeans = new ArrayList<DbSourceAcEntryBean>(); List<PEEvidenceBean> peAntiIHCNormEvidenceBeans = new ArrayList<PEEvidenceBean>(); PEEvidenceBean antiIHCNormEvidenceBean = null; AccessionBean identifiedAcBean = null; while (xmlEventReader.hasNextEvent()) { //eventType = reader.next(); XMLEvent event = xmlEventReader.nextEvent(); if (event.isStartElement()) { StartElement element = 
event.asStartElement(); //hpa entry if (element.getName().equals(entryQN)) { //start to create a hpaEntryBean hpaEntryBean = new HPAEntryBean(); //create a GeneBean geneBean = new GeneBean(); //create a list of DbSourceAcEntryBean to store all DbSource and Ac dbSourceAcEntryBeans = new ArrayList<DbSourceAcEntryBean>(); //create a list of PEEvidenceBean to store all antibody evidencs for the current gene peAntiIHCNormEvidenceBeans = new ArrayList<PEEvidenceBean>(); //get the version attribute Attribute versionAttr = element.getAttributeByName(versionQN); if (versionAttr != null) { version = versionAttr.getValue(); } //get the url attribute Attribute urlAttr = element.getAttributeByName(urlQN); if (urlAttr != null) { url = urlAttr.getValue(); } } //parse the gene name in the name element if (element.getName().equals(nameQN)) { if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); Characters geneCharacters = event.asCharacters(); if (geneCharacters != null) { geneName = geneCharacters.getData(); } } } //parse the ensg accession and db in the identifier element if (element.getName().equals(identiferQN)) { Attribute idAttr = element.getAttributeByName(idQN); if (idAttr != null) { geneAccession = idAttr.getValue(); } Attribute dbAttr = element.getAttributeByName(dbQN); if (dbAttr != null) { dbNameForIdentifier = dbAttr.getValue(); } } //parse all db and accession pair in xref element if (element.getName().equals(xrefQN)) { Attribute idAttr = element.getAttributeByName(idQN); if (idAttr != null) { xrefAc = idAttr.getValue(); } Attribute dbAttr = element.getAttributeByName(dbQN); if (dbAttr != null) { xrefDb = dbAttr.getValue(); } } //parse tissueExpression if (element.getName().equals(tissueExpQN)) { //we only focus on the tissueExpression element in the path /entry/tissueExpression if (!antibodyPresent) { //set the tissueExpression present flag into true; tissueExpressionPresent = true; //create a list of PEEvidenceBean to store the PEEvidence 
for antibody peAntiIHCNormEvidenceBeans = new ArrayList<PEEvidenceBean>(); } } //parse the verification element to get reliability or validation value if (element.getName().equals(verificationQN)) { //we only focus on the verification element in the path /entry/tissueExpression/verification if (!antibodyPresent && tissueExpressionPresent) { Attribute verifAttr = element.getAttributeByName(typeQN); if (verifAttr != null) { verificationType = element.getAttributeByName(typeQN).getValue(); } if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); verification = event.asCharacters().getData(); } } } //start of the data element if (element.getName().equals(dataQN)) { //we only focus on the data element in the path /entry/tissueExpression/data if (!antibodyPresent && tissueExpressionPresent) { antiIHCNormEvidenceBean = new PEEvidenceBean(); TPBDataTypeBean dataTypeBean = createTPBDataTypeBeanForPEANTIIHCNORM(); antiIHCNormEvidenceBean.setTpbDataTypeBean(dataTypeBean); } } //start of tissue if (element.getName().equals(tissueQN)) { //we only focus on the tissue element in the path /entry/tissueExpression/data/tissue if (!antibodyPresent && tissueExpressionPresent) { Attribute tissueStatusAttr = element.getAttributeByName(statusQN); if (tissueStatusAttr != null) { tissueStatus = tissueStatusAttr.getValue(); } if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); tissue = event.asCharacters().getData(); } } } //start of cellType if (element.getName().equals(cellTypeQN)) { //we only focus on the cellType element in the path /entry/tissueExpression/data/cellType if (!antibodyPresent && tissueExpressionPresent) { if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); cellType = event.asCharacters().getData(); } } } //start of level if (element.getName().equals(levelQN)) { //we only focus on the level element in the path /entry/tissueExpression/data/level if (!antibodyPresent && tissueExpressionPresent) 
{ Attribute typeAttr = element.getAttributeByName(typeQN); if (typeAttr != null) { levelType = typeAttr.getValue(); } if (xmlEventReader.peek().isCharacters()) { event = xmlEventReader.nextEvent(); level = event.asCharacters().getData(); } } } //start of antibody element if (element.getName().equals(antibodyQN)) { //we have to setup antibodyPresent flag as true antibodyPresent = true; } } //End of element if (event.isEndElement()) { EndElement endElement = event.asEndElement(); //hpa entry end if (endElement.getName().equals(entryQN)) { //set hpa version hpaEntryBean.setHpaVersion(version); //hpaEntryBean set gene bean hpaEntryBean.setGeneBean(geneBean); //create the primary dbsource bean DBSourceBean primaryDbSourceBean = createPrimaryDBSourceBeanForHPA(); //set the primary DBSourceBean hpaEntryBean.setPrimaryDbSourceBean(primaryDbSourceBean); //set the identified accesion bean hpaEntryBean.setIdentifiedAccessionBean(identifiedAcBean); //set DbSourceAcEntryBean list hpaEntryBean.setDbSourceAcEntryBeans(dbSourceAcEntryBeans); //set all the PeAntiIHCBody evidences if (peAntiIHCNormEvidenceBeans.size() == 0) { peAntiIHCNormEvidenceBeans.add(createNonePEEvidence(url)); } hpaEntryBean.setPeAntiIHCNormEvidencesBeans(peAntiIHCNormEvidenceBeans); //add the current hpa entry bean into list hpaEntryBeans.add(hpaEntryBean); //reset version and url version = null; url = null; identifiedAcBean = null; } //end of gene name, populate the gene name if (endElement.getName().equals(nameQN)) { //set gene name geneBean.setDisplayName(geneName); } //end of identifier, populating for gene accession, db and accessions if any if (endElement.getName().equals(identiferQN)) { //set the gene accession geneBean.setEnsgAccession(geneAccession); identifiedAcBean = createIdentifiedAcBean(geneAccession, dbNameForIdentifier); //create a DbSourceAcEntryBean based on the identifier element DbSourceAcEntryBean dbSourceAcEntryBean = createDbSourceAcEntry(dbNameForIdentifier, geneAccession); //add this 
DbSourceAcEntryBean into list dbSourceAcEntryBeans.add(dbSourceAcEntryBean); } //end of xref element. populate for db and accessions if any if (endElement.getName().equals(xrefQN)) { //create a DbSourceAcEntryBean based on the xref element DbSourceAcEntryBean dbSourceAcEntryBean = createDbSourceAcEntry(xrefDb, xrefAc); //add this DbSoureAcEntryBean into list dbSourceAcEntryBeans.add(dbSourceAcEntryBean); //set rest of db and accession values xrefDb = null; xrefAc = null; } //end of the tissueExpression if (endElement.getName().equals(tissueExpQN)) { //we only focus on the tissueExpression element in the path /entry/tissueExpression if (!antibodyPresent) { //the tissueExpression is end. we have to reset tissueExpressionPresent, //verificationType and verification values under the tissueExpression element level //reset tissueExpression present flag into false tissueExpressionPresent = false; //reset verification type verificationType = null; //reset verification value verification = null; } } //end of data element if (endElement.getName().equals(dataQN)) { //we only focus on the data element in the path /entry/tissueExpression/data if (!antibodyPresent && tissueExpressionPresent) { //we only consider the tissue status is normal one if (StringUtils.endsWithIgnoreCase(tissueStatus, TISSUE_STATUS_NORMAL)) { setAntiEvidence(antiIHCNormEvidenceBean, url, verification, tissue, cellType, level, levelType); //add anti evidence peAntiIHCNormEvidenceBeans.add(antiIHCNormEvidenceBean); } //the data element is end. 
we have to reset the tissueStatus, tissue, cellType and level values under the data element level tissueStatus = null; tissue = null; cellType = null; level = null; levelType = null; } } //end of antibody if (endElement.getName().equals(antibodyQN)) { //we have to reset antibodyPresent flag as false antibodyPresent = false; } } //End of XML document if (event.isEndDocument()) { // finished to parse the whole document; break; } } } catch (Exception ex) { logger.error(ex); throw new DMXMLParserException(ex); } finally { if (xmlEventReader != null) { try { xmlEventReader.close(); } catch (Exception e) { //ignore whatever caught. } } } return hpaEntryBeans; }
From source file:microsoft.exchange.webservices.data.core.EwsXmlReader.java
/** * Reads the value. Should return content element or text node as string * Present event must be START ELEMENT. After executing this function * Present event will be set on END ELEMENT * * @param keepWhiteSpace Do not remove whitespace characters if true * @return String/*from w w w . j a v a 2 s.c o m*/ * @throws XMLStreamException the XML stream exception * @throws ServiceXmlDeserializationException the service xml deserialization exception */ public String readValue(boolean keepWhiteSpace) throws XMLStreamException, ServiceXmlDeserializationException { if (this.presentEvent.isStartElement()) { // Go to next event and check for Characters event this.read(keepWhiteSpace); if (this.presentEvent.isCharacters()) { final StringBuilder elementValue = new StringBuilder(); do { if (this.getNodeType().nodeType == XmlNodeType.CHARACTERS) { Characters characters = (Characters) this.presentEvent; if (keepWhiteSpace || (!characters.isIgnorableWhiteSpace() && !characters.isWhiteSpace())) { final String charactersData = characters.getData(); if (charactersData != null && !charactersData.isEmpty()) { elementValue.append(charactersData); } } } this.read(); } while (!this.presentEvent.isEndElement()); // Characters chars = this.presentEvent.asCharacters(); // String elementValue = chars.getData(); // Advance to next event post Characters (ideally it will be End // Element) // this.read(); return elementValue.toString(); } else if (this.presentEvent.isEndElement()) { return ""; } else { throw new ServiceXmlDeserializationException( getReadValueErrMsg("Could not find " + XmlNodeType.getString(XmlNodeType.CHARACTERS))); } } else if (this.presentEvent.getEventType() == XmlNodeType.CHARACTERS && this.presentEvent.isCharacters()) { /* * if(this.presentEvent.asCharacters().getData().equals("<")) { */ final String charData = this.presentEvent.asCharacters().getData(); final StringBuilder data = new StringBuilder(charData == null ? 
"" : charData); do { this.read(keepWhiteSpace); if (this.getNodeType().nodeType == XmlNodeType.CHARACTERS) { Characters characters = (Characters) this.presentEvent; if (keepWhiteSpace || (!characters.isIgnorableWhiteSpace() && !characters.isWhiteSpace())) { final String charactersData = characters.getData(); if (charactersData != null && !charactersData.isEmpty()) { data.append(charactersData); } } } } while (!this.presentEvent.isEndElement()); return data.toString();// this.presentEvent. = new XMLEvent(); /* * } else { Characters chars = this.presentEvent.asCharacters(); * String elementValue = chars.getData(); // Advance to next event * post Characters (ideally it will be End // Element) this.read(); * return elementValue; } */ } else { throw new ServiceXmlDeserializationException( getReadValueErrMsg("Expected is " + XmlNodeType.getString(XmlNodeType.START_ELEMENT))); } }
From source file:com.logiware.accounting.domain.EdiInvoice.java
private void setValue(Characters text) throws Exception { if (!text.isWhiteSpace()) { if (isHeader) { if ("Applicationreference".equals(elementType) && !"INVOICE".equalsIgnoreCase(text.getData())) { throw new AccountingException("Bad file. Not an Invoice."); } else if ("Reference".equals(elementType)) { ediReference = text.getData(); } else if ("Sender".equals(elementType) && "Code".equals(characterType)) { ediCode = text.getData(); VendorModel vendor = new EdiInvoiceDAO().getVendor(ediCode); if (null != vendor && CommonUtils.isNotEmpty(vendor.getVendorNumber())) { vendorNumber = vendor.getVendorNumber(); vendorName = vendor.getVendorName(); }//from ww w . j av a 2 s . c o m } } else if (isBody) { if (isInformation) { if ("Invoice".equals(elementType)) { if ("Number".equals(characterType)) { invoiceNumber = text.getData(); searchInvoiceNumber = invoiceNumber.replaceAll("[^\\p{Alpha}\\p{Digit}]+", ""); } else if ("Date".equals(characterType)) { invoiceDate = DateUtils.parseDate(text.getData(), "yyyy-MM-dd"); } } else if ("RelatedReferences".equals(elementType)) { if ("EFR".equals(characterType)) { ourReference = text.getData(); } else if ("BLR".equals(characterType)) { blNumber = text.getData(); } else if ("CR".equals(characterType)) { yourReference1 = text.getData(); } else if ("TID".equals(characterType)) { yourReference2 = text.getData(); } } else if ("Company".equals(elementType)) { if ("Name".equals(characterType)) { party = new EdiInvoiceParty(); party.setEdiInvoice(this); party.setType("Company"); party.setName(text.getData()); } else if ("Street".equals(characterType)) { party.setStreet(text.getData()); } else if ("Zip".equals(characterType)) { party.setZip(text.getData()); } else if ("City".equals(characterType)) { party.setCity(text.getData()); } else if ("Country".equals(characterType)) { party.setCountry(text.getData()); } else if ("VATRegistrationNumber".equalsIgnoreCase(characterType)) { party.setVatNumber(text.getData()); } } else if 
("Vendor".equals(elementType)) { if (isBank) { if ("Name".equals(characterType)) { bank = new EdiInvoiceBank(); bank.setEdiInvoice(this); bank.setName(text.getData()); } else if ("Street".equals(characterType)) { if (null == bank.getStreet1()) { bank.setStreet1(text.getData()); } else { bank.setStreet2(text.getData()); } } else if ("Zip".equals(characterType)) { bank.setZip(text.getData()); } else if ("City".equals(characterType)) { bank.setCity(text.getData()); } else if ("Country".equals(characterType)) { bank.setCountry(text.getData()); } else if ("UnCode".equals(characterType)) { bank.setUnCode(text.getData()); } else if ("AccountNumber".equalsIgnoreCase(characterType)) { bank.setAccount(text.getData()); } else if ("IBAN".equalsIgnoreCase(characterType)) { bank.setIban(text.getData()); } else if ("BIC".equalsIgnoreCase(characterType)) { bank.setBic(text.getData()); } } else { if ("Name".equals(characterType)) { party = new EdiInvoiceParty(); party.setEdiInvoice(this); party.setType("Vendor"); party.setName(text.getData()); } else if ("Street".equals(characterType)) { party.setStreet(text.getData()); } else if ("Zip".equals(characterType)) { party.setZip(text.getData()); } else if ("City".equals(characterType)) { party.setCity(text.getData()); } else if ("Country".equals(characterType)) { party.setCountry(text.getData()); } else if ("VATRegistrationNumber".equalsIgnoreCase(characterType)) { party.setVatNumber(text.getData()); } else if ("CompanyRegistrationNumber".equalsIgnoreCase(characterType)) { party.setRegistrationNumber(text.getData()); } else if ("CompanyLicenseNumber".equalsIgnoreCase(characterType)) { party.setLicenseNumber(text.getData()); } } } else if ("PaymentTerms".equalsIgnoreCase(elementType)) { if ("Description".equalsIgnoreCase(characterType)) { paymentTerms = text.getData(); } } else if ("ShipmentInformation".equalsIgnoreCase(elementType)) { if ("Vessel".equalsIgnoreCase(characterType)) { ediInvoiceShippingDetails = new 
EdiInvoiceShippingDetails(); ediInvoiceShippingDetails.setEdiInvoice(this); ediInvoiceShippingDetails.setVessel(text.getData()); } else if ("Date".equalsIgnoreCase(characterType)) { ediInvoiceShippingDetails.setDate(DateUtils.parseDate(text.getData(), "yyyy-MM-dd")); } else if ("Routing".equalsIgnoreCase(characterType)) { String routing = (null != ediInvoiceShippingDetails.getRouting() ? ediInvoiceShippingDetails.getRouting() : "") + text.getData(); ediInvoiceShippingDetails.setRouting(routing); } else if ("Quantity".equalsIgnoreCase(characterType)) { ediInvoiceShippingDetails.setPackageQuantity(text.getData()); } else if ("Description".equalsIgnoreCase(characterType)) { ediInvoiceShippingDetails.setPackageDescription(text.getData()); } else if ("Weigth".equalsIgnoreCase(characterType)) { ediInvoiceShippingDetails.setWeight(text.getData()); } else if ("Volume".equalsIgnoreCase(characterType)) { ediInvoiceShippingDetails.setVolume(text.getData()); } } } else if (isDetails) { if ("Detail".equalsIgnoreCase(elementType)) { if ("ItemDescription".equals(characterType)) { detail = new EdiInvoiceDetail(); detail.setEdiInvoice(this); detail.setDescription(text.getData()); } else if ("Quantity".equals(characterType)) { detail.setQuantity(text.getData()); } else if ("CalculationCode".equals(characterType)) { detail.setCalculationCode(text.getData()); } else if ("Price".equals(characterType)) { detail.setPrice(text.getData()); } else if ("Rate".equals(characterType)) { detail.setRate(text.getData()); } else if ("Currency".equals(characterType)) { detail.setCurrency(text.getData()); } else if ("AmountVATExcl".equals(characterType)) { detail.setVatExcludedAmount(text.getData()); detail.setApAmount(text.getData()); detail.setArAmount(text.getData()); } else if ("AmountVATIncl".equals(characterType)) { detail.setVatIncludedAmount(text.getData()); } else if ("AmountVAT".equals(characterType)) { detail.setVatAmount(text.getData()); } else if ("VATPercentage".equals(characterType)) { 
detail.setVatPercentage(text.getData()); } else if ("BLReference".equalsIgnoreCase(characterType)) { detail.setBlReference(text.getData()); } } } else if (isSummary) { if ("TotalMonetaryAmount".equalsIgnoreCase(elementType)) { if ("TotalVATIncl".equals(characterType)) { invoiceAmount = NumberUtils.parseNumber(text.getData()); } } else if ("TotalMonetaryAmountGroupByVAT".equalsIgnoreCase(elementType)) { if ("TotalVAT".equals(characterType)) { vatAmount = text.getData(); } else if ("VATPercentage".equals(characterType)) { vatPercentage = text.getData(); } } } } } }
From source file:org.apache.hadoop.gateway.filter.rewrite.impl.xml.XmlFilterReader.java
private void processCharacters(Characters event) throws XPathExpressionException { //System.out.println( "T[" + event.isCData() + "," + event.isWhiteSpace() + "," + event.isIgnorableWhiteSpace() + "]=" + event ); Level level = stack.peek();//from w w w. j av a2 s. co m Node node = stack.peek().node; if (event.isCData()) { node.appendChild(document.createCDATASection(event.getData())); } else { node.appendChild(document.createTextNode(event.getData())); } if (!currentlyBuffering()) { String value = event.getData(); if (!event.isWhiteSpace()) { if (level.scopeConfig == null || level.scopeConfig.getSelectors().isEmpty()) { value = filterText(extractQName(node), value, null); } else { UrlRewriteFilterPathDescriptor path = pickFirstMatchingPath(level); if (path instanceof UrlRewriteFilterApplyDescriptor) { String rule = ((UrlRewriteFilterApplyDescriptor) path).rule(); value = filterText(extractQName(node), value, rule); } } } if (event.isCData()) { writer.write("<![CDATA["); writer.write(value); writer.write("]]>"); } else { writer.write(StringEscapeUtils.escapeXml(value)); } } }
From source file:org.apache.hadoop.util.ConfTest.java
public static List<String> checkConf(InputStream in) { List<NodeInfo> nodes = null; List<String> errors = new ArrayList<String>(); try {/*from w w w . j ava 2 s. c o m*/ nodes = parseConf(in); if (nodes == null) { errors.add("bad conf file: top-level element not <configuration>"); } } catch (XMLStreamException e) { errors.add("bad conf file: " + e.getMessage()); } if (!errors.isEmpty()) { return errors; } Map<String, List<Integer>> duplicatedProperties = new HashMap<String, List<Integer>>(); for (NodeInfo node : nodes) { StartElement element = node.getStartElement(); int line = element.getLocation().getLineNumber(); if (!element.getName().equals(new QName("property"))) { errors.add(String.format("Line %d: element not <property>", line)); continue; } List<XMLEvent> events = node.getXMLEventsForQName(new QName("name")); if (events == null) { errors.add(String.format("Line %d: <property> has no <name>", line)); } else { String v = null; for (XMLEvent event : events) { if (event.isAttribute()) { v = ((Attribute) event).getValue(); } else { Characters c = node.getElement(event.asStartElement()); if (c != null) { v = c.getData(); } } if (v == null || v.isEmpty()) { errors.add(String.format("Line %d: <property> has an empty <name>", line)); } } if (v != null && !v.isEmpty()) { List<Integer> lines = duplicatedProperties.get(v); if (lines == null) { lines = new ArrayList<Integer>(); duplicatedProperties.put(v, lines); } lines.add(node.getStartElement().getLocation().getLineNumber()); } } events = node.getXMLEventsForQName(new QName("value")); if (events == null) { errors.add(String.format("Line %d: <property> has no <value>", line)); } for (QName qName : node.getDuplicatedQNames()) { if (!qName.equals(new QName("source"))) { errors.add(String.format("Line %d: <property> has duplicated <%s>s", line, qName)); } } } for (Entry<String, List<Integer>> e : duplicatedProperties.entrySet()) { List<Integer> lines = e.getValue(); if (1 < lines.size()) { errors.add(String.format("Line 
%s: duplicated <property>s for %s", StringUtils.join(", ", lines), e.getKey())); } } return errors; }