List of usage examples for org.dom4j.io SAXReader addHandler
public void addHandler(String path, ElementHandler handler)
Adds the ElementHandler to be called when the specified path is encountered.
From source file:musite.io.xml.UniProtXMLReader.java
License:Open Source License
public Proteins read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }/*from www .j ava 2 s . c o m*/ final Proteins result = data == null ? new ProteinsImpl() : data; SAXReader saxReader = new SAXReader(); final StringBuilder acc = new StringBuilder(30); final StringBuilder name = new StringBuilder(30); final StringBuilder fullName = new StringBuilder(200); final StringBuilder org = new StringBuilder(30); final StringBuilder seq = new StringBuilder(2000); final List<List> sites = new ArrayList(4); // location, ptm, enzyme, annotation final Set<String> accs = new HashSet(); // entry saxReader.addHandler("/uniprot/entry", new ElementHandler() { public void onStart(ElementPath path) { acc.setLength(0); fullName.setLength(0); seq.setLength(0); org.setLength(0); name.setLength(0); sites.clear(); accs.clear(); } public void onEnd(ElementPath path) { // process a element if (org.length() > 0 && (organismFilter == null || organismFilter.contains(org.toString())) && acc.length() > 0 && seq.length() > 0) { String accession = acc.toString(); String sequence = seq.toString(); ProteinImpl protein = new ProteinImpl(acc.toString(), sequence, name.length() == 0 ? null : name.toString(), fullName.length() == 0 ? null : fullName.toString(), org.length() == 0 ? 
null : org.toString()); result.addProtein(protein); for (List l : sites) { Integer site = (Integer) l.get(0); PTM ptm = (PTM) l.get(1); String enzyme = (String) l.get(2); if (enzyme != null && enzyme.equalsIgnoreCase("autocatalysis")) { enzyme = name.toString(); } Map ann = (Map) l.get(3); try { PTMAnnotationUtil.annotate(protein, site, ptm, enzyme, ann); } catch (Exception e) { e.printStackTrace(); } } if (keepAllIds) { for (String ac : accs) { mapIdMainId.put(ac, accession); } if (!accs.isEmpty()) protein.putInfo("other-accessions", new HashSet(accs)); } //System.out.println(accession); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // accession saxReader.addHandler("/uniprot/entry/accession", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (acc.length() == 0) { Element el = path.getCurrent(); acc.append(el.getText()); // if (keepAllIds) { // accs.add(acc.toString()); // } } else { if (keepAllIds) { accs.add(path.getCurrent().getText()); } } } }); // name saxReader.addHandler("/uniprot/entry/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (name.length() > 0) return; Element el = path.getCurrent(); name.append(el.getText()); } }); // full name saxReader.addHandler("/uniprot/entry/protein/recommendedName/fullName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (fullName.length() > 0) return; Element el = path.getCurrent(); fullName.append(el.getTextTrim()); } }); saxReader.addHandler("/uniprot/entry/organism/name", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (org.length() > 0) return; Element el = path.getCurrent(); String attr = el.attributeValue("type"); if (attr == null || !attr.equalsIgnoreCase("scientific")) { return; } 
org.append(el.getText()); } }); saxReader.addHandler("/uniprot/entry/sequence", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { if (seq.length() > 0) return; Element el = path.getCurrent(); seq.append(el.getText().replaceAll("\\p{Space}", "")); } }); saxReader.addHandler("/uniprot/entry/feature", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element el = path.getCurrent(); String type = el.attributeValue("type"); if (type == null) return; PTM ptm = null; String enzyme = null; String description = null; String keyword = null; if (UNIPROT_TYPES.contains(type.toLowerCase())) { description = el.attributeValue("description"); if (description == null) return; String[] descs = description.split("; "); for (String desc : descs) { PTM tmp = PTM.ofKeyword(desc); if (tmp != null) { ptm = tmp; keyword = desc; } else if (desc.startsWith("by ")) { enzyme = desc.substring(3); } } } // else if (type.equalsIgnoreCase("glycosylation site")) { // description = el.attributeValue("description"); // ptm = PTM.GLYCOSYLATION; // } // else if (type.equalsIgnoreCase()) { // description = el.attributeValue("description"); // String[] descs = description.split("; "); // for (String desc : descs) { // PTM tmp = PTM.ofKeyword(desc); // if (tmp != null) { // ptm = tmp; // keyword = desc; // } else if (desc.startsWith("by ")) { // enzyme = desc.substring(3); // } // } // } if (ptm == null || (ptmFilter != null && !ptmFilter.contains(ptm))) return; String status = el.attributeValue("status"); if (status != null) { if (!includeBySimilarity && status.equalsIgnoreCase("By similarity")) return; if (!includeProbable && status.equalsIgnoreCase("Probable")) return; if (!includePotential && status.equalsIgnoreCase("Potential")) return; } int site = -1; List<Element> locs = el.elements("location"); for (Element loc : locs) { List<Element> poss = 
loc.elements("position"); for (Element pos : poss) { String str = pos.attributeValue("position"); if (str == null) continue; try { site = Integer.parseInt(str) - 1; //start from 0 } catch (NumberFormatException e) { continue; } } } if (site != -1) { List l = new ArrayList(); l.add(site); l.add(ptm); l.add(enzyme); Map<String, Object> m = new HashMap(); if (keyword != null) m.put("keyword", keyword); if (description != null) m.put("description", description); if (status != null) m.put("status", status); l.add(m); sites.add(l); } } }); BufferedInputStream bis = new BufferedInputStream(is); try { saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } return result; }
From source file:musite.taxonomy.UniprotTaxonomyXMLReader.java
License:Open Source License
public TaxonomyTree read(InputStream is) throws IOException { if (is == null) { throw new IllegalArgumentException(); }/* w ww .j av a2 s. c o m*/ final TaxonomyTree tree = new TaxonomyTree(); SAXReader saxReader = new SAXReader(); final TaxonomyNode currentNode = new TaxonomyNode(); // entry saxReader.addHandler("/RDF/Description", new ElementHandler() { public void onStart(ElementPath path) { currentNode.clearMembers(); Element element = path.getCurrent(); Attribute attribute = element.attribute("about"); String TaxonomyID = attribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); currentNode.setIdentifier(TaxonomyID); } public void onEnd(ElementPath path) { // process an element //create a new node TaxonomyNode node = tree.getTaxonomyNode(currentNode.getIdentifier()); if (node == null) { node = new TaxonomyNode(); currentNode.copyMembersTo(node); tree.addtoNodelist(node); } else { currentNode.copyMembersTo(node); } //change the parent from currentNode to node ArrayList<TaxonomyNode> parentlist = node.getParents(); for (int i = 0; i < parentlist.size(); i++) { TaxonomyNode parent = parentlist.get(i); parent.getChildren().add(node); } // prune the tree Element row = path.getCurrent(); row.detach(); } }); // type saxReader.addHandler("/RDF/Description/type", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element typeElement = (Element) path.getCurrent(); Attribute typeAttribute = typeElement.attribute("resource"); String Type = typeAttribute.getValue().replaceAll(UniprotTaxonomySettings.TYPE_ADDRESS, ""); currentNode.setType(Type); } }); // rank saxReader.addHandler("/RDF/Description/rank", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element rankElement = (Element) path.getCurrent(); Attribute rankAttribute = rankElement.attribute("resource"); String Rank = 
rankAttribute.getValue().replaceAll(UniprotTaxonomySettings.RANK_ADDRESS, ""); currentNode.setRank(Rank); } }); // scientificName saxReader.addHandler("/RDF/Description/scientificName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element scientificnameElement = (Element) path.getCurrent(); String ScientificName = scientificnameElement.getText(); currentNode.setScientificName(ScientificName); } }); // otherName saxReader.addHandler("/RDF/Description/otherName", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element othernameElement = (Element) path.getCurrent(); String tempname = othernameElement.getText(); currentNode.addOthernames(tempname); } }); // partOfLineage saxReader.addHandler("/RDF/Description/partOfLineage", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element lineageElement = (Element) path.getCurrent(); String temptext = lineageElement.getText(); boolean partOfLineage; if (temptext.equals("true")) { partOfLineage = true; } else partOfLineage = false; currentNode.setPartOfLineage(partOfLineage); } }); // Add parent saxReader.addHandler("/RDF/Description/subClassOf", new ElementHandler() { public void onStart(ElementPath path) { // do nothing } public void onEnd(ElementPath path) { Element subclassElement = (Element) path.getCurrent(); Attribute subclassAttribute = subclassElement.attribute("resource"); String subclassID = subclassAttribute.getValue().replaceAll(UniprotTaxonomySettings.ID_ADDRESS, ""); TaxonomyNode parent = tree.getTaxonomyNode(subclassID); if (parent != null) { currentNode.addParentOnly(parent); } else { parent = new TaxonomyNode(); parent.setIdentifier(subclassID); tree.addtoNodelist(parent); currentNode.addParentOnly(parent); } } }); BufferedInputStream bis = new BufferedInputStream(is); Document doc; try { doc = 
saxReader.read(bis); } catch (DocumentException e) { throw new IOException(e.getMessage()); } tree.searchRoot(); return tree; }
From source file:org.dom4j.samples.LargeDocumentDemo.java
License:Open Source License
protected Document parse(String url) throws Exception { SAXReader reader = new SAXReader(); println("Parsing document: " + url); println("Using Pruning Path: " + pruningPath); // enable pruning to call me back as each Element is complete reader.addHandler(pruningPath, this); println("##### starting parse"); Document document = reader.read(url); println("##### finished parse"); // the document will be complete but have the prunePath elements pruned println("Now lets dump what is left of the document after pruning..."); return document; }
From source file:org.dom4j.samples.LargeDocumentDemo2.java
License:Open Source License
protected Document parse(String url) throws Exception { SAXReader reader = new SAXReader(); println("Parsing document: " + url); // enable pruning to call me back as each Element is complete reader.addHandler("/PLAY/ACT", new playActHandler()); println("##### starting parse"); Document document = reader.read(url); println("##### finished parse"); // the document will be complete but have the prunePath elements pruned println("Now lets dump what is left of the document after pruning..."); return document; }
From source file:org.localmatters.serializer.config.XmlSerializationParser.java
License:Apache License
/** * @see org.localmatters.serializer.config.SerializationParser#parse(java.io.InputStream) *///from w w w . jav a 2 s . c om public Map<String, Serialization> parse(InputStream input) throws IOException { try { SerializationElementHandler handler = new SerializationElementHandler(); SAXReader saxReader = new SAXReader(); saxReader.addHandler("/" + SerializationElementHandler.TYPE_ROOT, handler); saxReader.setEncoding(getDefaultEncoding()); saxReader.read(input); return handler.getSerializations(); } catch (DocumentException e) { if (e.getNestedException() instanceof ConfigurationException) { throw (ConfigurationException) e.getNestedException(); } throw new ConfigurationException("Unable to parse the serialization configuration!", e); } finally { IOUtils.closeQuietly(input); } }
From source file:org.pentaho.di.trans.steps.getxmldata.GetXMLData.java
License:Apache License
protected boolean setDocument(String StringXML, FileObject file, boolean IsInXMLField, boolean readurl) throws KettleException { this.prevRow = buildEmptyRow(); // pre-allocate previous row try {//ww w. j a va 2 s . c o m SAXReader reader = XMLParserFactoryProducer.getSAXReader(null); data.stopPruning = false; // Validate XML against specified schema? if (meta.isValidating()) { reader.setValidation(true); reader.setFeature("http://apache.org/xml/features/validation/schema", true); } else { // Ignore DTD declarations reader.setEntityResolver(new IgnoreDTDEntityResolver()); } // Ignore comments? if (meta.isIgnoreComments()) { reader.setIgnoreComments(true); } if (data.prunePath != null) { // when pruning is on: reader.read() below will wait until all is processed in the handler if (log.isDetailed()) { logDetailed(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Activated")); } if (data.PathValue.equals(data.prunePath)) { // Edge case, but if true, there will only ever be one item in the list data.an = new ArrayList<>(1); // pre-allocate array and sizes data.an.add(null); } reader.addHandler(data.prunePath, new ElementHandler() { public void onStart(ElementPath path) { // do nothing here... 
} public void onEnd(ElementPath path) { if (isStopped()) { // when a large file is processed and it should be stopped it is still reading the hole thing // the only solution I see is to prune / detach the document and this will lead into a // NPE or other errors depending on the parsing location - this will be treated in the catch part below // any better idea is welcome if (log.isBasic()) { logBasic(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.Stopped")); } data.stopPruning = true; path.getCurrent().getDocument().detach(); // trick to stop reader return; } // process a ROW element if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.StartProcessing")); } Element row = path.getCurrent(); try { // Pass over the row instead of just the document. If // if there's only one row, there's no need to // go back to the whole document. processStreaming(row); } catch (Exception e) { // catch the KettleException or others and forward to caller, e.g. when applyXPath() has a problem throw new RuntimeException(e); } // prune the tree row.detach(); if (log.isDebug()) { logDebug(BaseMessages.getString(PKG, "GetXMLData.Log.StreamingMode.EndProcessing")); } } }); } if (IsInXMLField) { // read string to parse data.document = reader.read(new StringReader(StringXML)); } else if (readurl && KettleVFS.startsWithScheme(StringXML)) { data.document = reader.read(KettleVFS.getInputStream(StringXML)); } else if (readurl) { // read url as source HttpClient client = HttpClientManager.getInstance().createDefaultClient(); HttpGet method = new HttpGet(StringXML); method.addHeader("Accept-Encoding", "gzip"); HttpResponse response = client.execute(method); Header contentEncoding = response.getFirstHeader("Content-Encoding"); HttpEntity responseEntity = response.getEntity(); if (responseEntity != null) { if (contentEncoding != null) { String acceptEncodingValue = contentEncoding.getValue(); if (acceptEncodingValue.contains("gzip")) { GZIPInputStream in = 
new GZIPInputStream(responseEntity.getContent()); data.document = reader.read(in); } } else { data.document = reader.read(responseEntity.getContent()); } } } else { // get encoding. By default UTF-8 String encoding = "UTF-8"; if (!Utils.isEmpty(meta.getEncoding())) { encoding = meta.getEncoding(); } InputStream is = KettleVFS.getInputStream(file); try { data.document = reader.read(is, encoding); } finally { BaseStep.closeQuietly(is); } } if (meta.isNamespaceAware()) { prepareNSMap(data.document.getRootElement()); } } catch (Exception e) { if (data.stopPruning) { // ignore error when pruning return false; } else { throw new KettleException(e); } } return true; }
From source file:org.rivetlogic.export.components.AbstractXMLProcessor.java
License:Open Source License
public Object onCall(MuleEventContext eventContext) throws Exception { SAXReader saxReader = new SAXReader(); saxReader.setDocumentFactory(DOMDocumentFactory.getInstance()); XMLElementHandler xmlElementHandler = new XMLElementHandler(); saxReader.addHandler(xPath, xmlElementHandler); this.eventContext = eventContext; //this.eventContext.getMessage().setStringProperty(EXTRACT_ID, String.valueOf(System.currentTimeMillis())); InputStream input = getInputStream(eventContext); DOMDocument document = (DOMDocument) saxReader.read(input); String extractsId = document.getRootElement().elementText(EXTRACTS_ID); ExtractsReportData extractsReportData = new ExtractsReportData(); extractsReportData.setExtractsId(extractsId); extractsReportData.setNumExtracts(xmlElementHandler.numExtractsInRequest); extractsReportData.setFileForNoResults(document.getRootElement().elementText(FILE_FOR_NO_RESULTS) != null ? Boolean.valueOf(document.getRootElement().elementText(FILE_FOR_NO_RESULTS)) : true);/*w w w .jav a2 s. c o m*/ extractsReportData.setTotalNumResults(xmlElementHandler.totalNumResults); document.clearContent(); document = null; input.close(); return extractsReportData; }
From source file:org.snipsnap.snip.XMLSnipImport.java
License:Open Source License
/** * Load snips and users into the SnipSpace from an xml document out of a stream. * * @param in the input stream to load from * @param flags whether or not to overwrite existing content *///from w ww.ja v a 2 s . c o m public static void load(InputStream in, final int flags) throws IOException { SAXReader saxReader = new SAXReader(); try { saxReader.addHandler("/snipspace/user", new ElementHandler() { public void onStart(ElementPath elementPath) { // nothing to do here ... } public void onEnd(ElementPath elementPath) { Element userElement = elementPath.getCurrent(); if ((flags & IMPORT_USERS) != 0) { try { XMLSnipImport.loadUser(elementPath.getCurrent(), flags); } catch (Exception e) { Logger.fatal("XMLSnipImport: error importing user: " + userElement.elementText("name")); } getStatus().inc(); } // prune the element to save memory userElement.detach(); } }); saxReader.addHandler("/snipspace/snip", new ElementHandler() { public void onStart(ElementPath elementPath) { // nothing to do here ... 
} public void onEnd(ElementPath elementPath) { Element snipElement = elementPath.getCurrent(); if ((flags & IMPORT_SNIPS) != 0) { try { XMLSnipImport.loadSnip(snipElement, flags); } catch (Exception e) { Logger.fatal("XMLSnipImport: error importing snip: " + snipElement.elementText("name")); } getStatus().inc(); } // prune the element to save memory snipElement.detach(); } }); // add a reader wrapper to remove illegal characters from input stream // it looks like the database export (XMLWriter) allows these to get through InputStreamReader reader = new InputStreamReader(in, "UTF-8") { public int read(char[] chars) throws IOException { int n = super.read(chars); for (int i = 0; i < n; i++) { chars[i] = replaceIfIllegal(chars[i]); } return n; } public int read(char[] chars, int start, int length) throws IOException { int n = super.read(chars, start, length); for (int i = 0; i < n; i++) { chars[i] = replaceIfIllegal(chars[i]); } return n; } private char replaceIfIllegal(char c) { if (c < 0x20 && !(c == 0x09 || c == 0x0a || c == 0x0d)) { charErrCount++; return (char) 0x20; } return c; } }; saxReader.read(reader); Logger.warn("XMLSnipImport: corrected " + charErrCount + " characters in input"); Logger.log("XMLSnipImport: imported " + getStatus().getValue() + " data records"); } catch (DocumentException e) { Logger.warn("XMLSnipImport: unable to parse document", e); throw new IOException("Error parsing document: " + e); } }
From source file:uidserver.Config.java
public Config(String configFilePath) { File configFile = new File(configFilePath); if (configFile.exists()) { try {//from ww w. j a va 2 s. c o m SAXReader reader = new SAXReader(); reader.addHandler("/config", new ElementHandler() { @Override public void onStart(ElementPath elementPath) { } @Override public void onEnd(ElementPath elementPath) { Element row = elementPath.getCurrent(); readElement(row); row.detach(); } private void readElement(Element row) { List<Element> nodes = row.elements(); if (!nodes.isEmpty()) { for (Element node : nodes) { String name = node.getName().toLowerCase(); String value = node.getText(); switch (name) { case "logpath": logPath = new File(value); break; case "port": port = value; break; case "timeout": timeOut = Integer.valueOf(value); break; case "uidfile": uidFile = new File(value); break; } } } else { System.out.println("Error: empty elements in config file, please add correct setup"); System.exit(0); } } }); reader.setValidation(false); Document document = reader.read(configFile); if (logPath != null && port != null && uidFile != null) { if (!logPath.exists()) { if (!logPath.mkdirs()) { System.out.println("Failed to create log file: " + logPath.getAbsoluteFile()); System.out.println("Please setup correct log file path"); System.exit(0); } } } else { System.out.println("Please set up correct Port/LogFile/UidFile"); System.exit(0); } } catch (DocumentException ex) { Logger.getLogger(Config.class.getName()).log(Level.SEVERE, null, ex); System.out.println("Error during reading xml config file, please double check file content"); System.exit(0); } } else { System.out.println("The specified config file: " + configFile.getAbsolutePath() + " doesn't exist"); System.out.println("Please key in correct config file path"); System.exit(0); } }