List of usage examples for org.jdom2.input.SAXBuilder: the SAXBuilder(XMLReaderJDOMFactory) constructor
public SAXBuilder(final XMLReaderJDOMFactory readersource)
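All of the examples on this page pass an XMLReaderJDOMFactory implementation to this constructor. As a minimal sketch of the common factory choices seen below (the file names "schema.xsd" and "input.xml" are hypothetical):

import java.io.File;

import org.jdom2.Document;
import org.jdom2.input.SAXBuilder;
import org.jdom2.input.sax.XMLReaderJDOMFactory;
import org.jdom2.input.sax.XMLReaderSAX2Factory;
import org.jdom2.input.sax.XMLReaderXSDFactory;
import org.jdom2.input.sax.XMLReaders;

public class SAXBuilderFactoryExamples {
    public static void main(String[] args) throws Exception {
        // Non-validating parse using the singleton factory (the most common case below).
        SAXBuilder nonValidating = new SAXBuilder(XMLReaders.NONVALIDATING);

        // XSD-validating parse; "schema.xsd" is a hypothetical schema file.
        XMLReaderJDOMFactory xsdFactory = new XMLReaderXSDFactory(new File("schema.xsd"));
        SAXBuilder validating = new SAXBuilder(xsdFactory);

        // Explicit SAX2 reader factory with validation disabled, as some examples below do.
        SAXBuilder sax2 = new SAXBuilder(new XMLReaderSAX2Factory(false));

        Document doc = nonValidating.build(new File("input.xml")); // hypothetical input
        System.out.println(doc.getRootElement().getName());
    }
}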
From source file: de.herm_detlef.java.application.io.Import.java
License: Apache License
private static Document createDocument(String filename) throws JDOMException, IOException {
    InputStream in = Import.class.getResourceAsStream(ApplicationConstants.XML_SCHEMA_DEFINITION);
    XMLReaderJDOMFactory schemafac = new XMLReaderXSDFactory(new StreamSource(in));
    SAXBuilder builder = new SAXBuilder(schemafac);
    File xmlFile = new File(filename);
    return builder.build(xmlFile); // XML validation happens here
}
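With an XMLReaderXSDFactory in place, schema violations surface as a JDOMException (a JDOMParseException) thrown by build(). A hypothetical caller inside the same class, for illustration only:

// Hypothetical caller: "items.xml" is an assumed file name.
try {
    Document doc = createDocument("items.xml");
    System.out.println("Valid document, root: " + doc.getRootElement().getName());
} catch (JDOMException e) {
    // Thrown when the document is not well-formed or fails XSD validation.
    System.err.println("Validation failed: " + e.getMessage());
} catch (IOException e) {
    System.err.println("Could not read file: " + e.getMessage());
}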
From source file: de.intranda.goobi.plugins.sru.SRUHelper.java
License: Open Source License
public static Node parseHaabResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField,
        String searchValue, String resultString, String packing, String version, boolean ignoreAnchor)
        throws IOException, JDOMException, ParserConfigurationException {
    SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING);
    builder.setFeature("http://xml.org/sax/features/validation", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    Document doc = builder.build(new StringReader(resultString), "utf-8");
    Element record = getRecordWithoutSruHeader(doc);
    if (record == null) {
        opac.setHitcount(0);
        return null;
    }
    opac.setHitcount(1);

    boolean isPeriodical = false;
    boolean isManuscript = false;
    boolean isCartographic = false;
    boolean isMultiVolume = false;
    boolean isFSet = false;
    String anchorPpn = null;
    String otherAnchorPpn = null;
    String otherAnchorEpn = null;
    String otherPpn = null;
    String currentEpn = null;
    String otherEpn = null;
    boolean foundMultipleEpns = false;

    // generate an answer document
    DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
    org.w3c.dom.Document answer = docBuilder.newDocument();
    org.w3c.dom.Element collection = answer.createElement("collection");
    answer.appendChild(collection);

    boolean shelfmarkFound = false;
    List<Element> data = record.getChildren();
    for (Element el : data) {
        if (el.getName().equalsIgnoreCase("leader")) {
            String value = el.getText();
            if (value.length() < 24) {
                value = "00000" + value;
            }
            char c6 = value.toCharArray()[6];
            char c7 = value.toCharArray()[7];
            char c19 = value.toCharArray()[19];
            if (c6 == 'a' && (c7 == 's' || c7 == 'd')) {
                isPeriodical = true;
            } else if (c6 == 't') {
                isManuscript = true;
            } else if (c6 == 'e') {
                isCartographic = true;
            }
            if (c19 == 'b' || c19 == 'c') {
                isFSet = true;
            }
        }
        if (el.getName().equalsIgnoreCase("datafield")) {
            String tag = el.getAttributeValue("tag");
            List<Element> subfields = el.getChildren();
            boolean isCurrentEpn = false;
            for (Element sub : subfields) {
                String code = sub.getAttributeValue("code");
                // anchor identifier
                if (tag.equals("773") && code.equals("w")) {
                    if (ignoreAnchor) {
                        sub.setText("");
                    } else if (isFSet || isPeriodical) {
                        isMultiVolume = true;
                        anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                    }
                } else if (tag.equals("800") && code.equals("w")) {
                    isMultiVolume = true;
                    anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                } else if (isManuscript && tag.equals("810") && code.equals("w")) {
                    isMultiVolume = true;
                    anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                } else if (tag.equals("830") && code.equals("w")) {
                    if (isCartographic || (isFSet && anchorPpn == null)) {
                        isMultiVolume = true;
                        anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                    }
                } else if (tag.equals("776") && code.equals("w")) {
                    if (otherPpn == null) {
                        // found first/only occurrence
                        otherPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                    } else {
                        otherPpn = null;
                        foundMultipleEpns = true;
                    }
                } else if (tag.equals("954")) {
                    if (code.equals("b")) {
                        if (searchField.equals("pica.epn")) {
                            // remove wrong epns
                            currentEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                            isCurrentEpn = true;
                            if (!searchValue.trim().equals(currentEpn)) {
                                sub.setAttribute("code", "invalid");
                                for (Element exemplarData : subfields) {
                                    if (exemplarData.getAttributeValue("code").equals("d")) {
                                        exemplarData.setAttribute("code", "invalid");
                                    }
                                }
                            }
                        } else {
                            if (currentEpn == null) {
                                isCurrentEpn = true;
                                currentEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                            } else {
                                foundMultipleEpns = true;
                            }
                        }
                    } else if (code.equals("d")) {
                        if (!shelfmarkFound && isCurrentEpn) {
                            shelfmarkFound = true;
                        } else {
                            sub.setAttribute("code", "invalid");
                        }
                    }
                }
            }
        }
    }

    // search for pica.zdb for periodicals
    // get digital epn from digital ppn record
    if (otherPpn != null) {
        String otherResult = SRUHelper.search(catalogue, schema, isPeriodical ? "pica.zdb" : "pica.ppn",
                otherPpn, packing, version);
        Document otherDocument = new SAXBuilder().build(new StringReader(otherResult), "utf-8");
        if (otherDocument != null) {
            Element otherRecord = getRecordWithoutSruHeader(otherDocument);
            if (otherRecord == null) {
                Helper.setFehlerMeldung("import_OtherEPNNotFound");
            } else {
                List<Element> controlList = otherRecord.getChildren("controlfield", MARC);
                for (Element field : controlList) {
                    if (field.getAttributeValue("tag").equals("001")) {
                        otherPpn = field.getText();
                    }
                }
                List<Element> fieldList = otherRecord.getChildren("datafield", MARC);
                for (Element field : fieldList) {
                    String tag = field.getAttributeValue("tag");
                    List<Element> subfields = field.getChildren();
                    for (Element sub : subfields) {
                        String code = sub.getAttributeValue("code");
                        // anchor identifier
                        if (tag.equals("773") && code.equals("w")) {
                            otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                        } else if (tag.equals("800") && code.equals("w")) {
                            otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                        } else if (isManuscript && tag.equals("810") && code.equals("w")) {
                            otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "");
                        } else if (isCartographic && tag.equals("830") && code.equals("w")) {
                            otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                        } else if (tag.equals("954") && code.equals("b")) {
                            if (otherEpn == null) {
                                otherEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                            } else {
                                foundMultipleEpns = true;
                                otherEpn = null;
                            }
                        }
                    }
                }
            }
            if (otherPpn != null) {
                Element datafield = new Element("datafield", MARC);
                datafield.setAttribute("tag", "ppnDigital");
                datafield.setAttribute("ind1", "");
                datafield.setAttribute("ind2", "");
                Element subfield = new Element("subfield", MARC);
                subfield.setAttribute("code", "a");
                subfield.setText(otherPpn);
                datafield.addContent(subfield);
                data.add(datafield);
            }
            if (otherEpn != null && !foundMultipleEpns) {
                Element datafield = new Element("datafield", MARC);
                datafield.setAttribute("tag", "epnDigital");
                datafield.setAttribute("ind1", "");
                datafield.setAttribute("ind2", "");
                Element subfield = new Element("subfield", MARC);
                subfield.setAttribute("code", "a");
                subfield.setText(otherEpn);
                datafield.addContent(subfield);
                data.add(datafield);
            }
        }
    }

    org.w3c.dom.Element marcRecord = getRecord(answer, data, opac);

    if (isMultiVolume) {
        // get anchor record
        String anchorResult = SRUHelper.search(catalogue, schema, "pica.ppn", anchorPpn, packing, version);
        Document anchorDoc = new SAXBuilder().build(new StringReader(anchorResult), "utf-8");
        Element anchorRecord = getRecordWithoutSruHeader(anchorDoc);
        if (anchorRecord != null) {
            List<Element> anchorData = anchorRecord.getChildren();

            // get EPN/PPN digital for anchor
            String otherAnchorResult = SRUHelper.search(catalogue, schema,
                    isPeriodical ? "pica.zdb" : "pica.ppn", otherAnchorPpn, packing, version);
            Document otherAnchorDoc = new SAXBuilder().build(new StringReader(otherAnchorResult), "utf-8");
            Element otherAnchorRecord = getRecordWithoutSruHeader(otherAnchorDoc);
            if (otherAnchorRecord == null) {
                Helper.setFehlerMeldung("import_OtherEPNNotFound");
            } else {
                List<Element> controlList = otherAnchorRecord.getChildren("controlfield", MARC);
                for (Element field : controlList) {
                    if (field.getAttributeValue("tag").equals("001")) {
                        otherAnchorPpn = field.getText();
                    }
                }
                List<Element> fieldList = otherAnchorRecord.getChildren("datafield", MARC);
                for (Element field : fieldList) {
                    if (field.getAttributeValue("tag").equals("954")) {
                        List<Element> subfields = field.getChildren();
                        for (Element sub : subfields) {
                            String code = sub.getAttributeValue("code");
                            if (code.equals("b")) {
                                if (otherAnchorEpn == null) {
                                    otherAnchorEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                                } else {
                                    foundMultipleEpns = true;
                                }
                            }
                        }
                    }
                }
                if (otherAnchorPpn != null) {
                    Element datafield = new Element("datafield", MARC);
                    datafield.setAttribute("tag", "ppnDigital");
                    datafield.setAttribute("ind1", "");
                    datafield.setAttribute("ind2", "");
                    Element subfield = new Element("subfield", MARC);
                    subfield.setAttribute("code", "a");
                    subfield.setText(otherAnchorPpn);
                    datafield.addContent(subfield);
                    anchorData.add(datafield);
                }
                if (otherAnchorEpn != null && !foundMultipleEpns) {
                    Element datafield = new Element("datafield", MARC);
                    datafield.setAttribute("tag", "epnDigital");
                    datafield.setAttribute("ind1", "");
                    datafield.setAttribute("ind2", "");
                    Element subfield = new Element("subfield", MARC);
                    subfield.setAttribute("code", "a");
                    subfield.setText(otherAnchorEpn);
                    datafield.addContent(subfield);
                    anchorData.add(datafield);
                }
            }
            org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac);
            collection.appendChild(anchorMarcRecord);
        }
    }
    if (foundMultipleEpns) {
        Helper.setFehlerMeldung("import_foundMultipleEPNs");
    }
    collection.appendChild(marcRecord);
    return answer.getDocumentElement();
}
From source file: de.intranda.goobi.plugins.sru.SRUHelper.java
License: Open Source License
public static Node parseGbvResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField,
        String resultString, String packing, String version)
        throws IOException, JDOMException, ParserConfigurationException {
    // removed validation against external dtd
    SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING);
    builder.setFeature("http://xml.org/sax/features/validation", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    Document doc = builder.build(new StringReader(resultString), "utf-8");
    // srw:searchRetrieveResponse
    Element record = getRecordWithoutSruHeader(doc);
    if (record == null) {
        opac.setHitcount(0);
        return null;
    } else {
        opac.setHitcount(1);

        // generate an answer document
        DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
        org.w3c.dom.Document answer = docBuilder.newDocument();
        org.w3c.dom.Element collection = answer.createElement("collection");
        answer.appendChild(collection);

        boolean isMultiVolume = false;
        boolean isPeriodical = false;
        boolean isManuscript = false;
        boolean isCartographic = false;
        String anchorIdentifier = "";
        List<Element> data = record.getChildren();
        for (Element el : data) {
            if (el.getName().equalsIgnoreCase("leader")) {
                String value = el.getText();
                if (value.length() < 24) {
                    value = "00000" + value;
                }
                char c6 = value.toCharArray()[6];
                char c7 = value.toCharArray()[7];
                char c19 = value.toCharArray()[19];
                if (c6 == 'a' && (c7 == 's' || c7 == 'd')) {
                    isPeriodical = true;
                } else if (c6 == 't') {
                    isManuscript = true;
                } else if (c6 == 'e') {
                    isCartographic = true;
                }
                if (c19 == 'b' || c19 == 'c') {
                    isMultiVolume = true;
                }
            }
            if (el.getName().equalsIgnoreCase("datafield")) {
                String tag = el.getAttributeValue("tag");
                List<Element> subfields = el.getChildren();
                for (Element sub : subfields) {
                    String code = sub.getAttributeValue("code");
                    // anchor identifier
                    if (tag.equals("773") && code.equals("w")) {
                        if (!isMultiVolume && !isPeriodical) {
                            sub.setText("");
                        } else {
                            anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                        }
                    } else if (tag.equals("800") && code.equals("w") && isMultiVolume) {
                        anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                    } else if (isManuscript && tag.equals("810") && code.equals("w")) {
                        isMultiVolume = true;
                        anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                    } else if (tag.equals("830") && code.equals("w")) {
                        if (isCartographic || (isMultiVolume && anchorIdentifier == null)) {
                            anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", "");
                        }
                    }
                }
            }
        }

        org.w3c.dom.Element marcRecord = getRecord(answer, data, opac);
        if (isMultiVolume) {
            String anchorResult = SRUHelper.search(catalogue, schema, searchField, anchorIdentifier, packing,
                    version);
            Document anchorDoc = new SAXBuilder().build(new StringReader(anchorResult), "utf-8");
            Element anchorRecord = getRecordWithoutSruHeader(anchorDoc);
            if (anchorRecord != null) {
                List<Element> anchorData = anchorRecord.getChildren();
                org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac);
                collection.appendChild(anchorMarcRecord);
            }
        }
        collection.appendChild(marcRecord);
        return answer.getDocumentElement();
    }
}
From source file: de.nava.informa.parsers.OPMLParser.java
License: Open Source License
public static Collection<FeedIF> parse(InputSource inpSource, URL baseLocation)
        throws IOException, ParseException {
    // document reading without validation
    SAXBuilder saxBuilder = new SAXBuilder(false);
    // turn off DTD loading
    saxBuilder.setEntityResolver(new NoOpEntityResolver());
    try {
        Document doc = saxBuilder.build(inpSource);
        return parse(doc);
    } catch (JDOMException e) {
        throw new ParseException(e);
    }
}
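Note that new SAXBuilder(false) uses the boolean constructor that JDOM2 deprecates in favor of the factory-based one shown at the top of this page. A minimal sketch of the non-deprecated equivalent, assuming the same no-op entity resolver is still wanted:

// Non-deprecated equivalent of new SAXBuilder(false) in JDOM2.
SAXBuilder saxBuilder = new SAXBuilder(XMLReaders.NONVALIDATING);
saxBuilder.setEntityResolver(new NoOpEntityResolver()); // still skip DTD fetches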
From source file: ec.edu.cedia.redi.ldclient.provider.ScopusAuthorProvider.java
License: Apache License
/**
 * Parses each XML publication result and assigns each publication resource
 * to its author. See
 * <a href="http://api.elsevier.com/documentation/SCOPUSSearchAPI.wadl">Scopus
 * Search API</a>.
 *
 * @param input
 * @param requestUrl
 * @param triples
 * @return list of publication resources
 * @throws DataRetrievalException
 */
private List<String> parseSearchPub(InputStream input, String requestUrl, final Model triples)
        throws DataRetrievalException {
    try {
        List<String> publications = new ArrayList<>();
        ValueFactory vf = ValueFactoryImpl.getInstance();
        String authorId = requestUrl.substring(requestUrl.indexOf("au-id(") + 6, requestUrl.indexOf(")&"));
        URI author = vf.createURI("http://api.elsevier.com/content/author/author_id/", authorId);

        final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(input);
        XPathExpression<Attribute> path = XPathFactory.instance().compile(
                "/atom:search-results/atom:entry/atom:link[@ref='self']/@href", Filters.attribute(), null,
                Namespace.getNamespace("atom", "http://www.w3.org/2005/Atom"));
        List<Attribute> publicationsFound = path.evaluate(doc);
        for (int i = 0; i < publicationsFound.size(); i++) {
            String pubResource = publicationsFound.get(i).getValue();
            triples.add(author, FOAF.PUBLICATIONS, vf.createURI(pubResource));
            publications.add(pubResource + "?apiKey=" + apiKey + "&httpAccept=application/rdf%2Bxml");
        }
        return publications;
    } catch (JDOMException | IOException ex) {
        throw new DataRetrievalException(ex);
    }
}
From source file: ec.edu.cedia.redi.ldclient.provider.ScopusAuthorProvider.java
License: Apache License
/**
 * Maps each author from XML to RDF using the default implementation of
 * {@link AbstractXMLDataProvider#parseResponse}.
 *
 * @see
 * <a href="http://api.elsevier.com/documentation/AUTHORSearchAPI.wadl">Authors
 * search API.</a>
 *
 * @param input
 * @param resource
 * @param requestUrl
 * @param triples
 * @param contentType
 * @return list of resources of authors found.
 * @throws DataRetrievalException
 */
private List<String> parseResponseAuthorsSearch(InputStream input, String resource, String requestUrl,
        Model triples, String contentType) throws DataRetrievalException {
    try {
        // List of authors from which to extract profile information such as publications, affiliations, etc.
        List<String> authorsFound = new ArrayList<>();
        ValueFactory vf = ValueFactoryImpl.getInstance();
        // Keep the response bytes around so the stream can be read multiple times.
        byte[] response = IOUtils.toByteArray(input);
        final Document doc = new SAXBuilder(XMLReaders.NONVALIDATING).build(new ByteArrayInputStream(response));

        // Get only the URIs of authors.
        XPathExpression<Text> path = XPathFactory.instance().compile(
                "/atom:search-results/atom:entry/prism:url/text()", Filters.textOnly(), null,
                Namespace.getNamespace("atom", "http://www.w3.org/2005/Atom"),
                Namespace.getNamespace("prism", "http://prismstandard.org/namespaces/basic/2.0/"));

        // Map each author XML to RDF using the default parseResponse implementation from AbstractXMLDataProvider.
        List<Text> authorNodes = path.evaluate(doc);
        for (int i = 0; i < authorNodes.size(); i++) {
            setAuthorXPathMappings(i);
            String authorsResource = authorNodes.get(i).getValue();
            super.parseResponse(authorsResource, requestUrl, triples, new ByteArrayInputStream(response),
                    contentType);
            authorsFound.add(authorsResource + "?apiKey=" + apiKey
                    + "&httpAccept=application/rdf%2Bxml&view=ENHANCED");
            triples.add(vf.createURI(authorsResource), OWL.ONEOF, vf.createURI(resource));
        }
        return authorsFound;
    } catch (JDOMException | IOException | DataRetrievalException ex) {
        throw new DataRetrievalException(ex);
    }
}
From source file: edu.unc.lib.deposit.normalize.VocabularyEnforcementJob.java
License: Apache License
@Override
public void runJob() {
    Model model = getWritableModel();

    // Get the list of all objects being ingested in this job
    List<String> resourcePIDs = new ArrayList<>();
    Bag deposit = model.getBag(getDepositPID().getURI());
    walkChildrenDepthFirst(deposit, resourcePIDs, true);

    SAXBuilder sb = new SAXBuilder(new XMLReaderSAX2Factory(false));

    // Vocabulary mappings need to be resolved against the destination since they are not in the hierarchy yet
    PID destinationPID = new PID(getDepositStatus().get(DepositField.containerId.name()));

    for (String resourcePID : resourcePIDs) {
        PID pid = new PID(resourcePID);
        File modsFile = new File(getDescriptionDir(), pid.getUUID() + ".xml");

        // Check if the resource has a description
        if (modsFile.exists()) {
            try {
                Document modsDoc = sb.build(modsFile);

                // Update the MODS document to use approved terms when possible if the vocabularies support remapping
                log.debug("Updating document terms for {} within destination {}", pid, destinationPID);
                boolean modified = updateDocumentTerms(destinationPID, modsDoc.getRootElement());

                // Update the mods document if it was changed
                if (modified) {
                    try (FileOutputStream fos = new FileOutputStream(modsFile)) {
                        new XMLOutputter(Format.getPrettyFormat()).output(modsDoc.getDocument(), fos);
                    }
                }

                // Capture any invalid affiliations as relations
                log.debug("Adding invalid terms for {} within destination {}", pid, destinationPID);
                addInvalidTerms(pid, destinationPID, modsDoc.getRootElement(), model);
            } catch (JDOMException | IOException e) {
                log.error("Failed to parse description file {}", modsFile.getAbsolutePath(), e);
            }
        }
    }
}
From source file: edu.unc.lib.dl.xml.DepartmentOntologyUtil.java
License: Apache License
/**
 * Parses a SKOS XML vocabulary and populates a lookup index of labels and alternative labels
 * referencing the authoritative version.
 *
 * @param content
 * @throws Exception
 */
private void parseVocabulary(byte[] content) throws Exception {
    departments = new HashMap<String, DepartmentConcept>();

    log.debug("Parsing and building Department vocabulary from {}", getVocabularyURI());

    SAXBuilder sb = new SAXBuilder(new XMLReaderSAX2Factory(false));
    Document skosDoc = sb.build(new ByteArrayInputStream(content));

    // Extract all of the concepts and store them to an index
    List<?> concepts = skosDoc.getRootElement().getChildren("Concept", SKOS_NS);
    Map<String, DepartmentConcept> tempDepts = new HashMap<String, DepartmentConcept>(concepts.size());
    for (Object conceptObj : concepts) {
        DepartmentConcept dept = new DepartmentConcept((Element) conceptObj);
        tempDepts.put(cleanLabel(dept.getIdentifier()), dept);
    }

    // Expand out all the alternative labels into an index and resolve references
    for (Iterator<Entry<String, DepartmentConcept>> deptIt = tempDepts.entrySet().iterator(); deptIt
            .hasNext();) {
        Entry<String, DepartmentConcept> deptEntry = deptIt.next();
        DepartmentConcept dept = deptEntry.getValue();

        // Check if this concept should be ignored in favor of a preferred concept
        if (dept.prefLabel != null) {
            if (departments.containsKey(dept.prefLabel)) {
                // The preferred concept has already been indexed, grab extra labels from this concept and reindex pref
                DepartmentConcept prefDept = departments.get(dept.prefLabel);
                prefDept.merge(dept);
                addLabels(prefDept);
            } else {
                // Since the preferred concept isn't indexed yet, just need to merge labels into it
                DepartmentConcept prefDept = tempDepts.get(dept.prefLabel);
                if (prefDept == null) {
                    log.warn("Preferred label {} referencing a concept which is not present", dept.prefLabel);
                } else {
                    prefDept.merge(dept);
                }
            }
            continue;
        }

        String identifier = cleanLabel(dept.identifier);
        if (departments.containsKey(identifier)
                && dept.identifier.equals(departments.get(identifier).identifier)) {
            log.error("Illegal state, multiple concepts share the identifier {}, ignoring duplicate",
                    identifier);
        } else {
            departments.put(identifier, dept);
        }

        addLabels(dept);
    }
}
From source file: edu.wisc.ssec.adapter.NetCDFFile.java
License: Open Source License
public static NetCDFFile makeUnion(String filename, String other) throws Exception {
    Object obj = new Object();
    URL url = obj.getClass().getResource("/edu/wisc/ssec/mcidasv/data/hydra/resources/union.ncml");

    SAXBuilder builder = new SAXBuilder(false);
    Document doc = null;
    try {
        doc = builder.build(url);
    } catch (Exception e) {
        e.printStackTrace();
    }
    Element root = doc.getRootElement();

    List list = root.getChildren();
    list = ((Element) list.get(1)).getChildren();

    org.jdom2.Attribute attr1 = (org.jdom2.Attribute) (((Element) list.get(0)).getAttributes()).get(0);
    attr1.setValue(filename);

    org.jdom2.Attribute attr2 = (org.jdom2.Attribute) (((Element) list.get(1)).getAttributes()).get(0);
    attr2.setValue(other);

    XMLOutputter xmlOut = new XMLOutputter();
    String newStr = xmlOut.outputString(doc);

    ByteArrayInputStream is = new ByteArrayInputStream(newStr.getBytes());
    return new NetCDFFile(is);
}
From source file: edu.wisc.ssec.mcidasv.data.hydra.NetCDFFile.java
License: Open Source License
public static NetCDFFile makeUnion(String filename, String other) throws Exception {
    Object obj = new Object();
    URL url = obj.getClass().getResource("/edu/wisc/ssec/mcidasv/data/hydra/resources/union.ncml");

    SAXBuilder builder = new SAXBuilder(false);
    Document doc = null;
    try {
        doc = builder.build(url);
    } catch (Exception e) {
        e.printStackTrace();
    }
    Element root = doc.getRootElement();

    List list = root.getChildren();
    list = ((Element) list.get(1)).getChildren();

    org.jdom2.Attribute attr1 = (((Element) list.get(0)).getAttributes()).get(0);
    attr1.setValue(filename);

    org.jdom2.Attribute attr2 = (((Element) list.get(1)).getAttributes()).get(0);
    attr2.setValue(other);

    XMLOutputter xmlOut = new XMLOutputter();
    String newStr = xmlOut.outputString(doc);
    logger.trace("union string:\n{}", newStr);

    ByteArrayInputStream is = new ByteArrayInputStream(newStr.getBytes());
    return new NetCDFFile(is);
}