List of usage examples for the build method of org.jdom2.input.SAXBuilder
@Override public Document build(final Reader characterStream, final String systemId) throws JDOMException, IOException
This builds a document from the supplied Reader.
From source file:de.intranda.goobi.plugins.sru.SRUHelper.java
License:Open Source License
public static Node parseHaabResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField, String searchValue, String resultString, String packing, String version, boolean ignoreAnchor) throws IOException, JDOMException, ParserConfigurationException { SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING); builder.setFeature("http://xml.org/sax/features/validation", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); Document doc = builder.build(new StringReader(resultString), "utf-8"); Element record = getRecordWithoutSruHeader(doc); if (record == null) { opac.setHitcount(0);//from www .jav a 2s . c om return null; } opac.setHitcount(1); boolean isPeriodical = false; boolean isManuscript = false; boolean isCartographic = false; boolean isMultiVolume = false; boolean isFSet = false; String anchorPpn = null; String otherAnchorPpn = null; String otherAnchorEpn = null; String otherPpn = null; String currentEpn = null; String otherEpn = null; boolean foundMultipleEpns = false; // generate an answer document DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); org.w3c.dom.Document answer = docBuilder.newDocument(); org.w3c.dom.Element collection = answer.createElement("collection"); answer.appendChild(collection); boolean shelfmarkFound = false; List<Element> data = record.getChildren(); for (Element el : data) { if (el.getName().equalsIgnoreCase("leader")) { String value = el.getText(); if (value.length() < 24) { value = "00000" + value; } char c6 = value.toCharArray()[6]; char c7 = value.toCharArray()[7]; char c19 = value.toCharArray()[19]; if (c6 == 'a' && (c7 == 's' || c7 == 'd')) { isPeriodical = true; } else if (c6 == 't') { isManuscript = true; } else if (c6 == 'e') { isCartographic = true; } if (c19 == 'b' || c19 == 'c') { 
isFSet = true; } } if (el.getName().equalsIgnoreCase("datafield")) { String tag = el.getAttributeValue("tag"); List<Element> subfields = el.getChildren(); boolean isCurrentEpn = false; for (Element sub : subfields) { String code = sub.getAttributeValue("code"); // anchor identifier if (tag.equals("773") && code.equals("w")) { if (ignoreAnchor) { sub.setText(""); } else if (isFSet || isPeriodical) { isMultiVolume = true; anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } } else if (tag.equals("800") && code.equals("w")) { isMultiVolume = true; anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (isManuscript && tag.equals("810") && code.equals("w")) { isMultiVolume = true; anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (tag.equals("830") && code.equals("w")) { if (isCartographic || (isFSet && anchorPpn == null)) { isMultiVolume = true; anchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } } else if (tag.equals("776") && code.equals("w")) { if (otherPpn == null) { // found first/only occurrence otherPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else { otherPpn = null; foundMultipleEpns = true; } } else if (tag.equals("954")) { if (code.equals("b")) { if (searchField.equals("pica.epn")) { // remove wrong epns currentEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); isCurrentEpn = true; if (!searchValue.trim().equals(currentEpn)) { sub.setAttribute("code", "invalid"); for (Element exemplarData : subfields) { if (exemplarData.getAttributeValue("code").equals("d")) { exemplarData.setAttribute("code", "invalid"); } } } } else { if (currentEpn == null) { isCurrentEpn = true; currentEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else { foundMultipleEpns = true; } } } else if (code.equals("d")) { if (!shelfmarkFound && isCurrentEpn) { shelfmarkFound = true; } else { sub.setAttribute("code", "invalid"); } 
} } } } } // search for pica.zdb for periodca // get digital epn from digital ppn record if (otherPpn != null) { String otherResult = SRUHelper.search(catalogue, schema, isPeriodical ? "pica.zdb" : "pica.ppn", otherPpn, packing, version); Document otherDocument = new SAXBuilder().build(new StringReader(otherResult), "utf-8"); if (otherDocument != null) { Element otherRecord = getRecordWithoutSruHeader(otherDocument); if (otherRecord == null) { Helper.setFehlerMeldung("import_OtherEPNNotFound"); } else { List<Element> controlList = otherRecord.getChildren("controlfield", MARC); for (Element field : controlList) { if (field.getAttributeValue("tag").equals("001")) { otherPpn = field.getText(); } } List<Element> fieldList = otherRecord.getChildren("datafield", MARC); for (Element field : fieldList) { String tag = field.getAttributeValue("tag"); List<Element> subfields = field.getChildren(); for (Element sub : subfields) { String code = sub.getAttributeValue("code"); // anchor identifier if (tag.equals("773") && code.equals("w")) { otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (tag.equals("800") && code.equals("w")) { otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (isManuscript && tag.equals("810") && code.equals("w")) { otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", ""); } else if (isCartographic && tag.equals("830") && code.equals("w")) { otherAnchorPpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (tag.equals("954") && code.equals("b")) { if (otherEpn == null) { otherEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else { foundMultipleEpns = true; otherEpn = null; } } } } } if (otherPpn != null) { Element datafield = new Element("datafield", MARC); datafield.setAttribute("tag", "ppnDigital"); datafield.setAttribute("ind1", ""); datafield.setAttribute("ind2", ""); Element subfield = new Element("subfield", MARC); 
subfield.setAttribute("code", "a"); subfield.setText(otherPpn); datafield.addContent(subfield); data.add(datafield); } if (otherEpn != null && !foundMultipleEpns) { Element datafield = new Element("datafield", MARC); datafield.setAttribute("tag", "epnDigital"); datafield.setAttribute("ind1", ""); datafield.setAttribute("ind2", ""); Element subfield = new Element("subfield", MARC); subfield.setAttribute("code", "a"); subfield.setText(otherEpn); datafield.addContent(subfield); data.add(datafield); } } } org.w3c.dom.Element marcRecord = getRecord(answer, data, opac); if (isMultiVolume) { // get anchor record String anchorResult = SRUHelper.search(catalogue, schema, "pica.ppn", anchorPpn, packing, version); Document anchorDoc = new SAXBuilder().build(new StringReader(anchorResult), "utf-8"); Element anchorRecord = getRecordWithoutSruHeader(anchorDoc); if (anchorRecord != null) { List<Element> anchorData = anchorRecord.getChildren(); // get EPN/PPN digital for anchor String otherAnchorResult = SRUHelper.search(catalogue, schema, isPeriodical ? 
"pica.zdb" : "pica.ppn", otherAnchorPpn, packing, version); Document otherAnchorDoc = new SAXBuilder().build(new StringReader(otherAnchorResult), "utf-8"); Element otherAnchorRecord = getRecordWithoutSruHeader(otherAnchorDoc); if (otherAnchorRecord == null) { Helper.setFehlerMeldung("import_OtherEPNNotFound"); } else { List<Element> controlList = otherAnchorRecord.getChildren("controlfield", MARC); for (Element field : controlList) { if (field.getAttributeValue("tag").equals("001")) { otherAnchorPpn = field.getText(); } } List<Element> fieldList = otherAnchorRecord.getChildren("datafield", MARC); for (Element field : fieldList) { if (field.getAttributeValue("tag").equals("954")) { List<Element> subfields = field.getChildren(); for (Element sub : subfields) { String code = sub.getAttributeValue("code"); if (code.equals("b")) { if (otherAnchorEpn == null) { otherAnchorEpn = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); ; } else { foundMultipleEpns = true; } } } } } if (otherAnchorPpn != null) { Element datafield = new Element("datafield", MARC); datafield.setAttribute("tag", "ppnDigital"); datafield.setAttribute("ind1", ""); datafield.setAttribute("ind2", ""); Element subfield = new Element("subfield", MARC); subfield.setAttribute("code", "a"); subfield.setText(otherAnchorPpn); datafield.addContent(subfield); anchorData.add(datafield); } if (otherAnchorEpn != null && !foundMultipleEpns) { Element datafield = new Element("datafield", MARC); datafield.setAttribute("tag", "epnDigital"); datafield.setAttribute("ind1", ""); datafield.setAttribute("ind2", ""); Element subfield = new Element("subfield", MARC); subfield.setAttribute("code", "a"); subfield.setText(otherAnchorEpn); datafield.addContent(subfield); anchorData.add(datafield); } } org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac); collection.appendChild(anchorMarcRecord); } } if (foundMultipleEpns) { Helper.setFehlerMeldung("import_foundMultipleEPNs"); } 
collection.appendChild(marcRecord); return answer.getDocumentElement(); }
From source file:de.intranda.goobi.plugins.sru.SRUHelper.java
License:Open Source License
public static Node parseGbvResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField, String resultString, String packing, String version) throws IOException, JDOMException, ParserConfigurationException { // removed validation against external dtd SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING); builder.setFeature("http://xml.org/sax/features/validation", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false); builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false); Document doc = builder.build(new StringReader(resultString), "utf-8"); // srw:searchRetrieveResponse Element record = getRecordWithoutSruHeader(doc); if (record == null) { opac.setHitcount(0);//w w w . j a va 2 s.c om return null; } else { opac.setHitcount(1); // generate an answer document DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance(); DocumentBuilder docBuilder = dbfac.newDocumentBuilder(); org.w3c.dom.Document answer = docBuilder.newDocument(); org.w3c.dom.Element collection = answer.createElement("collection"); answer.appendChild(collection); boolean isMultiVolume = false; boolean isPeriodical = false; boolean isManuscript = false; boolean isCartographic = false; String anchorIdentifier = ""; List<Element> data = record.getChildren(); for (Element el : data) { if (el.getName().equalsIgnoreCase("leader")) { String value = el.getText(); if (value.length() < 24) { value = "00000" + value; } char c6 = value.toCharArray()[6]; char c7 = value.toCharArray()[7]; char c19 = value.toCharArray()[19]; if (c6 == 'a' && (c7 == 's' || c7 == 'd')) { isPeriodical = true; } else if (c6 == 't') { isManuscript = true; } else if (c6 == 'e') { isCartographic = true; } if (c19 == 'b' || c19 == 'c') { isMultiVolume = true; } } if (el.getName().equalsIgnoreCase("datafield")) { String tag = el.getAttributeValue("tag"); List<Element> subfields = el.getChildren(); for (Element sub : 
subfields) { String code = sub.getAttributeValue("code"); // anchor identifier if (tag.equals("773") && code.equals("w")) { if (!isMultiVolume && !isPeriodical) { sub.setText(""); } else { anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } } else if (tag.equals("800") && code.equals("w") && isMultiVolume) { anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (isManuscript && tag.equals("810") && code.equals("w")) { isMultiVolume = true; anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } else if (tag.equals("830") && code.equals("w")) { if (isCartographic || (isMultiVolume && anchorIdentifier == null)) { anchorIdentifier = sub.getText().replaceAll("\\(.+\\)", "").replace("KXP", ""); } } } } } org.w3c.dom.Element marcRecord = getRecord(answer, data, opac); if (isMultiVolume) { String anchorResult = SRUHelper.search(catalogue, schema, searchField, anchorIdentifier, packing, version); Document anchorDoc = new SAXBuilder().build(new StringReader(anchorResult), "utf-8"); Element anchorRecord = getRecordWithoutSruHeader(anchorDoc); if (anchorRecord != null) { List<Element> anchorData = anchorRecord.getChildren(); org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac); collection.appendChild(anchorMarcRecord); } } collection.appendChild(marcRecord); return answer.getDocumentElement(); } }