Example usage for org.jdom2.input SAXBuilder build

List of usage examples for org.jdom2.input SAXBuilder build

Introduction

On this page you can find example usages of the build method of org.jdom2.input.SAXBuilder.

Prototype

@Override
public Document build(final Reader characterStream, final String systemId) throws JDOMException, IOException 

Source Link

Document

This builds a document from the supplied Reader.

Usage

From source file:de.intranda.goobi.plugins.sru.SRUHelper.java

License:Open Source License

/**
 * Converts an SRU response into a W3C DOM {@code collection} element holding the
 * found MARC record and, for multi-volume works, its anchor record.
 *
 * <p>While walking the record the method classifies it from the leader, collects
 * the anchor identifier (773/800/810/830 $w), the identifier of the linked "other"
 * record (776 $w) and the EPN (954 $b). Linked records are fetched through
 * {@code SRUHelper.search} and their identifiers are appended to the record as
 * synthetic {@code ppnDigital} / {@code epnDigital} datafields.
 *
 * @param opac         importer that receives the hit count and is handed to {@code getRecord}
 * @param catalogue    passed through to {@code SRUHelper.search}
 * @param schema       passed through to {@code SRUHelper.search}
 * @param searchField  when equal to {@code "pica.epn"}, every 954 $b that differs from
 *                     {@code searchValue} is marked invalid together with the $d subfields
 *                     of the same datafield
 * @param searchValue  the value that was searched for; only read when
 *                     {@code searchField} is {@code "pica.epn"}
 * @param resultString XML text of the SRU response
 * @param packing      passed through to {@code SRUHelper.search}
 * @param version      passed through to {@code SRUHelper.search}
 * @param ignoreAnchor when {@code true}, 773 $w is emptied instead of being used as anchor
 * @return the {@code collection} element (anchor record first, if any, then the record),
 *         or {@code null} when the response contains no record
 * @throws IOException                  if reading one of the XML responses fails
 * @throws JDOMException                if one of the XML responses cannot be parsed
 * @throws ParserConfigurationException if no W3C DocumentBuilder can be created
 */
public static Node parseHaabResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField,
        String searchValue, String resultString, String packing, String version, boolean ignoreAnchor)
        throws IOException, JDOMException, ParserConfigurationException {
    // One hardened builder is used for EVERY response parsed in this method, so that
    // follow-up lookups do not silently re-enable external DTD loading.
    SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING);
    builder.setFeature("http://xml.org/sax/features/validation", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    Document doc = builder.build(new StringReader(resultString), "utf-8");
    Element record = getRecordWithoutSruHeader(doc);
    if (record == null) {
        opac.setHitcount(0);
        return null;
    }
    opac.setHitcount(1);

    boolean isPeriodical = false;
    boolean isManuscript = false;
    boolean isCartographic = false;
    boolean isMultiVolume = false;
    boolean isFSet = false;

    String anchorPpn = null;
    String otherAnchorPpn = null;
    String otherAnchorEpn = null;

    String otherPpn = null;
    String currentEpn = null;
    String otherEpn = null;
    // Shared by the record and its anchor: once set, no epnDigital field is written at all.
    boolean foundMultipleEpns = false;

    // generate an answer document
    DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
    org.w3c.dom.Document answer = docBuilder.newDocument();
    org.w3c.dom.Element collection = answer.createElement("collection");
    answer.appendChild(collection);

    boolean shelfmarkFound = false;
    List<Element> data = record.getChildren();
    for (Element el : data) {
        if (el.getName().equalsIgnoreCase("leader")) {
            String value = el.getText();
            if (value.length() < 24) {
                // presumably the five-digit record length is missing — TODO confirm
                value = "00000" + value;
            }
            // Positions 6, 7 and 19 are read below; skip classification for a leader
            // that is still too short instead of failing with an index error.
            if (value.length() > 19) {
                char c6 = value.charAt(6);
                char c7 = value.charAt(7);
                char c19 = value.charAt(19);
                if (c6 == 'a' && (c7 == 's' || c7 == 'd')) {
                    isPeriodical = true;
                } else if (c6 == 't') {
                    isManuscript = true;
                } else if (c6 == 'e') {
                    isCartographic = true;
                }
                if (c19 == 'b' || c19 == 'c') {
                    isFSet = true;
                }
            }
        }
        if (el.getName().equalsIgnoreCase("datafield")) {
            // The flags above only influence datafields that follow the leader in document order.
            String tag = el.getAttributeValue("tag");
            List<Element> subfields = el.getChildren();
            boolean isCurrentEpn = false;
            for (Element sub : subfields) {
                // Literal-first comparisons: tag or code may be absent (null) on odd elements.
                String code = sub.getAttributeValue("code");
                boolean isLinkSubfield = "w".equals(code);

                if ("773".equals(tag) && isLinkSubfield) {
                    // anchor identifier
                    if (ignoreAnchor) {
                        sub.setText("");
                    } else if (isFSet || isPeriodical) {
                        isMultiVolume = true;
                        anchorPpn = stripIdentifierPrefix(sub.getText());
                    }
                } else if ("800".equals(tag) && isLinkSubfield) {
                    isMultiVolume = true;
                    anchorPpn = stripIdentifierPrefix(sub.getText());
                } else if (isManuscript && "810".equals(tag) && isLinkSubfield) {
                    isMultiVolume = true;
                    anchorPpn = stripIdentifierPrefix(sub.getText());
                } else if ("830".equals(tag) && isLinkSubfield) {
                    if (isCartographic || (isFSet && anchorPpn == null)) {
                        isMultiVolume = true;
                        anchorPpn = stripIdentifierPrefix(sub.getText());
                    }
                } else if ("776".equals(tag) && isLinkSubfield) {
                    if (otherPpn == null) {
                        // found first/only occurrence
                        otherPpn = stripIdentifierPrefix(sub.getText());
                    } else {
                        // a further occurrence makes the link ambiguous: drop it again
                        otherPpn = null;
                        foundMultipleEpns = true;
                    }
                } else if ("954".equals(tag)) {
                    if ("b".equals(code)) {
                        if ("pica.epn".equals(searchField)) {
                            // remove wrong epns
                            currentEpn = stripIdentifierPrefix(sub.getText());
                            isCurrentEpn = true;
                            if (!searchValue.trim().equals(currentEpn)) {
                                sub.setAttribute("code", "invalid");
                                for (Element exemplarData : subfields) {
                                    if ("d".equals(exemplarData.getAttributeValue("code"))) {
                                        exemplarData.setAttribute("code", "invalid");
                                    }
                                }
                            }
                        } else if (currentEpn == null) {
                            isCurrentEpn = true;
                            currentEpn = stripIdentifierPrefix(sub.getText());
                        } else {
                            foundMultipleEpns = true;
                        }
                    } else if ("d".equals(code)) {
                        // keep only the first $d that belongs to the datafield of the current EPN
                        if (!shelfmarkFound && isCurrentEpn) {
                            shelfmarkFound = true;
                        } else {
                            sub.setAttribute("code", "invalid");
                        }
                    }
                }
            }
        }
    }

    // Fetch the record linked via 776 $w (searched by pica.zdb for periodicals,
    // by pica.ppn otherwise) and take its PPN, anchor PPN and EPN.
    if (otherPpn != null) {
        String otherResult = SRUHelper.search(catalogue, schema, isPeriodical ? "pica.zdb" : "pica.ppn",
                otherPpn, packing, version);
        Document otherDocument = builder.build(new StringReader(otherResult), "utf-8");
        Element otherRecord = getRecordWithoutSruHeader(otherDocument);
        if (otherRecord == null) {
            Helper.setFehlerMeldung("import_OtherEPNNotFound");
        } else {
            for (Element field : otherRecord.getChildren("controlfield", MARC)) {
                if ("001".equals(field.getAttributeValue("tag"))) {
                    otherPpn = field.getText();
                }
            }

            for (Element field : otherRecord.getChildren("datafield", MARC)) {
                String tag = field.getAttributeValue("tag");
                for (Element sub : field.getChildren()) {
                    String code = sub.getAttributeValue("code");
                    boolean isLinkSubfield = "w".equals(code);
                    // anchor identifier
                    if ("773".equals(tag) && isLinkSubfield) {
                        otherAnchorPpn = stripIdentifierPrefix(sub.getText());
                    } else if ("800".equals(tag) && isLinkSubfield) {
                        otherAnchorPpn = stripIdentifierPrefix(sub.getText());
                    } else if (isManuscript && "810".equals(tag) && isLinkSubfield) {
                        // now also strips "KXP", like every other anchor branch
                        otherAnchorPpn = stripIdentifierPrefix(sub.getText());
                    } else if (isCartographic && "830".equals(tag) && isLinkSubfield) {
                        otherAnchorPpn = stripIdentifierPrefix(sub.getText());
                    } else if ("954".equals(tag) && "b".equals(code)) {
                        if (otherEpn == null) {
                            otherEpn = stripIdentifierPrefix(sub.getText());
                        } else {
                            foundMultipleEpns = true;
                            otherEpn = null;
                        }
                    }
                }
            }
        }
        // The PPN field is written even when the linked record could not be loaded.
        appendIdentifierField(data, "ppnDigital", otherPpn);
        if (!foundMultipleEpns) {
            appendIdentifierField(data, "epnDigital", otherEpn);
        }
    }
    // Converted before the anchor is handled, but appended to the collection after it.
    org.w3c.dom.Element marcRecord = getRecord(answer, data, opac);

    if (isMultiVolume) {
        // get anchor record
        String anchorResult = SRUHelper.search(catalogue, schema, "pica.ppn", anchorPpn, packing, version);
        Document anchorDoc = builder.build(new StringReader(anchorResult), "utf-8");
        Element anchorRecord = getRecordWithoutSruHeader(anchorDoc);

        if (anchorRecord != null) {
            List<Element> anchorData = anchorRecord.getChildren();

            // get EPN/PPN digital for anchor
            // NOTE(review): otherAnchorPpn is still null here when no 776 link existed or the
            // linked record named no anchor; the search is then issued with a null value.
            // Confirm whether the lookup (and its error message) should be skipped in that case.
            String otherAnchorResult = SRUHelper.search(catalogue, schema,
                    isPeriodical ? "pica.zdb" : "pica.ppn", otherAnchorPpn, packing, version);
            Document otherAnchorDoc = builder.build(new StringReader(otherAnchorResult), "utf-8");
            Element otherAnchorRecord = getRecordWithoutSruHeader(otherAnchorDoc);

            if (otherAnchorRecord == null) {
                Helper.setFehlerMeldung("import_OtherEPNNotFound");
            } else {
                for (Element field : otherAnchorRecord.getChildren("controlfield", MARC)) {
                    if ("001".equals(field.getAttributeValue("tag"))) {
                        otherAnchorPpn = field.getText();
                    }
                }

                for (Element field : otherAnchorRecord.getChildren("datafield", MARC)) {
                    if (!"954".equals(field.getAttributeValue("tag"))) {
                        continue;
                    }
                    for (Element sub : field.getChildren()) {
                        if ("b".equals(sub.getAttributeValue("code"))) {
                            if (otherAnchorEpn == null) {
                                otherAnchorEpn = stripIdentifierPrefix(sub.getText());
                            } else {
                                foundMultipleEpns = true;
                            }
                        }
                    }
                }

                appendIdentifierField(anchorData, "ppnDigital", otherAnchorPpn);
                if (!foundMultipleEpns) {
                    appendIdentifierField(anchorData, "epnDigital", otherAnchorEpn);
                }
            }
            org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac);

            collection.appendChild(anchorMarcRecord);
        }
    }

    if (foundMultipleEpns) {
        Helper.setFehlerMeldung("import_foundMultipleEPNs");
    }

    collection.appendChild(marcRecord);
    return answer.getDocumentElement();
}

/** Removes a parenthesised prefix such as {@code (DE-627)} and the literal {@code KXP} from an identifier. */
private static String stripIdentifierPrefix(String rawIdentifier) {
    return rawIdentifier.replaceAll("\\(.+\\)", "").replace("KXP", "");
}

/**
 * Appends a MARC datafield with the given tag, empty indicators and a single
 * subfield $a holding {@code value} to {@code target}; does nothing when {@code value} is null.
 */
private static void appendIdentifierField(List<Element> target, String tag, String value) {
    if (value == null) {
        return;
    }
    Element datafield = new Element("datafield", MARC);
    datafield.setAttribute("tag", tag);
    datafield.setAttribute("ind1", "");
    datafield.setAttribute("ind2", "");

    Element subfield = new Element("subfield", MARC);
    subfield.setAttribute("code", "a");
    subfield.setText(value);
    datafield.addContent(subfield);
    target.add(datafield);
}

From source file:de.intranda.goobi.plugins.sru.SRUHelper.java

License:Open Source License

/**
 * Converts an SRU response into a W3C DOM {@code collection} element holding the
 * found MARC record and, for multi-volume works, its anchor record.
 *
 * <p>The record is classified from its leader; the anchor identifier is taken from
 * 773/800/810/830 $w and, if the record is a multi-volume part, the anchor is
 * fetched through {@code SRUHelper.search} using the same {@code searchField}.
 *
 * @param opac         importer that receives the hit count and is handed to {@code getRecord}
 * @param catalogue    passed through to {@code SRUHelper.search}
 * @param schema       passed through to {@code SRUHelper.search}
 * @param searchField  search field used for the anchor lookup
 * @param resultString XML text of the SRU response
 * @param packing      passed through to {@code SRUHelper.search}
 * @param version      passed through to {@code SRUHelper.search}
 * @return the {@code collection} element (anchor record first, if any, then the record),
 *         or {@code null} when the response contains no record
 * @throws IOException                  if reading one of the XML responses fails
 * @throws JDOMException                if one of the XML responses cannot be parsed
 * @throws ParserConfigurationException if no W3C DocumentBuilder can be created
 */
public static Node parseGbvResult(GbvMarcSruImport opac, String catalogue, String schema, String searchField,
        String resultString, String packing, String version)
        throws IOException, JDOMException, ParserConfigurationException {
    // Validation and external DTD loading are switched off; the same builder is reused
    // for the anchor lookup so that it gets the identical hardening.
    SAXBuilder builder = new SAXBuilder(XMLReaders.NONVALIDATING);
    builder.setFeature("http://xml.org/sax/features/validation", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-dtd-grammar", false);
    builder.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

    Document doc = builder.build(new StringReader(resultString), "utf-8");
    // srw:searchRetrieveResponse
    Element record = getRecordWithoutSruHeader(doc);
    if (record == null) {
        opac.setHitcount(0);
        return null;
    }
    opac.setHitcount(1);

    // generate an answer document
    DocumentBuilderFactory dbfac = DocumentBuilderFactory.newInstance();
    DocumentBuilder docBuilder = dbfac.newDocumentBuilder();
    org.w3c.dom.Document answer = docBuilder.newDocument();
    org.w3c.dom.Element collection = answer.createElement("collection");
    answer.appendChild(collection);

    boolean isMultiVolume = false;
    boolean isPeriodical = false;
    boolean isManuscript = false;
    boolean isCartographic = false;

    // The empty string means "no anchor found yet".
    String anchorIdentifier = "";
    List<Element> data = record.getChildren();

    for (Element el : data) {
        if (el.getName().equalsIgnoreCase("leader")) {
            String value = el.getText();
            if (value.length() < 24) {
                // presumably the five-digit record length is missing — TODO confirm
                value = "00000" + value;
            }
            // Positions 6, 7 and 19 are read below; skip classification for a leader
            // that is still too short instead of failing with an index error.
            if (value.length() > 19) {
                char c6 = value.charAt(6);
                char c7 = value.charAt(7);
                char c19 = value.charAt(19);
                if (c6 == 'a' && (c7 == 's' || c7 == 'd')) {
                    isPeriodical = true;
                } else if (c6 == 't') {
                    isManuscript = true;
                } else if (c6 == 'e') {
                    isCartographic = true;
                }
                if (c19 == 'b' || c19 == 'c') {
                    isMultiVolume = true;
                }
            }
        }

        if (el.getName().equalsIgnoreCase("datafield")) {
            // The flags above only influence datafields that follow the leader in document order.
            String tag = el.getAttributeValue("tag");
            for (Element sub : el.getChildren()) {
                // Literal-first comparisons: tag or code may be absent (null) on odd elements.
                if (!"w".equals(sub.getAttributeValue("code"))) {
                    continue;
                }
                // anchor identifier
                if ("773".equals(tag)) {
                    if (!isMultiVolume && !isPeriodical) {
                        sub.setText("");
                    } else {
                        anchorIdentifier = normalizeAnchorIdentifier(sub.getText());
                    }
                } else if ("800".equals(tag) && isMultiVolume) {
                    anchorIdentifier = normalizeAnchorIdentifier(sub.getText());
                } else if (isManuscript && "810".equals(tag)) {
                    isMultiVolume = true;
                    anchorIdentifier = normalizeAnchorIdentifier(sub.getText());
                } else if ("830".equals(tag)) {
                    // anchorIdentifier starts out as "" and is never null, so the fallback for
                    // multi-volume parts has to test for emptiness to ever take effect.
                    if (isCartographic || (isMultiVolume && anchorIdentifier.isEmpty())) {
                        anchorIdentifier = normalizeAnchorIdentifier(sub.getText());
                    }
                }
            }
        }
    }

    org.w3c.dom.Element marcRecord = getRecord(answer, data, opac);

    // Without an identifier there is nothing meaningful to look up.
    if (isMultiVolume && !anchorIdentifier.isEmpty()) {
        String anchorResult = SRUHelper.search(catalogue, schema, searchField, anchorIdentifier, packing,
                version);
        Document anchorDoc = builder.build(new StringReader(anchorResult), "utf-8");

        Element anchorRecord = getRecordWithoutSruHeader(anchorDoc);
        if (anchorRecord != null) {
            List<Element> anchorData = anchorRecord.getChildren();
            org.w3c.dom.Element anchorMarcRecord = getRecord(answer, anchorData, opac);

            collection.appendChild(anchorMarcRecord);
        }
    }
    collection.appendChild(marcRecord);
    return answer.getDocumentElement();
}

/** Removes a parenthesised prefix such as {@code (DE-627)} and the literal {@code KXP} from an identifier. */
private static String normalizeAnchorIdentifier(String rawIdentifier) {
    return rawIdentifier.replaceAll("\\(.+\\)", "").replace("KXP", "");
}