Example usage for org.w3c.dom Attr getValue

Introduction

This page collects example usages of the org.w3c.dom.Attr.getValue() method.

Prototype

public String getValue();

Source Link

Document

On retrieval, the value of the attribute is returned as a string.

Usage

From source file:org.apache.xml.security.c14n.implementations.Canonicalizer11.java

/**
 * Returns an iterator over the attributes to be output for the given element.
 * <br>
 * IMPORTANT: This method expects to work on a modified DOM tree, i.e. a
 * DOM which has been prepared using
 * {@link org.apache.xml.security.utils.XMLUtils#circumventBug2650(
 * org.w3c.dom.Document)}.
 * <p>
 * The returned iterator is backed by the shared {@code this.result} set, which
 * is cleared at the start of every call.
 *
 * @param element the element whose attributes and namespace declarations are examined
 * @param ns the namespace symbol table that is consulted and updated while the
 *        element's namespace declarations are processed
 * @return an iterator over the attributes to be output
 * @throws CanonicalizationException if a namespace declaration that is about to be
 *         rendered is reported as relative by {@code C14nHelper.namespaceIsRelative}
 */
@Override
protected Iterator<Attr> handleAttributes(Element element, NameSpaceSymbTable ns)
        throws CanonicalizationException {
    // result will contain the attrs which have to be output
    xmlattrStack.push(ns.getLevel());
    // true when isVisibleDO reports 1 for this element at the current level
    boolean isRealVisible = isVisibleDO(element, ns.getLevel()) == 1;
    // reuse the instance-level set instead of allocating a new one per call
    final SortedSet<Attr> result = this.result;
    result.clear();

    if (element.hasAttributes()) {
        NamedNodeMap attrs = element.getAttributes();
        int attrsLength = attrs.getLength();

        for (int i = 0; i < attrsLength; i++) {
            Attr attribute = (Attr) attrs.item(i);
            String NUri = attribute.getNamespaceURI();
            String NName = attribute.getLocalName();
            String NValue = attribute.getValue();

            if (!XMLNS_URI.equals(NUri)) {
                // A non namespace definition node.
                if (XML_LANG_URI.equals(NUri)) {
                    // attribute lives in the xml namespace (xml:*)
                    if (NName.equals("id")) {
                        if (isRealVisible) {
                            // treat xml:id like any other attribute
                            // (emit it, but don't inherit it)
                            result.add(attribute);
                        }
                    } else {
                        // every other xml:* attribute is recorded on the stack so it
                        // can be handed out again by getXmlnsAttr below
                        xmlattrStack.addXmlnsAttr(attribute);
                    }
                } else if (isRealVisible) {
                    // The node is visible add the attribute to the list of output attributes.
                    result.add(attribute);
                }
            } else if (!XML.equals(NName) || !XML_LANG_URI.equals(NValue)) {
                /* except omit namespace node with local name xml, which defines
                 * the xml prefix, if its string value is
                 * http://www.w3.org/XML/1998/namespace.
                 */
                // add the prefix binding to the ns symb table.
                if (isVisible(attribute)) {
                    if (isRealVisible || !ns.removeMappingIfRender(NName)) {
                        // The xpath select this node output it if needed.
                        Node n = ns.addMappingAndRender(NName, NValue, attribute);
                        if (n != null) {
                            result.add((Attr) n);
                            // a declaration that is about to be rendered must not be relative
                            if (C14nHelper.namespaceIsRelative(attribute)) {
                                Object exArgs[] = { element.getTagName(), NName, attribute.getNodeValue() };
                                throw new CanonicalizationException("c14n.Canonicalizer.RelativeNamespace",
                                        exArgs);
                            }
                        }
                    }
                } else {
                    // the declaration itself is not visible
                    if (isRealVisible && !XMLNS.equals(NName)) {
                        // visible element, non-default prefix: drop the mapping
                        ns.removeMapping(NName);
                    } else {
                        // otherwise only record the mapping without rendering it
                        ns.addMapping(NName, NValue, attribute);
                    }
                }
            }
        }
    }

    if (isRealVisible) {
        // The element is visible, handle the xmlns definition
        Attr xmlns = element.getAttributeNodeNS(XMLNS_URI, XMLNS);
        Node n = null;
        if (xmlns == null) {
            // No xmlns def just get the already defined.
            n = ns.getMapping(XMLNS);
        } else if (!isVisible(xmlns)) {
            // There is a definition but the xmlns is not selected by the xpath.
            // then xmlns=""
            n = ns.addMappingAndRender(XMLNS, "", nullNode);
        }
        // output the xmlns def if needed.
        if (n != null) {
            result.add((Attr) n);
        }
        // Float all xml:* attributes of the unselected parent elements to this one.
        xmlattrStack.getXmlnsAttr(result);
        // also collect the nodes the symbol table reports as not yet rendered
        ns.getUnrenderedNodes(result);
    }

    return result.iterator();
}

From source file:org.apache.xml.security.test.utils.resolver.ResourceResolverTest.java

/**
 * Tests registering a custom resolver implementation.
 * <p>
 * Registers the OfflineResolver at the front of the resolver list, then checks
 * that one URI can be resolved through it and that another one is rejected.
 */
public static void testCustomResolver() throws Exception {
    final String resolverClass = "org.apache.xml.security.test.utils.resolver.OfflineResolver";
    ResourceResolver.registerAtStart(resolverClass);

    Document document = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    Attr uri = document.createAttribute("URI");
    uri.setValue("http://www.apache.org");
    ResourceResolver resolver = ResourceResolver.getInstance(uri, "http://www.apache.org");

    // Positive case: resolving this URI must not raise an exception.
    try {
        uri.setValue("http://xmldsig.pothole.com/xml-stylesheet.txt");
        resolver.resolve(uri, null);
    } catch (Exception ex) {
        ex.printStackTrace();
        fail(uri.getValue() + " should be resolvable by the OfflineResolver");
    }

    // Negative case: resolving this URI is expected to raise an exception.
    try {
        uri.setValue("http://www.apache.org");
        resolver.resolve(uri, null);
        fail(uri.getValue() + " should not be resolvable by the OfflineResolver");
    } catch (Exception expected) {
        // intentionally empty: the exception is the outcome this test wants
    }
}

From source file:org.apereo.portal.layout.dlm.EditManager.java

/**
 * Evaluates whether attribute changes exist in the ilfChild and if so
 * applies them. Returns true if some changes existed. If changes existed
 * but matched those in the original node then they are not applicable,
 * are removed from the editSet, and false is returned.
 *
 * @param plfChild the PLF element whose edit set (if any) is evaluated
 * @param original the node the edits are applied to
 * @return true if at least one edit was applied to {@code original} or a
 *         preference edit is present; false otherwise
 */
public static boolean applyEditSet(Element plfChild, Element original) {
    // first get edit set if it exists
    Element editSet = null;
    try {
        editSet = getEditSet(plfChild, null, null, false);
    } catch (Exception e) {
        // should never occur unless problem during create in getEditSet
        // and we are telling it not to create.
        return false;
    }

    if (editSet == null || editSet.getChildNodes().getLength() == 0) {
        return false;
    }

    if (original.getAttribute(Constants.ATT_EDIT_ALLOWED).equals("false")) {
        // can't change anymore so discard changes
        plfChild.removeChild(editSet);
        return false;
    }

    Document ilf = original.getOwnerDocument();
    boolean attributeChanged = false;
    Element edit = (Element) editSet.getFirstChild();

    while (edit != null) {
        // Remember the successor BEFORE touching the edit: once an edit has been
        // detached via removeChild it no longer has a next sibling, so reading it
        // afterwards would end the loop and silently skip all remaining edits.
        Element nextEdit = (Element) edit.getNextSibling();

        String attribName = edit.getAttribute(Constants.ATT_NAME);
        Attr attr = plfChild.getAttributeNode(attribName);

        // preferences are only updated at preference storage time so
        // if a preference change exists in the edit set assume it is
        // still valid so that the node being edited will persist in
        // the PLF.
        if (edit.getNodeName().equals(Constants.ELM_PREF)) {
            attributeChanged = true;
        } else if (attr == null) {
            // attribute removed. See if needs removing in original.
            attr = original.getAttributeNode(attribName);
            if (attr == null) {
                // edit irrelevant
                editSet.removeChild(edit);
            } else {
                // edit differs, apply to original
                original.removeAttribute(attribName);
                attributeChanged = true;
            }
        } else {
            // attribute there, see if original is also there
            Attr origAttr = original.getAttributeNode(attribName);
            if (origAttr == null) {
                // original attribute isn't defined so need to add
                origAttr = (Attr) ilf.importNode(attr, true);
                original.setAttributeNode(origAttr);
                attributeChanged = true;
            } else if (attr.getValue().equals(origAttr.getValue())) {
                // they are the same, edit irrelevant
                editSet.removeChild(edit);
            } else {
                // edit differs, apply to original
                origAttr.setValue(attr.getValue());
                attributeChanged = true;
            }
        }
        edit = nextEdit;
    }
    return attributeChanged;
}

From source file:org.apereo.portal.layout.dlm.EditManager.java

/**
 * Looks in the edit set of the identified PLF element for a directive of the
 * given type whose name attribute equals {@code attributeName} and removes the
 * first match. If the edit set has no children afterwards, the edit set itself
 * is removed from the element.
 */
private static void removeDirective(String elementId, String attributeName, String type, IPerson person) {
    Document plf = (Document) person.getAttribute(Constants.PLF);
    Element node = plf.getElementById(elementId);
    if (node == null) {
        return;
    }

    Element editSet;
    try {
        editSet = getEditSet(node, plf, person, false);
    } catch (Exception e) {
        /*
         * Not expected to happen: getEditSet is called with create=false, so
         * the part of it that throws should not be reached. Log anyway so an
         * unexpected runtime exception is not lost.
         */
        LOG.error(e, e);
        return;
    }

    // without an edit set there is nothing that could be removed
    if (editSet == null) {
        return;
    }

    for (Node candidate = editSet.getFirstChild(); candidate != null; candidate = candidate
            .getNextSibling()) {
        if (!candidate.getNodeName().equals(type)) {
            continue;
        }
        Attr nameAttr = ((Element) candidate).getAttributeNode(Constants.ATT_NAME);
        if (nameAttr != null && nameAttr.getValue().equals(attributeName)) {
            // found the directive: drop it and stop searching
            editSet.removeChild(candidate);
            break;
        }
    }

    // an edit set that has become empty is removed as well
    if (editSet.getFirstChild() == null) {
        node.removeChild(editSet);
    }
}

From source file:org.bibsonomy.scraper.url.kde.ieee.IEEEXploreBookScraper.java

/**
 * Scrapes an IEEE Xplore book page and renders the collected fields as a
 * BibTeX entry.
 * <p>
 * Title, abstract and authors are cut out of the raw page text between fixed
 * marker strings; date, pages, edition and ISBN are read from the children of
 * the first {@code <p class="bodyCopyBlackLargeSpaced">} element of the parsed
 * DOM.
 *
 * @param sc scraping context providing the page URL and the page content
 * @return bibtex
 * @throws ScrapingException declared by the signature; every exception raised
 *         while scraping is rethrown wrapped in an InternalFailureException
 */
public String ieeeBookScrape(ScrapingContext sc) throws ScrapingException {
    try {
        //-- init String map for bibtex entries
        String type = IEEE_BOOK;
        String url = sc.getUrl().toString();
        String authors = "";
        String numpages = "";
        String title = "";
        String isbn = "";
        String publisher = "";
        String month = "";
        String year = "";
        String edition = "";
        String abstr = "";

        String bibtexkey = null;

        //-- get the html doc and parse the DOM
        final Document doc = XmlUtils.getDOM(sc.getPageContent());
        final String content = sc.getPageContent();

        //-- title: text between <title> and </title>
        String ident1 = "<title>";
        String ident2 = "</title>";
        if (content.contains(ident1) && content.contains(ident2)) {
            int _startIndex = content.indexOf(ident1) + ident1.length();
            int _endIndex = content.indexOf(ident2);
            title = content.substring(_startIndex, _endIndex);
            title = title.replaceAll("IEEEXplore#\\s", "");
        }

        //-- abstract: text between the "Abstract" header and the next grey block
        ident1 = "<span class=\"sectionHeaders\">Abstract</span>";
        ident2 = "<td class=\"bodyCopyGrey\"><p class=\"bodyCopyGreySpaced\"><strong>";
        if (content.contains(ident1) && content.contains(ident2)) {
            int _startIndex = content.indexOf(ident1) + ident1.length();
            int _endIndex = content.indexOf(ident2);
            String _tempabs = content.substring(_startIndex, _endIndex);
            abstr = _tempabs.replaceAll("\\s\\s+", "").replaceAll("(<.+?>)", "").trim();
        }

        /*-- get all <p>-Tags to extract the standard informations
         *  In every standard page the css-class "bodyCopyBlackLargeSpaced"
         *  indicates the collection of all informations.
         * */
        NodeList pres = doc.getElementsByTagName("p"); //get all <p>-Tags
        for (int i = 0; i < pres.getLength(); i++) {
            Node currNode = pres.item(i);
            if (!currNode.hasAttributes()) {
                continue;
            }

            // A <p> may carry attributes without having a class attribute;
            // getAttributeNode then returns null and must not be dereferenced.
            Attr own = ((Element) currNode).getAttributeNode("class");
            if (own == null || !"bodyCopyBlackLargeSpaced".equals(own.getValue())
                    || !currNode.hasChildNodes()) {
                continue;
            }

            NodeList temp = currNode.getChildNodes();
            for (int j = 0; j < temp.getLength(); j++) {
                String value = temp.item(j).getNodeValue();
                if (value == null) {
                    // child without a node value: nothing to extract from it
                    continue;
                }

                if (value.indexOf(CONST_DATE) != -1) {
                    // NOTE(review): offset 18 and the differing -5 / -4 offsets are
                    // kept as found — confirm against CONST_DATE and the page format
                    String date = value.substring(18);
                    year = date.substring(date.length() - 5).trim();
                    month = date.substring(0, date.length() - 4).trim();
                    // not correct in all cases
                    // publisher = temp.item(j+2).getNodeValue().trim();
                }
                if (value.indexOf(CONST_PAGES) != -1) {
                    numpages = value.substring(CONST_PAGES.length()).trim();
                } else if (value.indexOf(CONST_ON_PAGES) != -1) {
                    numpages = value.substring(CONST_ON_PAGES.length()).trim();
                }
                if (value.indexOf(CONST_EDITION) != -1) {
                    edition = value.substring(CONST_EDITION.length()).trim();
                } else if (value.indexOf(CONST_VOLUME) != -1) {
                    edition = value.substring(CONST_VOLUME.length()).trim();
                }
                // compare string content, not references; only the first ISBN is kept
                if ("".equals(isbn) && value.indexOf(CONST_ISBN) != -1) {
                    isbn = value.substring(CONST_ISBN.length()).trim();
                }
            }
            // only the first matching paragraph is evaluated
            break;
        }

        /*
         * get authors
         */
        ident1 = "<font color=990000><b>";
        ident2 = "<br>";
        int _authorStart = content.indexOf(ident1) + ident1.length();
        if (content.contains(ident1) && content.indexOf(ident2, _authorStart) != -1) {
            int _authorEnd = content.indexOf(ident2, _authorStart);
            authors = content.substring(_authorStart, _authorEnd);
            authors = authors.replaceAll("\\s\\s+", "").replaceAll("(<.+?>)", "").trim();
            authors = authors.replaceAll("&nbsp;&nbsp;", " and ");

            if (authors.endsWith(" and ")) {
                authors = authors.substring(0, authors.length() - 5);
            }
        }

        //-- kill special chars and add the year to bibtexkey
        // (the key stays null, and is printed as "null", unless both ISBN and year were found)
        if (!"".equals(isbn) && !"".equals(year)) {
            bibtexkey = isbn.replaceAll("-", "");
            bibtexkey = bibtexkey.replaceAll("[^0-9A-Za-z]", "") + ":" + year;
        }

        //create the book-bibtex
        return type + " { " + bibtexkey + ", \n" + "author = {" + authors + "}, \n" + "title = {" + title
                + "}, \n" + "year = {" + year + "}, \n" + "url = {" + url + "}, \n" + "pages = {" + numpages
                + "}, \n" + "edition = {" + edition + "}, \n" + "publisher = {" + publisher + "}, \n"
                + "isbn = {" + isbn + "}, \n" + "abstract = {" + abstr + "}, \n" + "month = {" + month + "}\n}";

    } catch (Exception e) {
        throw new InternalFailureException(e);
    }
}

From source file:org.bibsonomy.scraper.url.kde.ieee.IEEEXploreJournalProceedingsScraper.java

/**
 * Scrapes an IEEE Xplore journal or proceedings page and renders the collected
 * fields as a BibTeX entry.
 * <p>
 * The abstract is cut out of the raw page text; title and authors come from the
 * {@code <span class="headNavBlueXLarge2">} element and the span following it;
 * the remaining fields come from the children of the {@code <p>} element that
 * contains CONST_BOOKTITLE.
 *
 * @param sc scraping context providing the page URL and the page content
 * @return the BibTeX entry
 * @throws ScrapingException declared by the signature; every exception raised
 *         while scraping is rethrown wrapped in an InternalFailureException
 */
public String ieeeJournalProceedingsScrape(ScrapingContext sc) throws ScrapingException {

    try {
        //-- init all NodeLists and Node
        NodeList pres = null;
        Node currNode = null;
        NodeList temp = null;

        //-- init Strings for bibtex entries
        // month uncased because of multiple date types
        String type = "";
        String url = sc.getUrl().toString();
        String author = "";
        String year = "";
        String abstr = "";
        String title = "";
        String booktitle = "";
        String volume = null;
        String pages = null;
        String issn = null;
        String isbn = null;
        String doi = null;

        String authors[] = null;
        String tempAuthors = null;

        //-- get the html doc and parse the DOM
        final Document document = XmlUtils.getDOM(sc.getPageContent());
        final String content = sc.getPageContent();

        //get the abstract block
        String ident1 = "<span class=\"sectionHeaders\">Abstract</span><br>";
        String ident2 = "<td class=\"bodyCopyGrey\"><p class=\"bodyCopyGreySpaced\"><strong>Index";
        if (content.indexOf(ident1) != -1 && content.indexOf(ident2) != -1) {
            abstr = content.substring(content.indexOf(ident1) + ident1.length(), content.indexOf(ident2))
                    .replaceAll("\\s\\s+", "").replaceAll("(<.+?>)", "").trim();
        }

        /*-- Get the title of the journal --
         * Iterate through all spans
         */
        pres = document.getElementsByTagName("span"); //get all <span>-Tags
        for (int i = 0; i < pres.getLength(); i++) {
            Node curr = pres.item(i);
            Attr own = ((Element) curr).getAttributeNode("class");

            // spans without a class attribute yield null here and can never be
            // the title span, so skip them instead of dereferencing null
            if (own == null || !"headNavBlueXLarge2".equals(own.getValue())) {
                continue;
            }

            title = curr.getFirstChild().getNodeValue();
            // the authors are read from the span that follows the title span
            temp = pres.item(i + 1).getChildNodes();

            if (!"".equals(temp.item(0).getNodeValue())) {
                tempAuthors = temp.item(0).getNodeValue();

                if ("\u00A0\u00A0".equals(tempAuthors)) {
                    authors = new String[] { "N/A" };
                } else {
                    authors = tempAuthors.split("\u00A0\u00A0");
                }
            }
            break;
        }

        /*-- Get the global infomation like publication date, number of pages ... --
         * iterate through all p's and look for "This paper appears in:" because its
         * available in all journals.
         * Save the Nodelist of the matching paragraph.
         * */
        NodeList match = null;
        pres = document.getElementsByTagName("p"); //get all <p>-Tags
        for (int i = 0; i < pres.getLength(); i++) {
            currNode = pres.item(i);
            temp = currNode.getChildNodes();
            //iterate through childs to find the booktitle marker
            for (int j = 0; j < temp.getLength(); j++) {
                String value = temp.item(j).getNodeValue();
                // children without a node value cannot contain the marker
                if (value != null && value.indexOf(CONST_BOOKTITLE) != -1) {
                    if (!"".equals(temp.item(1).getFirstChild().getFirstChild().getNodeValue())) {
                        booktitle = temp.item(1).getFirstChild().getFirstChild().getNodeValue();
                    }
                    match = temp;
                    // leaves the inner loop only; a later matching <p> replaces this one
                    break;
                }
            }
        }

        // NOTE(review): match stays null when no paragraph contains CONST_BOOKTITLE;
        // the resulting NullPointerException is wrapped by the catch block below.
        //get the different childs of the founded p-tag
        final Pattern yearP = Pattern.compile("\\d{4}");
        for (int i = 0; i < match.getLength(); i++) {
            String value = match.item(i).getNodeValue();
            if (value == null || "".equals(value)) {
                continue;
            }

            String infoMatches = null;
            if (value.indexOf(CONST_DATE) != -1) {
                //extract the year: the last whitespace-separated token made of four digits
                infoMatches = value.substring(CONST_DATE.length());
                StringTokenizer tokenizer = new StringTokenizer(infoMatches);

                while (tokenizer.hasMoreTokens()) {
                    String token = tokenizer.nextToken();
                    Matcher matcherYear = yearP.matcher(token);
                    if (matcherYear.matches()) {
                        year = token;
                    }
                }
            }
            if (volume == null && value.indexOf(CONST_VOLUME) != -1) {
                infoMatches = value;
                volume = infoMatches.substring(infoMatches.indexOf(CONST_VOLUME) + CONST_VOLUME.length(),
                        infoMatches.indexOf(",")).trim();
            }
            if (pages == null && value.indexOf(CONST_PAGES) != -1) {
                infoMatches = value;
                pages = infoMatches.substring(infoMatches.indexOf(CONST_PAGES) + CONST_PAGES.length())
                        .trim();
            }
            if (issn == null) {
                issn = getField(match, i, "ISSN: ");
            }
            if (isbn == null) {
                isbn = getField(match, i, "ISBN: ");
            }
            if (doi == null) {
                doi = getField(match, i, "Digital Object Identifier: ");
            }
        }

        //-- set bibtex type @article for journals & @proceeding for proceedings
        if ((isbn == null || isbn.trim().equals("")) && issn != null && !issn.trim().equals("")) {
            type = IEEE_JOURNAL;
        } else {
            if (title.equals(booktitle)) {
                type = IEEE_PROCEEDINGS;
            } else {
                type = IEEE_INPROCEEDINGS;
            }
        }

        //-- get all authors out of the array and prepare them to bibtex entry "author"
        for (int i = 0; i < authors.length; i++) {
            if (i == authors.length - 1) {
                author += authors[i].trim();
            } else {
                author += authors[i].trim() + " and ";
            }
        }

        //-- build the entry; the key is derived from the first author and the year
        StringBuffer b = new StringBuffer(type + "{" + getName(authors[0]) + ":" + year + ",");
        appendBibtexField(b, "author", author);
        appendBibtexField(b, "abstract", abstr);

        appendBibtexField(b, "title", title);
        appendBibtexField(b, "booktitle", booktitle);
        appendBibtexField(b, "url", url);
        appendBibtexField(b, "year", year);
        appendBibtexField(b, "isbn", isbn);
        appendBibtexField(b, "issn", issn);
        appendBibtexField(b, "doi", doi);
        appendBibtexField(b, "volume", volume);
        appendBibtexField(b, "pages", pages);
        b.append("}");

        return b.toString();

    } catch (Exception e) {
        throw new InternalFailureException(e);
    }
}

From source file:org.bibsonomy.scraper.url.kde.ieee.IEEEXploreStandardsScraper.java

/**
 * Scrapes an IEEE Xplore standards page and renders the collected fields as a
 * BibTeX snippet.
 * <p>
 * Title and abstract are read from {@code <span>} elements identified by their
 * class attribute; year, pages and ISBN are read from the children of
 * {@code <p class="bodyCopyBlackLargeSpaced">} elements.
 *
 * @param sc scraping context providing the page URL and the page content
 * @return the BibTeX snippet
 * @throws ScrapingException declared by the signature; every exception raised
 *         while scraping is rethrown wrapped in an InternalFailureException
 */
public String ieeeStandardsScrape(ScrapingContext sc) throws ScrapingException {
    try {
        //-- init all NodeLists and Node
        NodeList pres = null;
        Node currNode = null;
        NodeList temp = null;

        //-- init String map for bibtex entries
        String type = IEEE_STANDARDS;
        String url = sc.getUrl().toString();
        String numpages = "";
        String title = "";
        String isbn = "";
        String abstr = "";
        String year = "";

        //-- get the html doc and parse the DOM
        final Document document = XmlUtils.getDOM(sc.getPageContent());

        /* -- get the spans to extract the title and abstract
         */
        pres = document.getElementsByTagName("span"); //get all <span>-Tags
        for (int i = 0; i < pres.getLength(); i++) {
            currNode = pres.item(i);
            if (!currNode.hasAttributes()) {
                continue;
            }

            // attributes may be present without a class attribute; skip such
            // spans instead of dereferencing the null attribute node
            Attr own = ((Element) currNode).getAttributeNode("class");
            if (own == null) {
                continue;
            }

            //-- Extract the title (value of the last child of the title span)
            if ("headNavBlueXLarge2".equals(own.getValue())) {
                temp = currNode.getChildNodes();
                title = temp.item(temp.getLength() - 1).getNodeValue().trim();
            }
            //-- Extract the abstract (value of the last child of the header's parent)
            if ("sectionHeaders".equals(own.getValue()) && currNode.getFirstChild() != null
                    && "Abstract".equals(currNode.getFirstChild().getNodeValue())) {
                abstr = currNode.getParentNode().getLastChild().getNodeValue().trim();
            }
        }

        /*-- get all <p>-Tags to extract the standard informations
         *  In every standard page the css-class "bodyCopyBlackLargeSpaced"
         *  indicates the collection of all informations.
         * */
        pres = document.getElementsByTagName("p"); //get all <p>-Tags
        for (int i = 0; i < pres.getLength(); i++) {
            currNode = pres.item(i);
            if (!currNode.hasAttributes()) {
                continue;
            }

            Attr own = ((Element) currNode).getAttributeNode("class");
            if (own == null || !"bodyCopyBlackLargeSpaced".equals(own.getValue())) {
                continue;
            }

            temp = currNode.getChildNodes();
            for (int j = 0; j < temp.getLength(); j++) {
                String value = temp.item(j).getNodeValue();
                if (value == null) {
                    // child without a node value: nothing to extract from it
                    continue;
                }

                if (value.indexOf(CONST_DATE) != -1) {
                    // the year is taken from the last four characters of the date text
                    String date = value.substring(CONST_DATE.length()).trim();
                    year = date.substring(date.length() - 4).trim();
                }
                if (value.indexOf(CONST_PAGE) != -1) {
                    numpages = value.substring(CONST_PAGE.length()).trim();
                }
                if (value.indexOf(CONST_EISBN) != -1) {
                    isbn = value.substring(CONST_EISBN.length()).trim();
                }
            }
        }

        //create valid bibtex snippet
        return type + " {," + "title = {" + title + "}, " + "year = {" + year + "}, " + "url = {" + url + "}, "
                + "pages = {" + numpages + "}, " + "abstract = {" + abstr + "}, " + "isbn = {" + isbn + "}}";

    } catch (Exception e) {
        throw new InternalFailureException(e);
    }
}

From source file:org.deegree.portal.context.WebMapContextFactory.java

/**
 * creates an instance of a class encapsulating the access the configuration of Module
 * /*from   ww w.  j a v a 2  s  . c  o  m*/
 * @param element
 * @param xml
 * 
 * @return instance of <tt>ModuleConfiguration</tt>
 * 
 * @throws XMLParsingException
 * @throws MalformedURLException
 */
private static ModuleConfiguration createModuleConfiguration(Element element, XMLFragment xml)
        throws XMLParsingException, MalformedURLException {

    ModuleConfiguration mc = null;
    if (element != null) {
        Element elem = XMLTools.getRequiredElement(element, "cntxt:OnlineResource",
                CommonNamespaces.getNamespaceContext());
        Attr attr = elem.getAttributeNodeNS(CommonNamespaces.XLNNS.toASCIIString(), "href");
        String url = attr.getValue();
        URL u = xml.resolve(url);
        url = u.toExternalForm();
        if (url.endsWith("?")) {
            url = url.substring(0, url.length() - 1);
        }
        attr.setNodeValue(url);
        URL onlineResource = createOnlineResource(elem);
        mc = new ModuleConfiguration(onlineResource);
    }

    return mc;
}

From source file:org.dhatim.xml.DomUtils.java

/**
 * Rename element.
 * <p>
 * A replacement element is created in the owner document of {@code element},
 * optionally receives the child content and attributes of {@code element}, and
 * is then swapped in for it.
 *
 * @param element The element to be renamed.
 * @param replacementElement The tag name of the replacement element.  Can be a prefix qualified
 * name if the namespace is not the null namespace ({@link javax.xml.XMLConstants#NULL_NS_URI}).
 * @param namespace The element namespace.
 * @param keepChildContent <code>true</code> if the target element's child content
 * is to be copied to the replacement element, false if not. Default <code>true</code>.
 * @param keepAttributes <code>true</code> if the target element's attributes
 * are to be copied to the replacement element, false if not. Default <code>true</code>.
 * @return The renamed element.
 */
public static Element renameElementNS(Element element, String replacementElement, String namespace,
        boolean keepChildContent, boolean keepAttributes) {
    AssertArgument.isNotNull(element, "element");
    AssertArgument.isNotNull(replacementElement, "replacementElement");

    Element replacement;
    if (namespace != null && !XMLConstants.NULL_NS_URI.equals(namespace)) {
        replacement = element.getOwnerDocument().createElementNS(namespace, replacementElement);
    } else {
        replacement = element.getOwnerDocument().createElement(replacementElement);
    }

    if (keepChildContent) {
        DomUtils.copyChildNodes(element, replacement);
    }
    if (keepAttributes) {
        NamedNodeMap attributes = element.getAttributes();
        int attributeCount = attributes.getLength();

        for (int i = 0; i < attributeCount; i++) {
            Attr attribute = (Attr) attributes.item(i);
            String attributeNamespace = attribute.getNamespaceURI();

            if (attributeNamespace != null) {
                // keep the attribute's namespace; plain setAttribute would create
                // a copy without namespace URI and local name
                replacement.setAttributeNS(attributeNamespace, attribute.getName(), attribute.getValue());
            } else {
                replacement.setAttribute(attribute.getName(), attribute.getValue());
            }
        }
    }
    DomUtils.replaceNode(replacement, element);

    return replacement;
}

From source file:org.dita.dost.AbstractIntegrationTest.java

/**
 * Rewrites the values of the attributes named by the keys of {@code patterns}
 * on every element of the document, using the mapping that {@code rewriteId}
 * builds up. The value of a {@code headers} attribute is treated as a
 * whitespace-separated list whose tokens are rewritten one by one.
 *
 * @param doc document to rewrite in place
 * @param patterns attribute name to the pattern handed to {@code rewriteId}
 * @return the same document instance
 */
private Document rewriteIds(final Document doc, final Map<String, Pattern> patterns) {
    final Map<String, String> rewritten = new HashMap<>();
    final AtomicInteger counter = new AtomicInteger();
    final NodeList elements = doc.getElementsByTagName("*");

    for (int idx = 0; idx < elements.getLength(); idx++) {
        final Element element = (Element) elements.item(idx);

        for (final Map.Entry<String, Pattern> entry : patterns.entrySet()) {
            final Attr attr = element.getAttributeNode(entry.getKey());
            if (attr == null) {
                continue;
            }

            if (entry.getKey().equals("headers")) {
                // multi-valued attribute: rewrite each token, keep unmapped ones as they are
                final List<String> tokens = new ArrayList<>();
                for (final String token : attr.getValue().trim().split("\\s+")) {
                    rewriteId(token, rewritten, counter, entry.getValue());
                    tokens.add(rewritten.getOrDefault(token, token));
                }
                attr.setNodeValue(String.join(" ", tokens));
            } else {
                // single-valued attribute: replace only when a mapping exists
                final String value = attr.getValue();
                rewriteId(value, rewritten, counter, entry.getValue());
                if (rewritten.containsKey(value)) {
                    attr.setNodeValue(rewritten.get(value));
                }
            }
        }
    }
    return doc;
}