Example usage for org.w3c.dom Document getChildNodes

List of usage examples for org.w3c.dom Document getChildNodes

Introduction

This page collects usage examples for org.w3c.dom Document getChildNodes.

Prototype

public NodeList getChildNodes();

Source Link

Document

A NodeList that contains all children of this node. If there are no children, the list contains no nodes.
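
As a minimal, self-contained sketch of the call itself (the XML string and class name are illustrative, not taken from the projects below), parse a document and walk the top-level children returned by getChildNodes():

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

public class GetChildNodesExample {
    public static void main(String[] args) throws Exception {
        String xml = "<?xml version=\"1.0\"?><root><a/><b/></root>";
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));

        // For a Document, getChildNodes() returns the document's top-level children:
        // the document element plus any comments, processing instructions, or DOCTYPE.
        NodeList children = doc.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            System.out.println(child.getNodeName() + " (type " + child.getNodeType() + ")");
        }
    }
}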

Usage

From source file:it.polito.tellmefirst.web.rest.clients.ClientEpub.java

private HashMap<String, String> parseEpub(File file) throws IOException, TMFVisibleException {

    LOG.debug("[parseEpub] - BEGIN");

    ZipFile fi = new ZipFile(file);

    for (Enumeration e = fi.entries(); e.hasMoreElements();) {
        ZipEntry entry = (ZipEntry) e.nextElement();
        if (entry.getName().endsWith("ncx")) {
            InputStream tocMaybeDirty = fi.getInputStream(entry);
            Scanner scanner = new Scanner(tocMaybeDirty, "UTF-8").useDelimiter("\\A");
            String theString = scanner.hasNext() ? scanner.next() : "";
            tocMaybeDirty.close();
            scanner.close();

            String res = theString.replaceAll(">[\\s]*?<", "><");

            InputStream toc = new ByteArrayInputStream(res.getBytes(StandardCharsets.UTF_8));

            try {
                DocumentBuilder dBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
                Document doc = dBuilder.parse(toc);
                toc.close();

                if (doc.hasChildNodes()) {
                    findNavMap(doc.getChildNodes());
                }
            } catch (Exception ex) {
                LOG.error("Unable to navigate the TOC", ex);
            }

            removeEmptyTOC(epub);

            // search for anchors in links and split on them
            for (Map.Entry<String, String> me : epub.entrySet()) {
                String value = me.getValue();
                if (value.contains("#")) {
                    me.setValue(value.split("#")[1]); // keep only the anchor part
                }
            }
        }
        if (entry.getName().endsWith("opf")) { //manage files because order is important
            InputStream content = fi.getInputStream(entry);

            Scanner scanner = new Scanner(content, "UTF-8").useDelimiter("\\A");
            String contentString = scanner.hasNext() ? scanner.next() : "";
            content.close();
            scanner.close();

            String filenameRegex = "href=\"(.*.htm(|l))\".*media-type=\"application/xhtml";
            Pattern pattern = Pattern.compile(filenameRegex);
            Matcher matcher = pattern.matcher(contentString);

            int count = 0;
            while (matcher.find()) {
                files.put(count, matcher.group(1));
                count++;
            }
        }
        if (entry.getName().endsWith("html") || entry.getName().endsWith("htm")
                || entry.getName().endsWith("xhtml")) {
            InputStream htmlFile = fi.getInputStream(entry);

            Scanner scanner = new Scanner(htmlFile, "UTF-8").useDelimiter("\\A");
            String htmlString = scanner.hasNext() ? scanner.next() : "";

            String regex1 = htmlString.replaceAll("^[^_]*?<body>", ""); //remove head
            String regex2 = regex1.replaceAll("</body>.*$", ""); //remove tail
            String htmlCleaned = regex2.replaceAll("<a.*?/>", ""); //remove self-closing anchor tags

            String[] bits = entry.getName().split("/");
            String fileName = bits[bits.length - 1];

            htmls.put(fileName, htmlCleaned);
        }
    }
    fi.close();
    for (int i = 0; i < files.size(); i++) {
        stringBuilder.append("<p id=\"" + files.get(i) + "\"></p>"); // also "anchor" the head of each file
        stringBuilder.append(htmls.get(files.get(i)));
    }
    String htmlAll = stringBuilder.toString();

    /* We have all the files we need; start splitting.
       For each link, make a chunk, starting from the bottom. */
    Metadata metadata = new Metadata();
    Parser parser = new HtmlParser();
    ListIterator<Map.Entry<String, String>> iter = new ArrayList<>(epub.entrySet()).listIterator(epub.size());

    while (iter.hasPrevious()) {
        Map.Entry<String, String> me = iter.previous();
        try {
            ContentHandler contenthandler = new BodyContentHandler(10 * htmlAll.length());
            Scanner sc = new Scanner(htmlAll);
            sc.useDelimiter("id=\"" + me.getValue().toString() + "\">");
            htmlAll = sc.next();
            InputStream stream = new ByteArrayInputStream(sc.next().getBytes(StandardCharsets.UTF_8));
            parser.parse(stream, contenthandler, metadata, new ParseContext());
            String chapterText = contenthandler.toString().toLowerCase().replaceAll("\\d+.*", "");
            String chapterTextWithoutNo = chapterText.replaceAll("\\d+.*", "");
            // Remove the Project Gutenberg meta information from the text
            String chapterTextCleaned = chapterTextWithoutNo.split("end of the project gutenberg ebook")[0];
            epub.put(me.getKey().toString(), chapterTextCleaned);

        } catch (Exception ex) {
            LOG.error("Unable to parse content for index: " + me.getKey() + ", this chapter will be deleted");
            removeChapter(epub, me.getKey().toString());
        }
    }

    /* Remove the Project Gutenberg license chapter from the Map, because it is useless
       for the classification and it triggers a Lucene exception when the language is
       Italian (the license text is always in English).

       You can use this method to remove any chapter that is useless for classifying
       your Epub document. */
    removeChapter(epub, "A Word from Project Gutenberg");
    removeEmptyItems(epub);

    // If the Epub file has a bad structure, fall back to Tika's basic Epub extractor.
    if (epub.size() == 0) {
        LOG.info("The Epub file has a bad structure. Try to use the Tika extractor");
        epub.put("All text", autoParseAll(file));
    }

    removeEmptyItems(epub);

    if (epub.size() == 0) {
        LOG.error("Unable to extract text from this Epub");
        throw new TMFVisibleException("Unable to extract any text from this Epub.");
    }

    removeDownloadedFile(TEMPORARY_PATH);

    LOG.debug("[parseEpub] - END");

    return epub;
}

From source file:com.blackbear.flatworm.config.impl.DefaultConfigurationReaderImpl.java

/**
 * {@code loadConfigurationFile} takes an {@link InputStream} and returns a {@code FileFormat} object, which can be used to parse an
 * input file into beans.
 *
 * @param in The {@link InputStream} instance to use in parsing the configuration file.
 * @return a constructed {@link FileFormat} if the parsing was successful.
 * @throws FlatwormConfigurationException If the configuration data contains invalid syntax.
 * @throws IOException                    If an I/O error occurs while reading.
 */
@Override
public FileFormat loadConfigurationFile(InputStream in) throws FlatwormConfigurationException, IOException {
    DocumentBuilder parser;
    Document document;
    NodeList children;
    FileFormat fileFormat = null;

    try {
        DocumentBuilderFactory fact = DocumentBuilderFactory.newInstance();
        parser = fact.newDocumentBuilder();
        document = parser.parse((new InputSource(in)));
        children = document.getChildNodes();
        for (int i = 0; i < children.getLength(); i++) {
            Node child = children.item(i);
            if (("file-format".equals(child.getNodeName())) && (child.getNodeType() == Node.ELEMENT_NODE)) {
                fileFormat = (FileFormat) traverse(child);
                break;
            }
        }

        if (fileFormat != null) {
            // Default records and the ignore-unmapped-records flag are mutually
            // exclusive ways of handling unidentified data; allow only one.
            if (fileFormat.hasDefaultRecord() && fileFormat.isIgnoreUnmappedRecords()) {
                throw new FlatwormParserException(
                        "You cannot have default Records (those lacking identifier configuration) and "
                                + "the ignore-unmapped-records flag set to true - you must have one or the other.");
            }
        }
    } catch (Exception e) {
        throw new FlatwormConfigurationException(e.getMessage(), e);
    }
    return fileFormat;
}
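
A minimal, hypothetical caller for the reader above (the configuration path is illustrative, and a public no-argument constructor is assumed):

// Illustrative usage sketch; the file name and constructor are assumptions.
try (InputStream in = new FileInputStream("flatworm-config.xml")) {
    FileFormat fileFormat = new DefaultConfigurationReaderImpl().loadConfigurationFile(in);
    // fileFormat can now be used to parse input files into beans.
}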

From source file:org.opencastproject.analytics.impl.AnalyticsServiceImpl.java

/**
 * Gets the number of times an episode was watched, and for how long, in
 * intervals over a time range.
 * 
 * @param id
 *            The unique id of the episode to get the statistics for.
 * @param start
 *            The start of the period to investigate in the form
 *            YYYYMMDDHHMM e.g. 201212312359.
 * @param end
 *            The end of the period to investigate in the form YYYYMMDDHHMM
 *            e.g. 201212312359.
 * @param intervalString
 *            The number of seconds to break up the views and durations into
 *            from start time to end time.
 * @return An XML representation of all the intervals between start and
 *         end.
 * @throws TrustedHttpClientException
 *             Thrown if the REST calls used to query the usertracking
 *             endpoint cannot be made.
 */
public ViewCollection getViews(String id, String start, String end, String intervalString)
        throws TrustedHttpClientException {
    if (canAnalyzeEpisode(id)) {

        long limit = DEFAULT_LIMIT;
        long interval = Long.parseLong(intervalString);

        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmm");
        Date startDate = new Date();
        Date endDate = new Date();
        try {
            startDate = dateFormat.parse(start);
            endDate = dateFormat.parse(end);
        } catch (ParseException e) {
            e.printStackTrace();
        }

        ViewCollection viewCollection = new ViewCollection();
        viewCollection.setLimit(limit);
        viewCollection.setFrom(startDate);
        viewCollection.setTo(endDate);
        viewCollection.setInterval(interval);

        long intervalCount = 0;
        Date intervalStart;
        Date intervalEnd;
        HttpGet getInterval;
        HttpResponse response;
        boolean foundViews = false;

        do {
            foundViews = false;
            // Get the start and end of the interval
            intervalStart = new Date(startDate.getTime() + interval * secondsToMilliseconds * intervalCount);
            intervalEnd = new Date(
                    startDate.getTime() + interval * secondsToMilliseconds * (intervalCount + 1));
            String uri = UrlSupport.concat(engageURL, "/usertracking/report.xml");
            uri += "?from=" + dateFormat.format(intervalStart);
            uri += "&to=" + dateFormat.format(intervalEnd);
            uri += "&limit=" + limit;
            getInterval = new HttpGet(uri);
            response = client.execute(getInterval);

            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
            DocumentBuilder dBuilder;
            try {
                dBuilder = dbFactory.newDocumentBuilder();
                Document document = dBuilder.parse(response.getEntity().getContent());
                document.getDocumentElement().normalize();
                NodeList reports = document.getChildNodes();
                for (int i = 0; i < reports.getLength(); i++) {
                    Node report = reports.item(i);
                    NodeList reportItems = report.getChildNodes();
                    for (int j = 0; j < reportItems.getLength(); j++) {
                        Node reportItem = reportItems.item(j);
                        if (reportItem.getNodeType() == Node.ELEMENT_NODE) {
                            Element eElement = (Element) reportItem;
                            String tagID = getTagValue(EPISODE_ID_TAG_NAME, eElement);
                            if (id.equals(tagID)) {
                                // A fresh ViewItem per match, so items already added to the
                                // collection are not mutated by later matches.
                                ViewItem viewItem = new ViewItem();
                                viewItem.setId(getTagValue(EPISODE_ID_TAG_NAME, eElement));
                                viewItem.setViews(getTagValue(VIEWS_XML_TAG, eElement));
                                viewItem.setPlayed(getTagValue(PLAYED_XML_TAG, eElement));
                                viewItem.setStart(dateFormat.format(intervalStart));
                                viewItem.setEnd(dateFormat.format(intervalEnd));
                                viewCollection.add(viewItem);
                                viewCollection.setViews(viewCollection.getViews()
                                        + Integer.parseInt(getTagValue(VIEWS_XML_TAG, eElement)));
                                viewCollection.setPlayed(viewCollection.getPlayed()
                                        + Integer.parseInt(getTagValue(PLAYED_XML_TAG, eElement)));
                                viewCollection.setTotal(viewCollection.getTotal() + 1);
                                foundViews = true;
                            }

                        }
                    }
                }
                // Handle the case where there is no data for this episode during this interval.
                if (!foundViews) {
                    ViewItem viewItem = new ViewItem();
                    viewItem.setId(id);
                    viewItem.setViews("0");
                    viewItem.setPlayed("0");
                    viewItem.setStart(dateFormat.format(intervalStart));
                    viewItem.setEnd(dateFormat.format(intervalEnd));
                    viewCollection.add(viewItem);
                }
            } catch (IllegalStateException | SAXException | IOException | ParserConfigurationException e) {
                e.printStackTrace();
            }
            intervalCount++;
        } while (intervalStart.before(endDate) && intervalEnd.before(endDate));
        return viewCollection;
    } else {
        return new ViewCollection();
    }
}
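
For instance, a hypothetical call requesting hourly (3600-second) intervals across 2012 (the service variable and episode id are illustrative):

// Illustrative usage sketch; "analyticsService" and the episode id are assumptions.
ViewCollection views = analyticsService.getViews("episode-1234", "201201010000", "201212312359", "3600");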

From source file:com.wfreitas.camelsoap.SoapClient.java

private String buildSOAPMessage(String soapMessageTemplate, Map params, String smooksResource, String soapNs)
        throws IOException, SAXException {
    Document messageDoc = getDocBuilder().parse(new InputSource(new StringReader(soapMessageTemplate)));

    Element docRoot = messageDoc.getDocumentElement();

    boolean dumpSOAP = params.containsKey("dumpSOAP");
    if (dumpSOAP) {
        dumpSOAP("SOAP Template (Unexpanded):", docRoot);
    }

    expandMessage(docRoot, params);

    if (dumpSOAP) {
        dumpSOAP("SOAP Template (Expanded):", docRoot);
    }

    injectParameters(docRoot, params, soapNs);

    if (dumpSOAP) {
        dumpSOAP("SOAP Message (Populated Template):", docRoot);
    }

    return XmlUtil.serialize(messageDoc.getChildNodes());
}

From source file:com.hp.application.automation.tools.results.RunResultRecorder.java

private void processLrScenarioStats(JobLrScenarioResult jobLrScenarioResult, Document doc) {

    NodeList rootNodes = doc.getChildNodes();
    Node root = getNode("Runs", rootNodes);
    Element generalNode = (Element) getNode("General", root.getChildNodes());
    NodeList generalNodeChildren = generalNode.getChildNodes();

    extractVUserScenarioReult(jobLrScenarioResult, generalNodeChildren);
    extractTransactionScenarioResult(jobLrScenarioResult, generalNodeChildren);
    extractConnectionsScenarioResult(jobLrScenarioResult, generalNodeChildren);
    extractDuration(jobLrScenarioResult, generalNodeChildren);
}

From source file:com.hp.application.automation.tools.results.RunResultRecorder.java

private void processSLA(JobLrScenarioResult jobLrScenarioResult, Document doc) {
    Node slaRuleNode;
    Element slaRuleElement;

    NodeList rootNodes = doc.getChildNodes();
    Node root = getNode("Runs", rootNodes);
    Element slaRoot = (Element) getNode("SLA", root.getChildNodes());
    NodeList slaRuleResults = slaRoot.getChildNodes();

    for (int j = 0; j < slaRuleResults.getLength(); j++) {
        slaRuleNode = slaRuleResults.item(j);
        if (slaRuleNode.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        slaRuleElement = (Element) slaRuleNode;
        // check the rule type via the Measurement attribute:
        LrTest.SLA_GOAL slaGoal = LrTest.SLA_GOAL.checkGoal(slaRuleElement.getAttribute("Measurement"));

        processSlaRule(jobLrScenarioResult, slaRuleElement, slaGoal);
    }

}

From source file:com.krawler.portal.tools.ServiceBuilder.java

public void writeToSourceCfgXml(String fileName) {
    try {
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(PropsValues.CFG_SOURCE_FILE_PATH);
        Node hibernate_conf = doc.getChildNodes().item(1);
        Node SessionFac = hibernate_conf.getChildNodes().item(1);
        Element mapping = doc.createElement("mapping");
        mapping.setAttribute("resource", PropsValues.PACKAGE_FILE_PATH + fileName + ".hbm.xml");
        SessionFac.appendChild(mapping);
        DOMSource ds = new DOMSource(doc);
        StreamResult sr = new StreamResult(PropsValues.CFG_SOURCE_FILE_PATH);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer trans = tf.newTransformer();
        trans.setOutputProperty(OutputKeys.VERSION, "1.0");
        trans.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        trans.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd");
        trans.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//Hibernate/Hibernate Configuration DTD 3.0//EN");
        trans.transform(ds, sr);
    } catch (TransformerException | SAXException | IOException | ParserConfigurationException ex) {
        logger.warn(ex.getMessage(), ex);
    }
}

From source file:com.krawler.portal.tools.ServiceBuilder.java

public void writeToClassesCfgXml(String fileName) {
    try {
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(PropsValues.CFG_CLASSES_FILE_PATH);
        Node hibernate_conf = doc.getChildNodes().item(1);
        Node SessionFac = hibernate_conf.getChildNodes().item(1);
        Element mapping = doc.createElement("mapping");
        mapping.setAttribute("resource", PropsValues.PACKAGE_FILE_PATH + fileName + ".hbm.xml");
        SessionFac.appendChild(mapping);
        DOMSource ds = new DOMSource(doc);
        StreamResult sr = new StreamResult(PropsValues.CFG_CLASSES_FILE_PATH);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer trans = tf.newTransformer();
        trans.setOutputProperty(OutputKeys.VERSION, "1.0");
        trans.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        trans.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd");
        trans.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//Hibernate/Hibernate Configuration DTD 3.0//EN");
        trans.transform(ds, sr);

    } catch (TransformerException | SAXException | IOException | ParserConfigurationException ex) {
        logger.warn(ex.getMessage(), ex);
    }

}

From source file:com.krawler.portal.tools.ServiceBuilder.java

public void deleteSourceEntryCfgXml(String fileName) {
    try {
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(PropsValues.CFG_SOURCE_FILE_PATH);
        Node hibernate_conf = doc.getChildNodes().item(1);
        Node SessionFac = hibernate_conf.getChildNodes().item(1);
        Element sesFac = (Element) SessionFac;
        NodeList mapping_lists = sesFac.getElementsByTagName("mapping");
        Node toDelete = null;
        for (int num = 0; num < mapping_lists.getLength(); num++) {
            Element mapEle = (Element) mapping_lists.item(num);
            if (mapEle.getAttribute("resource").equals(PropsValues.PACKAGE_FILE_PATH + fileName + ".hbm.xml")) {
                toDelete = mapEle;
                break;
            }
        }
        if (toDelete != null) {
            sesFac.removeChild(toDelete);
        }
        DOMSource ds = new DOMSource(doc);
        StreamResult sr = new StreamResult(PropsValues.CFG_SOURCE_FILE_PATH);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer trans = tf.newTransformer();
        trans.setOutputProperty(OutputKeys.VERSION, "1.0");
        trans.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        trans.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd");
        trans.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//Hibernate/Hibernate Configuration DTD 3.0//EN");
        trans.transform(ds, sr);
    } catch (TransformerException | SAXException | IOException | ParserConfigurationException ex) {
        logger.warn(ex.getMessage(), ex);
    }
}

From source file:com.krawler.portal.tools.ServiceBuilder.java

public void deleteClassesEntryCfgXml(String fileName) {
    try {
        DocumentBuilderFactory docFactory = DocumentBuilderFactory.newInstance();
        DocumentBuilder docBuilder = docFactory.newDocumentBuilder();
        Document doc = docBuilder.parse(PropsValues.CFG_CLASSES_FILE_PATH);
        Node hibernate_conf = doc.getChildNodes().item(1);
        Node SessionFac = hibernate_conf.getChildNodes().item(1);
        Element sesFac = (Element) SessionFac;
        NodeList mapping_lists = sesFac.getElementsByTagName("mapping");
        Node toDelete = null;
        for (int num = 0; num < mapping_lists.getLength(); num++) {
            Element mapEle = (Element) mapping_lists.item(num);
            if (mapEle.getAttribute("resource").equals(PropsValues.PACKAGE_FILE_PATH + fileName + ".hbm.xml")) {
                toDelete = mapEle;
                break;
            }
        }
        if (toDelete != null) {
            sesFac.removeChild(toDelete);
        }
        DOMSource ds = new DOMSource(doc);
        StreamResult sr = new StreamResult(PropsValues.CFG_CLASSES_FILE_PATH);
        TransformerFactory tf = TransformerFactory.newInstance();
        Transformer trans = tf.newTransformer();
        trans.setOutputProperty(OutputKeys.VERSION, "1.0");
        trans.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
        trans.setOutputProperty(OutputKeys.DOCTYPE_SYSTEM,
                "http://hibernate.sourceforge.net/hibernate-configuration-3.0.dtd");
        trans.setOutputProperty(OutputKeys.DOCTYPE_PUBLIC, "-//Hibernate/Hibernate Configuration DTD 3.0//EN");
        trans.transform(ds, sr);
    } catch (TransformerException | SAXException | IOException | ParserConfigurationException ex) {
        logger.warn(ex.getMessage(), ex);
    }
}