Example usage for org.jsoup.nodes Element outerHtml

List of usage examples for org.jsoup.nodes Element outerHtml

Introduction

This page collects usage examples of the org.jsoup.nodes.Element.outerHtml() method, taken from open-source projects.

Prototype

public String outerHtml() 

Source Link

Document

Get the outer HTML of this node.

Usage

From source file:org.brnvrn.Main.java

/**
 * Parses the HTML of one category table, including the comments interleaved between its rows.
 *
 * <p>Walks the direct children of the first {@code tbody} found under {@code tool_div}. Every
 * {@code #comment} node is passed to {@code parseComment} for the tool currently being built.
 * Every {@code tr} row that contains no {@code th} cell is parsed into that tool, which is then
 * appended to {@code tools} and replaced by a fresh instance for the rows that follow.
 *
 * @param tools    list that receives one tool per data row
 * @param tool_div element expected to contain the category table
 * @param category category name set on every tool taken from this table
 * @param obsolete flag handed to every {@code Tool} created here
 */
private static void parseCategory(List<Tool> tools, Element tool_div, String category, boolean obsolete) {
    // Elements.first() yields null when the selector matched nothing; without this
    // guard a block that has no table body fails with a NullPointerException.
    Element tbody = tool_div.select("tbody").first();
    if (tbody == null) {
        System.out.println("  No table body found for category: " + category);
        return;
    }

    Tool tool = new Tool(obsolete);
    for (Node child : tbody.childNodes()) {
        switch (child.nodeName()) {
        case "#comment":
            parseComment(tool, (Comment) child);
            break;
        case "tr":
            Element tr = (Element) child;
            if (tr.select("th").size() > 0) // Skip headings
                break;
            tool.setCategory(category);
            if (!parseTrTool(tool, tr))
                System.out.println("  Could not parse: " + tr.outerHtml());
            // The tool is recorded even when the row could not be parsed.
            tools.add(tool);
            tool = new Tool(obsolete);
            break;
        }
    }
}

From source file:com.screenslicer.core.scrape.ProcessPage.java

/**
 * Obtains the page element through {@code Util.openElement}, trims it, and runs the
 * element-based {@code perform} overload on it.
 *
 * <p>The overload is first called with its boolean argument set to {@code true}; when that
 * yields {@code null} or an empty list it is called again with {@code false}, sharing the same
 * cache map. When {@code WebApp.DEBUG} is set, the element's outer HTML is also written to a
 * file in the working directory named after the current time in milliseconds.
 *
 * @param driver     browser driver, used to open the element and to read the current URL
 * @param page       page value forwarded to the element-based overload
 * @param query      query value forwarded to the element-based overload
 * @param whitelist  forwarded to {@code Util.openElement}
 * @param patterns   forwarded to {@code Util.openElement}
 * @param transforms forwarded to {@code Util.openElement}
 * @return the results of the first attempt, or of the second attempt when the first was empty
 * @throws ActionFailed wrapping any {@code Throwable} raised while processing
 */
public static List<Result> perform(RemoteWebDriver driver, int page, String query, String[] whitelist,
        String[] patterns, UrlTransform[] transforms) throws ActionFailed {
    try {
        Element element = Util.openElement(driver, whitelist, patterns, transforms);
        trim(element);
        if (WebApp.DEBUG) {
            try {
                FileUtils.writeStringToFile(new File("./" + System.currentTimeMillis()), element.outerHtml());
            } catch (IOException e) {
                // The dump is only a debugging aid: record the failure instead of
                // hiding it, and carry on with the scrape.
                Log.exception(e);
            }
        }
        Map<String, Object> cache = new HashMap<String, Object>();
        List<Result> results = perform(element, page, query, driver.getCurrentUrl(), true, cache);
        if (results == null || results.isEmpty()) {
            results = perform(element, page, query, driver.getCurrentUrl(), false, cache);
        }
        return results;
    } catch (Throwable t) {
        Log.exception(t);
        throw new ActionFailed(t);
    }
}

From source file:net.sf.texprinter.utils.StringUtils.java

/**
 * Escapes HTML entities and tags to a TeX format. This method tries to
 * replace HTML code by the TeX equivalent macros.
 *
 * @param text The input text.//w  ww  .  j  a v  a 2  s . c om
 * @return A new text formatted from HTML to TeX.
 */
public static String escapeHTMLtoTeX(String text) {

    // replace bold tags
    String newText = text.replaceAll("<b>", "\\\\textbf{");
    newText = newText.replaceAll("</b>", "}");

    // replace bold tags
    newText = newText.replaceAll("<strong>", "\\\\textbf{");
    newText = newText.replaceAll("</strong>", "}");

    // replace italic tags
    newText = newText.replaceAll("<i>", "\\\\textit{");
    newText = newText.replaceAll("</i>", "}");

    // replace emphasized tags
    newText = newText.replaceAll("<em>", "\\\\emph{");
    newText = newText.replaceAll("</em>", "}");

    // replace paragraphs tags
    newText = newText.replaceAll("<p>", "");
    newText = newText.replaceAll("</p>", "\n\n");

    // replace ordered lists tags
    newText = newText.replaceAll("<ol>", "\\\\begin{enumerate}\n");
    newText = newText.replaceAll("</ol>", "\\\\end{enumerate}\n");

    // replace unordered lists tags
    newText = newText.replaceAll("<ul>", "\\\\begin{itemize}\n");
    newText = newText.replaceAll("</ul>", "\\\\end{itemize}\n");

    // replace item tags
    newText = newText.replaceAll("<li>", "\\\\item ");
    newText = newText.replaceAll("</li>", "\n");

    // replace blockquote tags
    newText = newText.replaceAll("<blockquote>", "\\\\begin{quotation}\n");
    newText = newText.replaceAll("</blockquote>", "\\\\end{quotation}\n");

    // replace code tags
    newText = newText.replaceAll("<pre><code>", "\\\\begin{TeXPrinterListing}\n");
    newText = newText.replaceAll("<pre class=.*\"><code>", "\\\\begin{TeXPrinterListing}\n");
    newText = newText.replaceAll("</code></pre>", "\\\\end{TeXPrinterListing}\n\n");

    // replace inline code tags
    newText = newText.replaceAll("<code>", "\\\\lstinline|");
    newText = newText.replaceAll("</code>", "|");

    // replace links tags
    newText = newText.replaceAll("alt=\".*\" ", "");

    // parse the text
    Document docLinks = Jsoup.parse(newText);

    // get all the links
    Elements links = docLinks.getElementsByTag("a");

    // if there are links
    if (links.size() > 0) {

        // for every link
        for (Element link : links) {

            // get the outer HTML
            String temp = link.outerHtml();

            // replace it
            newText = newText.replaceFirst(Pattern.quote(temp),
                    "\\\\href{" + link.attr("href") + "}{" + link.text() + "}");

        }
    }

    // create a list of images
    ArrayList<ImageGroup> images = new ArrayList<ImageGroup>();

    // parse the current text
    Document doc = Jsoup.parse(text);

    // fetch all the media found
    Elements media = doc.select("[src]");

    // for all media found
    for (Element m : media) {

        // if it's an image tag
        if (m.tagName().equals("img")) {

            // create a new image group with the image link
            ImageGroup image = new ImageGroup(m.attr("abs:src"));

            // add to the list of images
            images.add(image);

            // set the current image to null
            image = null;
        }
    }

    // create a new loop saver
    LoopSaver lps = null;

    // for every image in the list of images
    for (ImageGroup img : images) {

        // create a new object
        lps = new LoopSaver();

        // while there are references for that image in the text
        while (newText.indexOf(img.getURL()) != -1) {

            // tick loop
            lps.tick();

            // replace the occurrence of that image
            newText = newText.replaceFirst("<img src=\"" + img.getURL() + "\" />",
                    "\\\\begin{figure}[h!]\n\\\\centering\n\\\\includegraphics[scale=0.5]{" + img.getName()
                            + "}\n\\\\end{figure}");
        }

        // lets try
        try {

            // finally, download the image to the current directory
            Downloader.download(img.getURL(), img.getName());

        } catch (Exception exception) {

            // log message
            log.log(Level.WARNING,
                    "An error occurred while getting the current image. Trying to set the replacement image instead. MESSAGE: {0}",
                    StringUtils.printStackTrace(exception));

            // image could not be downloaded for any reason
            try {

                // open a file stream
                FileOutputStream f = new FileOutputStream(img.getName());

                // write a replacement image
                f.write(Base64.decode(
                        "iVBORw0KGgoAAAANSUhEUgAAALAAAABKCAIAAACU3El2AAAAAXNSR0IArs4c6QAAAARnQU1BAACxjwv8YQUAAAAJcEhZcwAADsMAAA7DAcdvqGQAAAcjSURBVHhe7VzrmeMgDExdKSj1pJptZovZi3lqhAQ4n7HhrPt1STAaRoOELa0ff/bPGCAMPIwNY4AyYIIwPQADJggThAnCNKAzYBHC1GERwjRgEcI00MmApYxOou4yzARxF093rtME0UnUXYaZIO7i6c51miA6ibrLMBPEXTzduc4vBPHzejwez/cvt+C+f7x+GpY7h/2+nz2zdS5z6LCFoLZ5qAli8wj/9xHCzIK4xDeXGG27No7YB68dIfx8JCJoguiHOG7kvsUfhOMSo/3Y98H7XhBvlyFIWAfDPjFIOWT/MLgC9IlW/E/xH0tqeWiR7tx1z7dfkE95dKo00TbF8/2TrMTsiIyLhsh8eJVHG00EJGAizUeScT+8KieSqL4VRGCOHggILySI/L5feNzYP0wThGBF2Q2frwPn0oBImuhgZ8T/ElzjZqK6JHPKhmhM/Xlt15e7wpkISIiFIBaF54AJ4LkP6vhmYPlWEFHT4JR0DPR4hINnWjSB3RxWFQRaaYdHIeGxpMjmyIqAS1EFwvk3jy5NaiYASSkaIpQULhR4sl+acvj7GyOItJuEnKGkDH57ohGupAwI9cKdDgnkhVQl97DDdNYvRHevRm1FwZAgUkSTU2uPILRrfVSob9QLBeFNs71HvgOfNYZpEQKP0dL2CSPoBGqEwD0nha12hBANCavTzuXdgmjCmytC5Exapb9zGGREksfFy0WqHc+eQymZ8S1MEzDdU/QMISdp2RALahsQxUSPILRr1QiBP7SCxKCUEU5HrbuMzmEkA4VTvveudHn6DjZRjrOvl7tXoI/V2jGdnNqer1e8l5EOoYqhjBTuWUhiIodK6fyLQmdpg8ILKxOimXqqA420BdGS1KDfBS8NstQ9rRbouydYYeC0gpiQ/QkhHS+xKQURA2yzLnI8H7UZTRDn8m3WZmBgyggxAzF3xWCCuKvnlXWbIEwQa9x2mp+uYaAeIbRndiOx7nsAsW/0cbilp+2HzX7Ior5EuIogeElH7kU4zCXNib6kuzmvGzCvIPrwHztKZqOsHfj65iHcfbGAmwqC0B3qNq1mHrKTc8GAbW94Vo8tQ6qLIXkRbzBBkOpG0fXHLJGqQ+oLVi5PgknXhIqGWJigdRahGk1KwNt07Ras2JgDvVUfSHWqOcJe0ddTBhdEKAtF3txyiaty/bFUEusbAEe6KYSWD7KIHkEoc4qooDzse7oqkDwQcg0tfArtSbwpKhBGCq6EOr9yuXwqfR/r/EINTEPYq4bPuJ2CaBfigu0MzW8DV110vEiRHhSB8qDzQSsb3YjNOUVUWPVksaZEIRQQs1tTrMjRK0+4/c9VWTecIdSmWny9pQUfl4uJCqnG/kyla60ikIMFgckh96yw/0EU5N24REEZuJx1YFvzc2euvQuoyp4u/XKPAp3B/c7yI673M7XPDLEVIowGb0PMis2IXAFlCAjs5ZgUkXx5yjlSEHSPZeQ0L0sdXn3hDFIGuYTYxM2Uxsio4s+ZNuVypkmBbmkTk95tL4XPF5up0Nsd0mNbEKy5Ja1FXpQWw/oo9qMOFwTJk879JEJSXJqD5bY7TKV0noKZ4k/HeIiOqIpdqkMqQ0R5hpCSaVj80+nBr+H5+ZAgdggCFIFJqOwBo0EBEO5QxJGCoGGYNCaxWIyHx9wzhE8Wcgj2i+mIEHlYmhT607eD65bI6eHDjcxVdg1qJDT9Do1b+GccoEh0S/gkd2+KKSPnqrAmgT3oAdMQdktieC1DCGOTtTl0c3WLgaMFgWf3VlS+BeVzL3K0IFK05/cSc9NyX3QnCOK+5K64chPEil4biNkEMZDcFac2QazotYGYTRADyV1x6l2CaD7dXZEBwww
MdD+pTM8B+TPEOQlltcs5Qc6IygQxo1cuxFQTRPHKppAyirdLffDTmqYUQ8jv8ck1LRxAETG/7ikUpppvf2J/CA4F1qIlQLLrC0/C+6M6lnah9waY3h8h6m+XgrceJbz08OFfskQfYpMiXXRlEA37qDY1lfNrKUOxGxs06i9ochf/55WY/YIoO3wY+SVt5WFU6iEoezz4G2g0Q8JhVxGEZld720ZzaQP26LVTHiEIVjRmJWWpM1ptBGIOkPxRvv1Jcr4sCNWuJojW0q513gjrhwmicvPB3RALXqwPMTUc5qgsCaI0JMyvtedLEaJ8oVgedb8b7cZzCCQEPpEPrao2eIycIcouo3qE6Ho1k59fe7ESXYLch4Zy1ZbWWvKIzXvKnK0HU+nAnk6CQpdw5LBsf0pryAd/7EpkjUANQeiGKvOzkAK3IM3mJc3ibQVxiirNyDwMtCLEPEgNySkMmCBOoXkdIyaIdXx1ClITxCk0r2PEBLGOr05BaoI4heZ1jJgg1vHVKUhNEKfQvI4RE8Q6vjoFqQniFJrXMWKCWMdXpyA1QZxC8zpGTBDr+OoUpP8Arv92hCPEu+kAAAAASUVORK5CYII="));

                // close the file
                f.close();

            } catch (IOException ioexception) {

                // log message
                log.log(Level.SEVERE,
                        "An IO exception occured while trying to create the image replacement. MESSAGE: {0}",
                        StringUtils.printStackTrace(ioexception));

            } catch (Exception except) {

                // log message
                log.log(Level.SEVERE,
                        "An error occured while trying to create the image replacement. MESSAGE: {0}",
                        StringUtils.printStackTrace(except));

            }

        }

    }

    // unescape all HTML entities
    newText = StringEscapeUtils.unescapeHtml(newText);

    // return new text
    return newText;
}

From source file:com.cognifide.aet.job.common.datafilters.extractelement.ExtractElementDataModifier.java

/**
 * Looks up the element whose id equals {@code elementId} and returns its outer HTML.
 *
 * @param document parsed document to search
 * @return outer HTML of the matching element
 * @throws ProcessingException when the document has no element with that id
 */
private String modifyDataForElementParam(Document document) throws ProcessingException {
    Element match = document.getElementById(elementId);
    if (match == null) {
        throw new ProcessingException("No element with id=" + elementId + " found!");
    }
    return match.outerHtml();
}

From source file:com.lingxiang2014.entity.Article.java

/**
 * Splits the article content into pages.
 *
 * <p>Empty content gives a single empty page. Content that contains
 * {@code PAGE_BREAK_SEPARATOR} is split on that separator. Otherwise the content is parsed as
 * HTML and the direct children of the body are accumulated; a page is closed each time the
 * accumulated text length reaches {@code PAGE_CONTENT_LENGTH}. Text nodes are cut at
 * {@code PARAGRAPH_SEPARATOR_PATTERN}, and each piece keeps the separator that followed it.
 *
 * @return the page contents, in order
 */
@Transient
public String[] getPageContents() {
    if (StringUtils.isEmpty(content)) {
        return new String[] { "" };
    }
    if (content.contains(PAGE_BREAK_SEPARATOR)) {
        return content.split(PAGE_BREAK_SEPARATOR);
    }

    List<String> pages = new ArrayList<String>();
    List<Node> bodyNodes = Jsoup.parse(content).body().childNodes();
    if (bodyNodes != null) {
        StringBuilder pageHtml = new StringBuilder();
        int charsOnPage = 0;
        for (Node bodyNode : bodyNodes) {
            if (bodyNode instanceof Element) {
                // Elements are never split: append the whole markup, count only its text.
                Element block = (Element) bodyNode;
                pageHtml.append(block.outerHtml());
                charsOnPage += block.text().length();
                if (charsOnPage >= PAGE_CONTENT_LENGTH) {
                    pages.add(pageHtml.toString());
                    charsOnPage = 0;
                    pageHtml.setLength(0);
                }
            } else if (bodyNode instanceof TextNode) {
                String rawText = ((TextNode) bodyNode).text();
                String[] pieces = PARAGRAPH_SEPARATOR_PATTERN.split(rawText);
                // Walks the separators in step with the pieces so each piece gets its own back.
                Matcher separators = PARAGRAPH_SEPARATOR_PATTERN.matcher(rawText);
                for (String piece : pieces) {
                    String chunk = separators.find() ? piece + separators.group() : piece;
                    pageHtml.append(chunk);
                    charsOnPage += chunk.length();
                    if (charsOnPage >= PAGE_CONTENT_LENGTH) {
                        pages.add(pageHtml.toString());
                        charsOnPage = 0;
                        pageHtml.setLength(0);
                    }
                }
            }
        }
        // Whatever is left over forms the last, shorter page.
        String remainder = pageHtml.toString();
        if (StringUtils.isNotEmpty(remainder)) {
            pages.add(remainder);
        }
    }
    return pages.toArray(new String[pages.size()]);
}

From source file:edu.usu.sdl.openstorefront.service.io.HelpImporter.java

/**
 * Accepts a stream pointing to markdown and turns it into a list of help sections.
 *
 * <p>The markdown is read as UTF-8, converted to HTML, and every element of the resulting
 * document is visited in order. Each heading ({@code h1}-{@code h6}) starts a new section whose
 * title is the heading's inner HTML, whose section number is the first space-separated token of
 * that title, and which is marked as an admin section when the title contains {@code *}. The
 * outer HTML of every following non-heading element is appended to the current section unless
 * the section content already contains it.
 *
 * @param in stream providing the markdown; it is closed by this method
 * @return the help sections collected through {@code addHelpSection}
 */
public List<HelpSection> processHelp(InputStream in) {
    List<HelpSection> helpSections = new ArrayList<>();

    String data = "";
    // Decode explicitly as UTF-8 so the result does not depend on the platform default charset.
    try (BufferedReader bin = new BufferedReader(
            new InputStreamReader(in, java.nio.charset.StandardCharsets.UTF_8))) {
        data = bin.lines().collect(Collectors.joining("\n"));
    } catch (IOException ignored) {
        // Best effort, as before: a failure while closing the reader leaves whatever
        // was read (or the empty string) to be processed below.
    }

    PegDownProcessor pegDownProcessor = new PegDownProcessor(PROCESSING_TIMEOUT);
    String html = pegDownProcessor.markdownToHtml(data);
    Document doc = Jsoup.parse(html);
    Elements elements = doc.getAllElements();

    Set<String> headerTags = new HashSet<>();
    headerTags.add("h1");
    headerTags.add("h2");
    headerTags.add("h3");
    headerTags.add("h4");
    headerTags.add("h5");
    headerTags.add("h6");

    boolean capture = false;
    HelpSection helpSection = null;
    for (Element element : elements) {
        String tagName = element.tagName().toLowerCase();

        if (headerTags.contains(tagName)) {
            String title = element.html();

            if (helpSection != null) {
                //save old section
                addHelpSection(helpSections, helpSection);
            }

            // split() returns an empty array for a title made only of spaces
            String[] titleSplit = title.split(" ");
            String sectionNumber = titleSplit.length > 0 ? titleSplit[0] : "";

            helpSection = new HelpSection();
            helpSection.setTitle(title);
            helpSection.setHeaderLevel(Convert.toInteger(tagName.replace("h", "")));
            helpSection.setSectionNumber(sectionNumber);
            helpSection.setContent("");
            helpSection.setAdminSection(title.contains("*"));

            capture = true;
        } else if (capture && helpSection != null) {
            // getAllElements() also yields nested elements; skip markup that an
            // enclosing element has already contributed to this section.
            String elementHtml = element.outerHtml();
            if (!helpSection.getContent().contains(elementHtml)) {
                helpSection.setContent(helpSection.getContent() + elementHtml);
            }
        }
    }
    //Add last section
    if (helpSection != null) {
        addHelpSection(helpSections, helpSection);
    }

    return helpSections;
}

From source file:com.astamuse.asta4d.render.RenderUtil.java

/**
 * Replaces every message node under {@code target} with its resolved message.
 *
 * <p>For each node matched by {@code ExtNodeConstants.MSG_NODE_TAG_SELECTOR} the key, locale and
 * parameters are read from the node's attributes and the message is looked up through the
 * configured i18n helper (mapped or ordered). The node's own inner HTML, prefixed with the HTML
 * marker, serves as the default message and is rendered only if the helper asks for it. The
 * resolved value replaces the node: as text when it carries the text prefix or no prefix, as
 * parsed HTML when it carries the HTML prefix.
 *
 * <p>While a node is processed its template path, if present, is stored in the current thread
 * context under {@code TRACE_VAR_TEMPLATE_PATH}; the entry is reset to {@code null} afterwards.
 *
 * @param target root element whose message nodes are resolved in place
 */
public final static void applyMessages(Element target) {
    Context threadContext = Context.getCurrentThreadContext();
    List<Element> messageNodes = target.select(ExtNodeConstants.MSG_NODE_TAG_SELECTOR);
    for (final Element messageNode : messageNodes) {
        Attributes attrs = messageNode.attributes();
        String messageKey = attrs.get(ExtNodeConstants.MSG_NODE_ATTR_KEY);

        // Lazy default: the inner HTML is rendered only when toString() is actually invoked.
        Object fallbackMessage = new Object() {
            @Override
            public String toString() {
                return ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX + messageNode.html();
            }
        };

        Locale locale = LocalizeUtil.getLocale(attrs.get(ExtNodeConstants.MSG_NODE_ATTR_LOCALE));
        String templatePath = attrs.get(ExtNodeConstants.ATTR_TEMPLATE_PATH);
        if (!StringUtils.isEmpty(templatePath)) {
            threadContext.setData(TRACE_VAR_TEMPLATE_PATH, templatePath);
        } else {
            logger.warn("There is a msg tag which does not hold corresponding template file path:{}",
                    messageNode.outerHtml());
        }

        final Map<String, Object> params = getMessageParams(attrs, locale, messageKey);
        String resolved;
        switch (I18nMessageHelperTypeAssistant.configuredHelperType()) {
        case Mapped:
            resolved = I18nMessageHelperTypeAssistant.getConfiguredMappedHelper().getMessageWithDefault(locale,
                    messageKey, fallbackMessage, params);
            break;
        case Ordered:
        default:
            // Collect the consecutively numbered parameters (0, 1, 2, ...) into positional order.
            List<Object> positionalParams = new ArrayList<>();
            int position = 0;
            while (params.containsKey(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + position)) {
                positionalParams.add(params.get(ExtNodeConstants.MSG_NODE_ATTR_PARAM_PREFIX + position));
                position++;
            }
            resolved = I18nMessageHelperTypeAssistant.getConfiguredOrderedHelper().getMessageWithDefault(locale,
                    messageKey, fallbackMessage, positionalParams.toArray());
        }

        Node replacement;
        if (resolved.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX)) {
            String plain = resolved.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_TEXT_PREFIX.length());
            replacement = ElementUtil.text(plain);
        } else if (resolved.startsWith(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX)) {
            String markup = resolved.substring(ExtNodeConstants.MSG_NODE_ATTRVALUE_HTML_PREFIX.length());
            replacement = ElementUtil.parseAsSingle(markup);
        } else {
            replacement = ElementUtil.text(resolved);
        }
        messageNode.replaceWith(replacement);

        threadContext.setData(TRACE_VAR_TEMPLATE_PATH, null);
    }
}

From source file:net.groupbuy.entity.Article.java

/**
 * Returns the article content divided into pages.
 *
 * <p>Empty content produces one empty page. Content containing {@code PAGE_BREAK_SEPARATOR} is
 * split on it. Otherwise the content is parsed as HTML and the body's direct child nodes are
 * collected into pages, starting a new page whenever the collected text length reaches
 * {@code PAGE_CONTENT_LENGTH}; text nodes are divided at {@code PARAGRAPH_SEPARATOR_PATTERN}.
 *
 * @return the page contents, in order
 */
@Transient
public String[] getPageContents() {
    if (StringUtils.isEmpty(content)) {
        return new String[] { "" };
    }
    if (content.contains(PAGE_BREAK_SEPARATOR)) {
        return content.split(PAGE_BREAK_SEPARATOR);
    }

    List<String> result = new ArrayList<String>();
    Document parsed = Jsoup.parse(content);
    List<Node> topLevelNodes = parsed.body().childNodes();
    if (topLevelNodes == null) {
        return result.toArray(new String[result.size()]);
    }

    StringBuilder current = new StringBuilder();
    int currentLength = 0;
    for (Node topLevelNode : topLevelNodes) {
        if (topLevelNode instanceof Element) {
            // An element goes onto the page as a whole; only its text counts towards the limit.
            Element elementNode = (Element) topLevelNode;
            current.append(elementNode.outerHtml());
            currentLength += elementNode.text().length();
            if (currentLength >= PAGE_CONTENT_LENGTH) {
                result.add(current.toString());
                currentLength = 0;
                current.setLength(0);
            }
            continue;
        }
        if (!(topLevelNode instanceof TextNode)) {
            continue;
        }

        String plainText = ((TextNode) topLevelNode).text();
        String[] paragraphs = PARAGRAPH_SEPARATOR_PATTERN.split(plainText);
        Matcher separatorMatcher = PARAGRAPH_SEPARATOR_PATTERN.matcher(plainText);
        for (int p = 0; p < paragraphs.length; p++) {
            String paragraph = paragraphs[p];
            // Re-attach the separator that followed this paragraph, if there was one.
            if (separatorMatcher.find()) {
                paragraph += separatorMatcher.group();
            }
            current.append(paragraph);
            currentLength += paragraph.length();
            if (currentLength >= PAGE_CONTENT_LENGTH) {
                result.add(current.toString());
                currentLength = 0;
                current.setLength(0);
            }
        }
    }

    // Flush the unfinished last page.
    String tail = current.toString();
    if (StringUtils.isNotEmpty(tail)) {
        result.add(tail);
    }
    return result.toArray(new String[result.size()]);
}

From source file:cn.edu.hfut.dmic.contentextractor.ContentExtractor.java

/**
 * Searches the markup around {@code contentElement} for a timestamp matching {@code regex}.
 *
 * <p>The search starts up to two ancestors above {@code contentElement} (never above the
 * document body) and then tries at most six times, moving one parent up after each miss until
 * the body is reached. On the first hit the full match is stored in {@code srcTime} and the
 * captured groups are assembled as {@code g1-g2-g3[ g4[:g5[:g6]]]}, with groups 2 to 6 passed
 * through {@code format}.
 *
 * @param contentElement element from which the search starts
 * @param regex          pattern with at least three capturing groups
 * @return the assembled time string, or an empty string when nothing matched
 * @throws Exception declared by the original signature
 */
protected String getTime(Element contentElement, String regex) throws Exception {
    Pattern timePattern = Pattern.compile(regex);

    // Widen the search scope by at most two levels, stopping at the body.
    Element cursor = contentElement;
    int climbed = 0;
    while (climbed < 2) {
        if (cursor != null && cursor != doc.body()) {
            Element above = cursor.parent();
            if (above != null) {
                cursor = above;
            }
        }
        climbed++;
    }

    for (int attempt = 0; attempt < 6 && cursor != null; attempt++) {
        Matcher hit = timePattern.matcher(cursor.outerHtml());
        if (hit.find()) {
            srcTime = hit.group(0);
            String datePart = hit.group(1) + "-" + format(hit.group(2)) + "-" + format(hit.group(3));
            StringBuilder stamp = new StringBuilder(datePart);
            int groups = hit.groupCount();
            if (groups >= 4) {
                stamp.append(" ").append(format(hit.group(4)));
            }
            if (groups >= 5) {
                stamp.append(":").append(format(hit.group(5)));
            }
            if (groups >= 6) {
                stamp.append(":").append(format(hit.group(6)));
            }
            return stamp.toString();
        }
        // No match here: retry one level further out, unless already at the body.
        if (cursor != doc.body()) {
            cursor = cursor.parent();
        }
    }
    return "";
}

From source file:be.ibridge.kettle.jsoup.JsoupInput.java

/**
 * Builds one output row from the jsoup results at position {@code data.recordnr}.
 *
 * <p>Starts from a clone of {@code data.readrow} when one is present, otherwise from an empty
 * row. For every configured input field the value is taken from the matching element (text,
 * outer HTML, inner HTML, attribute or {@code toString()} depending on the field settings),
 * trimmed as configured, converted to the target value type and stored at
 * {@code data.totalpreviousfields + i}. Empty values of "repeated" fields are filled from the
 * previous row. The optional file-related columns are then appended, {@code data.recordnr} is
 * advanced, and the row is remembered in {@code data.previousRow}.
 *
 * @return the populated row
 * @throws KettleException when building the row or converting a value fails
 */
private Object[] buildRow() throws KettleException {
    // Create new row...
    Object[] outputRowData = buildEmptyRow();

    if (data.readrow != null)
        outputRowData = data.readrow.clone();

    // Read fields...
    for (int i = 0; i < data.nrInputFields; i++) {
        // Get field
        JsoupInputField field = meta.getInputFields()[i];

        // Position of this field in the output row
        int targetIndex = data.totalpreviousfields + i;

        // get jsoup array for field
        Elements jsoupa = data.resultList.get(i);
        String nodevalue = null;
        if (jsoupa != null) {
            Element jo = jsoupa.get(data.recordnr);
            if (jo != null) {

                // Do Element Type
                switch (field.getElementType()) {
                case JsoupInputField.ELEMENT_TYPE_NODE:
                    // Do Result Type
                    switch (field.getResultType()) {
                    case JsoupInputField.RESULT_TYPE_TEXT:
                        nodevalue = jo.text();
                        break;
                    case JsoupInputField.RESULT_TYPE_TYPE_OUTER_HTML:
                        nodevalue = jo.outerHtml();
                        break;
                    case JsoupInputField.RESULT_TYPE_TYPE_INNER_HTML:
                        nodevalue = jo.html();
                        break;
                    default:
                        nodevalue = jo.toString();
                        break;
                    }
                    break;
                case JsoupInputField.ELEMENT_TYPE_ATTRIBUT:
                    nodevalue = jo.attr(field.getAttribute());
                    break;
                default:
                    nodevalue = jo.toString();
                    break;
                }
            }
        }

        // Do trimming
        switch (field.getTrimType()) {
        case JsoupInputField.TYPE_TRIM_LEFT:
            nodevalue = Const.ltrim(nodevalue);
            break;
        case JsoupInputField.TYPE_TRIM_RIGHT:
            nodevalue = Const.rtrim(nodevalue);
            break;
        case JsoupInputField.TYPE_TRIM_BOTH:
            nodevalue = Const.trim(nodevalue);
            break;
        default:
            break;
        }

        if (meta.isInFields()) {
            // Add result field to input stream
            outputRowData = RowDataUtil.addValueData(outputRowData, targetIndex, nodevalue);
        }
        // Do conversions
        //
        ValueMetaInterface targetValueMeta = data.outputRowMeta.getValueMeta(targetIndex);
        ValueMetaInterface sourceValueMeta = data.convertRowMeta.getValueMeta(targetIndex);
        outputRowData[targetIndex] = targetValueMeta.convertData(sourceValueMeta, nodevalue);

        // Do we need to repeat this field if it is null?
        if (field.isRepeated()) {
            if (data.previousRow != null && Const.isEmpty(nodevalue)) {
                outputRowData[targetIndex] = data.previousRow[targetIndex];
            }
        }
    } // End of loop over fields...

    int rowIndex = data.nrInputFields;

    // See if we need to add the filename to the row...
    if (meta.includeFilename() && !Const.isEmpty(meta.getFilenameField())) {
        outputRowData[rowIndex++] = data.filename;
    }
    // See if we need to add the row number to the row...
    if (meta.includeRowNumber() && !Const.isEmpty(meta.getRowNumberField())) {
        // valueOf replaces the deprecated boxing constructor
        outputRowData[rowIndex++] = Long.valueOf(data.rownr);
    }
    // Possibly add short filename...
    if (meta.getShortFileNameField() != null && meta.getShortFileNameField().length() > 0) {
        outputRowData[rowIndex++] = data.shortFilename;
    }
    // Add Extension
    if (meta.getExtensionField() != null && meta.getExtensionField().length() > 0) {
        outputRowData[rowIndex++] = data.extension;
    }
    // add path
    if (meta.getPathField() != null && meta.getPathField().length() > 0) {
        outputRowData[rowIndex++] = data.path;
    }
    // Add Size
    if (meta.getSizeField() != null && meta.getSizeField().length() > 0) {
        outputRowData[rowIndex++] = Long.valueOf(data.size);
    }
    // add Hidden
    if (meta.isHiddenField() != null && meta.isHiddenField().length() > 0) {
        // NOTE(review): the "hidden" column is derived from data.path, which looks like a
        // copy/paste slip -- confirm whether a dedicated hidden flag should be used instead.
        outputRowData[rowIndex++] = Boolean.valueOf(data.path);
    }
    // Add modification date
    if (meta.getLastModificationDateField() != null && meta.getLastModificationDateField().length() > 0) {
        outputRowData[rowIndex++] = data.lastModificationDateTime;
    }
    // Add Uri
    if (meta.getUriField() != null && meta.getUriField().length() > 0) {
        outputRowData[rowIndex++] = data.uriName;
    }
    // Add RootUri
    if (meta.getRootUriField() != null && meta.getRootUriField().length() > 0) {
        outputRowData[rowIndex++] = data.rootUriName;
    }
    data.recordnr++;

    RowMetaInterface irow = getInputRowMeta();

    // Keep a copy as the previous row so that the next step cannot change it in between.
    data.previousRow = irow == null ? outputRowData : (Object[]) irow.cloneRow(outputRowData);

    return outputRowData;
}