Example usage for javax.xml.parsers SAXParserFactory newInstance

List of usage examples for javax.xml.parsers SAXParserFactory newInstance

Introduction

In this page you can find the example usage for javax.xml.parsers SAXParserFactory newInstance.

Prototype


public static SAXParserFactory newInstance() 

Source Link

Document

Obtain a new instance of a SAXParserFactory .

Usage

From source file:org.gege.caldavsyncadapter.caldav.CaldavFacade.java

private void parseXML(HttpResponse response, ContentHandler contentHandler)
        throws IOException, CaldavProtocolException {
    InputStream is = response.getEntity().getContent();
    /*BufferedReader bReader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
    String Content = "";//w  ww. j a v a  2 s. c  om
    String Line = bReader.readLine();
            
    while (Line != null) {
       Content += Line;
       Line = bReader.readLine();
    }*/

    SAXParserFactory factory = SAXParserFactory.newInstance();
    try {
        SAXParser parser = factory.newSAXParser();
        XMLReader reader = parser.getXMLReader();
        reader.setContentHandler(contentHandler);
        reader.parse(new InputSource(is));
    } catch (ParserConfigurationException e) {
        throw new AssertionError("ParserConfigurationException " + e.getMessage());
    } catch (IllegalStateException e) {
        throw new CaldavProtocolException(e.getMessage());
    } catch (SAXException e) {
        throw new CaldavProtocolException(e.getMessage());
    }
}

From source file:fr.paris.lutece.plugins.calendar.web.CalendarStyleSheetJspBean.java

/**
 * Use parsing for validate the modify xsl file
 * /*  w w w. jav a2  s  .  c  om*/
 * @param baXslSource The XSL source
 * @return the message exception when the validation is false
 */
private String isValid(byte[] baXslSource) {
    String strError = null;

    try {
        SAXParserFactory factory = SAXParserFactory.newInstance();
        SAXParser analyzer = factory.newSAXParser();
        InputSource is = new InputSource(new ByteArrayInputStream(baXslSource));
        analyzer.getXMLReader().parse(is);
    } catch (Exception e) {
        strError = e.getMessage();
    }

    return strError;
}

From source file:com.flipzu.flipzu.FlipInterface.java

private List<BroadcastDataSet> sendRequest(String url, String data) throws IOException {
    DefaultHttpClient hc = new DefaultHttpClient();

    ResponseHandler<String> res = new ResponseHandler<String>() {
        public String handleResponse(final HttpResponse response) throws HttpResponseException, IOException {
            StatusLine statusLine = response.getStatusLine();
            if (statusLine.getStatusCode() >= 300) {
                throw new HttpResponseException(statusLine.getStatusCode(), statusLine.getReasonPhrase());
            }/*from w ww .  ja v  a  2 s .c  o m*/

            HttpEntity entity = response.getEntity();
            return entity == null ? null : EntityUtils.toString(entity, "UTF-8");
        }
    };

    HttpPost postMethod = new HttpPost(url);

    postMethod.getParams().setParameter(CoreProtocolPNames.USE_EXPECT_CONTINUE, Boolean.FALSE);

    if (data != null) {
        StringEntity tmp = null;
        try {
            tmp = new StringEntity(data, "UTF-8");
        } catch (UnsupportedEncodingException e) {
            debug.logE(TAG, "sendRequest ERROR", e.getCause());
            return null;
        }

        postMethod.setEntity(tmp);
    }

    String response = hc.execute(postMethod, res);

    SAXParserFactory spf = SAXParserFactory.newInstance();
    try {
        SAXParser sp = spf.newSAXParser();
        XMLReader xr = sp.getXMLReader();
        TimelineHandler myTimelineHandler = new TimelineHandler();
        xr.setContentHandler(myTimelineHandler);

        InputSource inputSource = new InputSource();
        inputSource.setEncoding("UTF-8");
        inputSource.setCharacterStream(new StringReader(response));

        xr.parse(inputSource);

        List<BroadcastDataSet> parsedDataSet = myTimelineHandler.getParsedData();

        return parsedDataSet;

    } catch (ParserConfigurationException e) {
        return null;
    } catch (SAXException e) {
        return null;
    }
}

From source file:com.entertailion.java.caster.RampClient.java

private void parseXml(Reader reader) {
    try {//from  ww w  .  ja  v  a 2 s  . c o m
        InputSource inStream = new org.xml.sax.InputSource();
        inStream.setCharacterStream(reader);
        SAXParserFactory spf = SAXParserFactory.newInstance();
        SAXParser sp = spf.newSAXParser();
        XMLReader xr = sp.getXMLReader();
        AppHandler appHandler = new AppHandler();
        xr.setContentHandler(appHandler);
        xr.parse(inStream);

        connectionServiceUrl = appHandler.getConnectionServiceUrl();
        state = appHandler.getState();
        protocol = appHandler.getProtocol();
    } catch (Exception e) {
        Log.e(LOG_TAG, "parse device description", e);
    }
}

From source file:efen.parsewiki.WikipediaDocumentSequence.java

@Override
public DocumentIterator iterator() throws IOException {
    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final MutableString nameSpaceAccumulator = new MutableString();
    final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>();
    final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16);
    for (int i = freeFactories.remainingCapacity(); i-- != 0;)
        freeFactories.add(this.factory.copy());
    final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>(
            freeFactories.size());// w w  w.  jav a  2s.  com

    final SAXParser parser;
    try {
        parser = saxParserFactory.newSAXParser();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    final DefaultHandler handler = new DefaultHandler() {
        private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        private boolean inText;
        private boolean inTitle;
        private boolean inId;
        private boolean inTimestamp;
        private boolean inNamespaceDef;
        private boolean redirect;
        private MutableString text = new MutableString();
        private MutableString title = new MutableString();
        private MutableString id = new MutableString();
        private MutableString timestamp = new MutableString();
        private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>();
        {
            metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8");
            metadata.put(MetadataKeys.REDIRECT, redirectAnchors);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                redirect = inText = inTitle = inId = inTimestamp = false;
                text.length(0);
                title.length(0);
                id.length(0);
                timestamp.length(0);
            } else if ("text".equals(localName))
                inText = true;
            else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first id/title elements.
            else if ("id".equals(localName) && id.length() == 0)
                inId = true;
            else if ("timestamp".equals(localName) && timestamp.length() == 0)
                inTimestamp = true;
            else if ("redirect".equals(localName)) {
                redirect = true;
                if (attributes.getValue("title") != null)
                    // Accumulate the title of the page as virtual text of the redirect page.
                    synchronized (redirectAnchors) {
                        final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true);
                        redirectAnchors.add(
                                new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length())
                                        .append(baseURL).append(link), title.copy()));
                    }
            } else if ("namespace".equals(localName)) {
                // Found a new namespace
                inNamespaceDef = true;
                nameSpaceAccumulator.length(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("namespace".equals(localName)) { // Collecting a namespace
                if (nameSpaceAccumulator.length() != 0)
                    nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase());
                return;
            }

            if ("namespaces".equals(localName)) { // All namespaces collected
                nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator);
                return;
            }

            if (!redirect) {
                if ("title".equals(localName)) {
                    // Set basic metadata for the page
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy());
                    String link = Encoder.encodeTitleToUrl(title.toString(), true);
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI,
                            new MutableString(baseURL.length() + link.length()).append(baseURL).append(link));
                    inTitle = false;
                } else if ("id".equals(localName)) {
                    metadata.put(MetadataKeys.ID, Long.valueOf(id.toString()));
                    inId = false;
                } else if ("timestamp".equals(localName)) {
                    try {
                        metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString()));
                    } catch (ParseException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                    inTimestamp = false;
                } else if ("text".equals(localName)) {
                    inText = false;
                    if (!keepNamespaced) {
                        // Namespaces are case-insensitive and language-dependent
                        final int pos = title.indexOf(':');
                        if (pos != -1 && isATrueNamespace(title.substring(0, pos)))
                            return;
                    }
                    try {
                        final MutableString html = new MutableString();
                        DocumentFactory freeFactory;
                        try {
                            freeFactory = freeFactories.take();
                        } catch (InterruptedException e) {
                            throw new RuntimeException(e.getMessage(), e);
                        }
                        if (parseText) {
                            if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page.
                                /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */
                                final MutableString newLinks = new MutableString();
                                for (int start = 0, end; (start = BRACKETS_OPEN.search(text,
                                        start)) != -1; start = end) {
                                    end = start;
                                    final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start);
                                    // Note that we don't escape title because we are working at the Wikipedia raw text level.
                                    if (endOfLink != -1) {
                                        newLinks.append(text.array(), start, endOfLink - start).append('|')
                                                .append(title).append("]]\n");
                                        end = endOfLink;
                                    }
                                    end++;
                                }

                                text.append(newLinks);
                            }
                            // We separate categories by OXOXO, so we don't get overflowing phrases.
                            final MutableString category = new MutableString();
                            for (int start = 0, end; (start = CATEGORY_START.search(text,
                                    start)) != -1; start = end) {
                                end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length());
                                if (end != -1)
                                    category.append(text.subSequence(start, end)).append(" OXOXO ");
                                else
                                    break;
                            }
                            metadata.put(MetadataKeys.CATEGORY, category);

                            // Heuristics to get the first paragraph
                            metadata.put(MetadataKeys.FIRSTPAR, new MutableString());
                            String plainText = new WikiModel(imageBaseURL, linkBaseURL)
                                    .render(new PlainTextConverter(true), text.toString());
                            for (int start = 0; start < plainText.length(); start++) {
                                //System.err.println("Examining " + plainText.charAt( start )  );
                                if (Character.isWhitespace(plainText.charAt(start)))
                                    continue;
                                if (plainText.charAt(start) == '{') {
                                    //System.err.print( "Braces " + start + " text: \"" + plainText.subSequence( start, start + 10 )  + "\" -> " );
                                    start = BRACES_CLOSED.search(plainText, start);
                                    //System.err.println( start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\"" );
                                    if (start == -1)
                                        break;
                                    start++;
                                } else if (plainText.charAt(start) == '[') {
                                    start = BRACKETS_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else {
                                    final int end = plainText.indexOf('\n', start);
                                    if (end != -1)
                                        metadata.put(MetadataKeys.FIRSTPAR,
                                                new MutableString(plainText.substring(start, end)));//new MutableString( new WikiModel( imageBaseURL, linkBaseURL ).render( new PlainTextConverter( true ), text.substring( start, end ).toString() ) ) );
                                    break;
                                }
                            }

                            try {
                                WikiModel wikiModel = new WikiModel(imageBaseURL, linkBaseURL);
                                wikiModel.render(new HTMLConverter(), text.toString(), html, false, false);
                                final Map<String, String> categories = wikiModel.getCategories();
                                // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering)
                                for (Entry<String, String> entry : categories.entrySet()) {
                                    final String key = entry.getKey();
                                    final String value = entry.getValue().trim();
                                    if (value.length() != 0) // There are empty such things
                                        html.append("\n<a href=\"").append(baseURL).append("Category:")
                                                .append(Encoder.encodeTitleToUrl(key, true)).append("\">")
                                                .append(HtmlEscapers.htmlEscaper().escape(key))
                                                .append("</a>\n");
                                }
                            } catch (Exception e) {
                                LOGGER.error("Unexpected exception while parsing " + title, e);
                            }
                        }
                        readyDocumentsAndFactories.put(new DocumentAndFactory(
                                freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8),
                                        new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)),
                                freeFactory));
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
            if (inId)
                id.append(ch, start, length);
            if (inTimestamp)
                timestamp.append(ch, start, length);
            if (inNamespaceDef) {
                nameSpaceAccumulator.append(ch, start, length);
                inNamespaceDef = false; // Dirty, but it works
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    final Thread parsingThread = new Thread() {
        public void run() {
            try {
                InputStream in = new FileInputStream(wikipediaXmlDump);
                if (bzipped)
                    in = new BZip2CompressorInputStream(in);
                parser.parse(
                        new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
                        handler);
                readyDocumentsAndFactories.put(END);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };

    parsingThread.start();

    return new AbstractDocumentIterator() {
        private DocumentFactory lastFactory;

        @Override
        public Document nextDocument() throws IOException {
            try {
                final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take();
                if (lastFactory != null)
                    freeFactories.put(lastFactory);
                if (documentAndFactory == END)
                    return null;
                lastFactory = documentAndFactory.factory;
                return documentAndFactory.document;
            } catch (InterruptedException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };
}

From source file:nl.armatiek.xslweb.configuration.WebApp.java

public XsltExecutable tryTemplatesCache(String transformationPath, ErrorListener errorListener)
        throws Exception {
    String key = FilenameUtils.normalize(transformationPath);
    XsltExecutable templates = templatesCache.get(key);
    if (templates == null) {
        logger.info("Compiling and caching stylesheet \"" + transformationPath + "\" ...");
        try {//from www.ja v  a  2 s. com
            SAXParserFactory spf = SAXParserFactory.newInstance();
            spf.setNamespaceAware(true);
            spf.setXIncludeAware(true);
            spf.setValidating(false);
            SAXParser parser = spf.newSAXParser();
            XMLReader reader = parser.getXMLReader();
            Source source = new SAXSource(reader, new InputSource(transformationPath));
            XsltCompiler comp = processor.newXsltCompiler();
            comp.setErrorListener(errorListener);
            templates = comp.compile(source);
        } catch (Exception e) {
            logger.error("Could not compile stylesheet \"" + transformationPath + "\"", e);
            throw e;
        }
        if (!developmentMode) {
            templatesCache.put(key, templates);
        }
    }
    return templates;
}

From source file:com.flexive.core.storage.GenericDivisionImporter.java

/**
 * Import data from a zip archive to a database table
 *
 * @param stmt               statement to use
 * @param zip                zip archive containing the zip entry
 * @param ze                 zip entry within the archive
 * @param xpath              xpath containing the entries to import
 * @param table              name of the table
 * @param executeInsertPhase execute the insert phase?
 * @param executeUpdatePhase execute the update phase?
 * @param updateColumns      columns that should be set to <code>null</code> in a first pass (insert)
 *                           and updated to the provided values in a second pass (update),
 *                           columns that should be used in the where clause have to be prefixed
 *                           with "KEY:", to assign a default value use the expression "columnname:default value",
 *                           if the default value is "@", it will be a negative counter starting at 0, decreasing.
 *                           If the default value starts with "%", it will be set to the column following the "%"
 *                           character in the first pass
 * @throws Exception on errors/*from  www .  j  a v a 2  s . c  o m*/
 */
protected void importTable(Statement stmt, final ZipFile zip, final ZipEntry ze, final String xpath,
        final String table, final boolean executeInsertPhase, final boolean executeUpdatePhase,
        final String... updateColumns) throws Exception {
    //analyze the table
    final ResultSet rs = stmt.executeQuery("SELECT * FROM " + table + " WHERE 1=2");
    StringBuilder sbInsert = new StringBuilder(500);
    StringBuilder sbUpdate = updateColumns.length > 0 ? new StringBuilder(500) : null;
    if (rs == null)
        throw new IllegalArgumentException("Can not analyze table [" + table + "]!");
    sbInsert.append("INSERT INTO ").append(table).append(" (");
    final ResultSetMetaData md = rs.getMetaData();
    final Map<String, ColumnInfo> updateClauseColumns = updateColumns.length > 0
            ? new HashMap<String, ColumnInfo>(md.getColumnCount())
            : null;
    final Map<String, ColumnInfo> updateSetColumns = updateColumns.length > 0
            ? new LinkedHashMap<String, ColumnInfo>(md.getColumnCount())
            : null;
    final Map<String, String> presetColumns = updateColumns.length > 0 ? new HashMap<String, String>(10) : null;
    //preset to a referenced column (%column syntax)
    final Map<String, String> presetRefColumns = updateColumns.length > 0 ? new HashMap<String, String>(10)
            : null;
    final Map<String, Integer> counters = updateColumns.length > 0 ? new HashMap<String, Integer>(10) : null;
    final Map<String, ColumnInfo> insertColumns = new HashMap<String, ColumnInfo>(
            md.getColumnCount() + (counters != null ? counters.size() : 0));
    int insertIndex = 1;
    int updateSetIndex = 1;
    int updateClauseIndex = 1;
    boolean first = true;
    for (int i = 0; i < md.getColumnCount(); i++) {
        final String currCol = md.getColumnName(i + 1).toLowerCase();
        if (updateColumns.length > 0) {
            boolean abort = false;
            for (String col : updateColumns) {
                if (col.indexOf(':') > 0 && !col.startsWith("KEY:")) {
                    String value = col.substring(col.indexOf(':') + 1);
                    col = col.substring(0, col.indexOf(':'));
                    if ("@".equals(value)) {
                        if (currCol.equalsIgnoreCase(col)) {
                            counters.put(col, 0);
                            insertColumns.put(col, new ColumnInfo(md.getColumnType(i + 1), insertIndex++));
                            sbInsert.append(',').append(currCol);
                        }
                    } else if (value.startsWith("%")) {
                        if (currCol.equalsIgnoreCase(col)) {
                            presetRefColumns.put(col, value.substring(1));
                            insertColumns.put(col, new ColumnInfo(md.getColumnType(i + 1), insertIndex++));
                            sbInsert.append(',').append(currCol);
                            //                                System.out.println("==> adding presetRefColumn "+col+" with value of "+value.substring(1));
                        }
                    } else if (!presetColumns.containsKey(col))
                        presetColumns.put(col, value);
                }
                if (currCol.equalsIgnoreCase(col)) {
                    abort = true;
                    updateSetColumns.put(currCol, new ColumnInfo(md.getColumnType(i + 1), updateSetIndex++));
                    break;
                }
            }
            if (abort)
                continue;
        }
        if (first) {
            first = false;
        } else
            sbInsert.append(',');
        sbInsert.append(currCol);
        insertColumns.put(currCol, new ColumnInfo(md.getColumnType(i + 1), insertIndex++));
    }
    if (updateColumns.length > 0 && executeUpdatePhase) {
        sbUpdate.append("UPDATE ").append(table).append(" SET ");
        int counter = 0;
        for (String updateColumn : updateSetColumns.keySet()) {
            if (counter++ > 0)
                sbUpdate.append(',');
            sbUpdate.append(updateColumn).append("=?");
        }
        sbUpdate.append(" WHERE ");
        boolean hasKeyColumn = false;
        for (String col : updateColumns) {
            if (!col.startsWith("KEY:"))
                continue;
            hasKeyColumn = true;
            String keyCol = col.substring(4);
            for (int i = 0; i < md.getColumnCount(); i++) {
                if (!md.getColumnName(i + 1).equalsIgnoreCase(keyCol))
                    continue;
                updateClauseColumns.put(keyCol, new ColumnInfo(md.getColumnType(i + 1), updateClauseIndex++));
                sbUpdate.append(keyCol).append("=? AND ");
                break;
            }

        }
        if (!hasKeyColumn)
            throw new IllegalArgumentException("Update columns require a KEY!");
        sbUpdate.delete(sbUpdate.length() - 5, sbUpdate.length()); //remove trailing " AND "
        //"shift" clause indices
        for (String col : updateClauseColumns.keySet()) {
            GenericDivisionImporter.ColumnInfo ci = updateClauseColumns.get(col);
            ci.index += (updateSetIndex - 1);
        }
    }
    if (presetColumns != null) {
        for (String key : presetColumns.keySet())
            sbInsert.append(',').append(key);
    }
    sbInsert.append(")VALUES(");
    for (int i = 0; i < insertColumns.size(); i++) {
        if (i > 0)
            sbInsert.append(',');
        sbInsert.append('?');
    }
    if (presetColumns != null) {
        for (String key : presetColumns.keySet())
            sbInsert.append(',').append(presetColumns.get(key));
    }
    sbInsert.append(')');
    if (DBG) {
        LOG.info("Insert statement:\n" + sbInsert.toString());
        if (updateColumns.length > 0)
            LOG.info("Update statement:\n" + sbUpdate.toString());
    }
    //build a map containing all nodes that require attributes
    //this allows for matching simple xpath queries like "flatstorages/storage[@name='FX_FLAT_STORAGE']/data"
    final Map<String, List<String>> queryAttributes = new HashMap<String, List<String>>(5);
    for (String pElem : xpath.split("/")) {
        if (!(pElem.indexOf('@') > 0 && pElem.indexOf('[') > 0))
            continue;
        List<String> att = new ArrayList<String>(5);
        for (String pAtt : pElem.split("@")) {
            if (!(pAtt.indexOf('=') > 0))
                continue;
            att.add(pAtt.substring(0, pAtt.indexOf('=')));
        }
        queryAttributes.put(pElem.substring(0, pElem.indexOf('[')), att);
    }
    final PreparedStatement psInsert = stmt.getConnection().prepareStatement(sbInsert.toString());
    final PreparedStatement psUpdate = updateColumns.length > 0 && executeUpdatePhase
            ? stmt.getConnection().prepareStatement(sbUpdate.toString())
            : null;
    try {
        final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        final DefaultHandler handler = new DefaultHandler() {
            private String currentElement = null;
            private Map<String, String> data = new HashMap<String, String>(10);
            private StringBuilder sbData = new StringBuilder(10000);
            boolean inTag = false;
            boolean inElement = false;
            int counter;
            List<String> path = new ArrayList<String>(10);
            StringBuilder currPath = new StringBuilder(100);
            boolean insertMode = true;

            /**
             * {@inheritDoc}
             */
            @Override
            public void startDocument() throws SAXException {
                counter = 0;
                inTag = false;
                inElement = false;
                path.clear();
                currPath.setLength(0);
                sbData.setLength(0);
                data.clear();
                currentElement = null;
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void processingInstruction(String target, String data) throws SAXException {
                if (target != null && target.startsWith("fx_")) {
                    if (target.equals("fx_mode"))
                        insertMode = "insert".equals(data);
                } else
                    super.processingInstruction(target, data);
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void endDocument() throws SAXException {
                if (insertMode)
                    LOG.info("Imported [" + counter + "] entries into [" + table + "] for xpath [" + xpath
                            + "]");
                else
                    LOG.info("Updated [" + counter + "] entries in [" + table + "] for xpath [" + xpath + "]");
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                pushPath(qName, attributes);
                if (currPath.toString().equals(xpath)) {
                    inTag = true;
                    data.clear();
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getLocalName(i);
                        if (StringUtils.isEmpty(name))
                            name = attributes.getQName(i);
                        data.put(name, attributes.getValue(i));
                    }
                } else {
                    currentElement = qName;
                }
                inElement = true;
                sbData.setLength(0);
            }

            /**
             * Push a path element from the stack
             *
             * @param qName element name to push
             * @param att attributes
             */
            private void pushPath(String qName, Attributes att) {
                if (att.getLength() > 0 && queryAttributes.containsKey(qName)) {
                    String curr = qName + "[";
                    boolean first = true;
                    final List<String> attList = queryAttributes.get(qName);
                    for (int i = 0; i < att.getLength(); i++) {
                        if (!attList.contains(att.getQName(i)))
                            continue;
                        if (first)
                            first = false;
                        else
                            curr += ',';
                        curr += "@" + att.getQName(i) + "='" + att.getValue(i) + "'";
                    }
                    curr += ']';
                    path.add(curr);
                } else
                    path.add(qName);
                buildPath();
            }

            /**
             * Pop the top path element from the stack
             */
            private void popPath() {
                path.remove(path.size() - 1);
                buildPath();
            }

            /**
             * Rebuild the current path
             */
            private synchronized void buildPath() {
                currPath.setLength(0);
                for (String s : path)
                    currPath.append(s).append('/');
                if (currPath.length() > 1)
                    currPath.delete(currPath.length() - 1, currPath.length());
                //                    System.out.println("currPath: " + currPath);
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                if (currPath.toString().equals(xpath)) {
                    if (DBG)
                        LOG.info("Insert [" + xpath + "]: [" + data + "]");
                    inTag = false;
                    try {
                        if (insertMode) {
                            if (executeInsertPhase) {
                                processColumnSet(insertColumns, psInsert);
                                counter += psInsert.executeUpdate();
                            }
                        } else {
                            if (executeUpdatePhase) {
                                if (processColumnSet(updateSetColumns, psUpdate)) {
                                    processColumnSet(updateClauseColumns, psUpdate);
                                    counter += psUpdate.executeUpdate();
                                }
                            }
                        }
                    } catch (SQLException e) {
                        throw new SAXException(e);
                    } catch (ParseException e) {
                        throw new SAXException(e);
                    }
                } else {
                    if (inTag) {
                        data.put(currentElement, sbData.toString());
                    }
                    currentElement = null;
                }
                popPath();
                inElement = false;
                sbData.setLength(0);
            }

            /**
             * Process a column set
             *
             * @param columns the columns to process
             * @param ps prepared statement to use
             * @return if data other than <code>null</code> has been set
             * @throws SQLException on errors
             * @throws ParseException on date/time conversion errors
             */
            private boolean processColumnSet(Map<String, ColumnInfo> columns, PreparedStatement ps)
                    throws SQLException, ParseException {
                boolean dataSet = false;
                for (String col : columns.keySet()) {
                    ColumnInfo ci = columns.get(col);
                    String value = data.get(col);
                    if (insertMode && counters != null && counters.get(col) != null) {
                        final int newVal = counters.get(col) - 1;
                        value = String.valueOf(newVal);
                        counters.put(col, newVal);
                        //                            System.out.println("new value for " + col + ": " + newVal);
                    }
                    if (insertMode && presetRefColumns != null && presetRefColumns.get(col) != null) {
                        value = data.get(presetRefColumns.get(col));
                        //                            System.out.println("Set presetRefColumn for "+col+" to ["+value+"] from column ["+presetRefColumns.get(col)+"]");
                    }

                    if (value == null)
                        ps.setNull(ci.index, ci.columnType);
                    else {
                        dataSet = true;
                        switch (ci.columnType) {
                        case Types.BIGINT:
                        case Types.NUMERIC:
                            if (DBG)
                                LOG.info("BigInt " + ci.index + "->" + new BigDecimal(value));
                            ps.setBigDecimal(ci.index, new BigDecimal(value));
                            break;
                        case java.sql.Types.DOUBLE:
                            if (DBG)
                                LOG.info("Double " + ci.index + "->" + Double.parseDouble(value));
                            ps.setDouble(ci.index, Double.parseDouble(value));
                            break;
                        case java.sql.Types.FLOAT:
                        case java.sql.Types.REAL:
                            if (DBG)
                                LOG.info("Float " + ci.index + "->" + Float.parseFloat(value));
                            ps.setFloat(ci.index, Float.parseFloat(value));
                            break;
                        case java.sql.Types.TIMESTAMP:
                        case java.sql.Types.DATE:
                            if (DBG)
                                LOG.info("Timestamp/Date " + ci.index + "->"
                                        + FxFormatUtils.getDateTimeFormat().parse(value));
                            ps.setTimestamp(ci.index,
                                    new Timestamp(FxFormatUtils.getDateTimeFormat().parse(value).getTime()));
                            break;
                        case Types.TINYINT:
                        case Types.SMALLINT:
                            if (DBG)
                                LOG.info("Integer " + ci.index + "->" + Integer.valueOf(value));
                            ps.setInt(ci.index, Integer.valueOf(value));
                            break;
                        case Types.INTEGER:
                        case Types.DECIMAL:
                            try {
                                if (DBG)
                                    LOG.info("Long " + ci.index + "->" + Long.valueOf(value));
                                ps.setLong(ci.index, Long.valueOf(value));
                            } catch (NumberFormatException e) {
                                //Fallback (temporary) for H2 if the reported long is a big decimal (tree...)
                                ps.setBigDecimal(ci.index, new BigDecimal(value));
                            }
                            break;
                        case Types.BIT:
                        case Types.CHAR:
                        case Types.BOOLEAN:
                            if (DBG)
                                LOG.info("Boolean " + ci.index + "->" + value);
                            if ("1".equals(value) || "true".equals(value))
                                ps.setBoolean(ci.index, true);
                            else
                                ps.setBoolean(ci.index, false);
                            break;
                        case Types.LONGVARBINARY:
                        case Types.VARBINARY:
                        case Types.BLOB:
                        case Types.BINARY:
                            ZipEntry bin = zip.getEntry(value);
                            if (bin == null) {
                                LOG.error("Failed to lookup binary [" + value + "]!");
                                ps.setNull(ci.index, ci.columnType);
                                break;
                            }
                            try {
                                ps.setBinaryStream(ci.index, zip.getInputStream(bin), (int) bin.getSize());
                            } catch (IOException e) {
                                LOG.error("IOException importing binary [" + value + "]: " + e.getMessage(), e);
                            }
                            break;
                        case Types.CLOB:
                        case Types.LONGVARCHAR:
                        case Types.VARCHAR:
                        case SQL_LONGNVARCHAR:
                        case SQL_NCHAR:
                        case SQL_NCLOB:
                        case SQL_NVARCHAR:
                            if (DBG)
                                LOG.info("String " + ci.index + "->" + value);
                            ps.setString(ci.index, value);
                            break;
                        default:
                            LOG.warn("Unhandled type [" + ci.columnType + "] for column [" + col + "]");
                        }
                    }
                }
                return dataSet;
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                if (inElement)
                    sbData.append(ch, start, length);
            }

        };
        handler.processingInstruction("fx_mode", "insert");
        parser.parse(zip.getInputStream(ze), handler);
        if (updateColumns.length > 0 && executeUpdatePhase) {
            handler.processingInstruction("fx_mode", "update");
            parser.parse(zip.getInputStream(ze), handler);
        }
    } finally {
        Database.closeObjects(GenericDivisionImporter.class, psInsert, psUpdate);
    }
}

From source file:org.corpus_tools.pepper.connectors.impl.MavenAccessor.java

/**
 * This method checks the provided pepper plugin for updates and triggers
 * the installation process if a newer version is available
 *//*from w  ww  .j a va  2  s.  c  om*/
public boolean update(String groupId, String artifactId, String repositoryUrl, boolean isSnapshot,
        boolean ignoreFrameworkVersion, Bundle installedBundle) {
    if (forbiddenFruits.isEmpty() && !initDependencies()) {
        logger.warn("Update could not be performed, because the pepper dependencies could not be listed.");
        return false;
    }

    if (logger.isTraceEnabled()) {
        logger.trace("Starting update process for " + groupId + ", " + artifactId + ", " + repositoryUrl
                + ", isSnapshot=" + isSnapshot + ", ignoreFrameworkVersion=" + ignoreFrameworkVersion
                + ", installedBundle=" + installedBundle);
    } else {
        logger.info("Starting update process for " + artifactId);
    }

    String newLine = System.getProperty("line.separator");

    DefaultRepositorySystemSession session = getNewSession();

    boolean update = true; // MUST be born true!

    /* build repository */

    RemoteRepository repo = getRepo("repo", repositoryUrl);

    /* build artifact */
    Artifact artifact = new DefaultArtifact(groupId, artifactId, "zip", "[0,)");

    try {
        /* version range request */
        VersionRangeRequest rangeRequest = new VersionRangeRequest();
        rangeRequest.addRepository(repo);
        rangeRequest.setArtifact(artifact);
        VersionRangeResult rangeResult = mvnSystem.resolveVersionRange(session, rangeRequest);
        rangeRequest.setArtifact(artifact);

        /* utils needed for request */
        ArtifactRequest artifactRequest = new ArtifactRequest();
        artifactRequest.addRepository(repo);
        ArtifactResult artifactResult = null;

        /* get all pepperModules versions listed in the maven repository */
        List<Version> versions = rangeResult.getVersions();
        Collections.reverse(versions);

        /* utils for version comparison */
        Iterator<Version> itVersions = versions.iterator();
        VersionScheme vScheme = new GenericVersionScheme();
        boolean srcExists = false;
        Version installedVersion = installedBundle == null ? vScheme.parseVersion("0.0.0")
                : vScheme.parseVersion(
                        installedBundle.getVersion().toString().replace(".SNAPSHOT", "-SNAPSHOT"));
        Version newestVersion = null;

        /*
         * compare, if the listed version really exists in the maven
         * repository
         */
        File file = null;
        while (!srcExists && itVersions.hasNext() && update) {
            newestVersion = itVersions.next();
            artifact = new DefaultArtifact(groupId, artifactId, "zip", newestVersion.toString());
            if (!(artifact.isSnapshot() && !isSnapshot)) {
                update = newestVersion.compareTo(installedVersion) > 0;
                artifactRequest.setArtifact(artifact);
                try {
                    artifactResult = mvnSystem.resolveArtifact(session, artifactRequest);
                    artifact = artifactResult.getArtifact();
                    srcExists = update && artifact.getFile().exists();
                    file = artifact.getFile();

                } catch (ArtifactResolutionException e) {
                    logger.warn("Plugin version " + newestVersion
                            + " could not be found in repository. Checking the next lower version ...");
                }
            }
        }
        update &= file != null;// in case of only snapshots in the maven
        // repository vs. isSnapshot=false
        /*
         * if an update is possible/necessary, perform dependency collection
         * and installation
         */
        if (update) {
            /* create list of necessary repositories */
            Artifact pom = new DefaultArtifact(artifact.getGroupId(), artifact.getArtifactId(), "pom",
                    artifact.getVersion());
            artifactRequest.setArtifact(pom);
            boolean pomReadingErrors = false;
            try {
                artifactResult = null;
                artifactResult = mvnSystem.resolveArtifact(session, artifactRequest);
            } catch (ArtifactResolutionException e1) {
                pomReadingErrors = true;
            }
            List<RemoteRepository> repoList = new ArrayList<RemoteRepository>();
            repoList.add(repos.get(CENTRAL_REPO));
            repoList.add(repos.get(repositoryUrl));
            if (artifactResult != null && artifactResult.getArtifact().getFile().exists()) {
                try {
                    SAXParser saxParser = SAXParserFactory.newInstance().newSAXParser();
                    saxParser.parse(artifactResult.getArtifact().getFile().getAbsolutePath(),
                            new POMReader(repoList));
                } catch (SAXException | IOException | ParserConfigurationException e) {
                    pomReadingErrors = true;
                }
            }
            if (pomReadingErrors) {
                logger.warn(
                        "Could not determine all relevant repositories, update might fail. Trying to continue ...");
                repoList.add(repos.get(KORPLING_MAVEN_REPO));
            }

            /* remove older version */
            if (installedBundle != null) {
                try {
                    if (!pepperOSGiConnector.remove(installedBundle.getSymbolicName())) {
                        logger.warn("Could not remove older version. Update process aborted.");
                        return false;
                    }
                } catch (BundleException | IOException e) {
                    logger.warn("An error occured while trying to remove OSGi bundle "
                            + installedBundle.getSymbolicName()
                            + ". This may cause update problems. Trying to continue ...");
                }
            }

            /* utils for file-collection */
            List<Artifact> installArtifacts = new ArrayList<Artifact>();
            installArtifacts.add(artifact);

            /* utils for dependency collection */
            CollectRequest collectRequest = new CollectRequest();
            collectRequest.setRoot(new Dependency(artifact, ""));
            collectRequest.setRepositories(repoList);
            CollectResult collectResult = mvnSystem.collectDependencies(session, collectRequest);
            List<Dependency> allDependencies = getAllDependencies(collectResult.getRoot(), true);

            /*
             * we have to remove the dependencies of pepperParent from the
             * dependency list, since they are (sometimes) not already on
             * the blacklist
             */
            String parentVersion = null;
            for (int i = 0; i < allDependencies.size() && parentVersion == null; i++) {
                if (ARTIFACT_ID_PEPPER_FRAMEWORK.equals(allDependencies.get(i).getArtifact().getArtifactId())) {
                    parentVersion = allDependencies.get(i).getArtifact().getVersion();
                }
            }
            if (parentVersion == null) {
                logger.warn(artifactId
                        + ": Could not perform update: pepper-parent version could not be determined.");
                return false;
            }
            VersionRange range = isCompatiblePlugin(parentVersion);
            if (!ignoreFrameworkVersion && range != null) {
                logger.info((new StringBuilder()).append(
                        "No update was performed because of a version incompatibility according to pepper-framework: ")
                        .append(newLine).append(artifactId).append(" only supports ").append(range.toString())
                        .append(", but ").append(pepperOSGiConnector.getFrameworkVersion())
                        .append(" is installed!").append(newLine)
                        .append("You can make pepper ignore this by using \"update")
                        .append(isSnapshot ? " snapshot " : " ").append("iv ").append(artifactId).append("\"")
                        .toString());
                return false;
            }
            allDependencies = cleanDependencies(allDependencies, session, parentVersion);
            Bundle bundle = null;
            Dependency dependency = null;
            // in the following we ignore the first dependency (i=0),
            // because it is the module itself
            for (int i = 1; i < allDependencies.size(); i++) {
                dependency = allDependencies.get(i);
                if (!ARTIFACT_ID_PEPPER_FRAMEWORK.equals(dependency.getArtifact().getArtifactId())) {
                    artifactRequest = new ArtifactRequest();
                    artifactRequest.setArtifact(dependency.getArtifact());
                    artifactRequest.setRepositories(repoList);
                    try {
                        artifactResult = mvnSystem.resolveArtifact(session, artifactRequest);
                        installArtifacts.add(artifactResult.getArtifact());
                    } catch (ArtifactResolutionException e) {
                        logger.warn("Artifact " + dependency.getArtifact().getArtifactId()
                                + " could not be resolved. Dependency will not be installed.");
                        if (!Boolean.parseBoolean(pepperOSGiConnector.getPepperStarterConfiguration()
                                .getProperty("pepper.forceUpdate").toString())) {
                            logger.error("Artifact ".concat(artifact.getArtifactId())
                                    .concat(" will not be installed. Resolution of dependency ")
                                    .concat(dependency.getArtifact().getArtifactId())
                                    .concat(" failed and \"force update\" is disabled in pepper.properties."));
                            return false;
                        }
                    }
                }
            }
            artifact = null;
            Artifact installArtifact = null;
            for (int i = installArtifacts.size() - 1; i >= 0; i--) {
                try {
                    installArtifact = installArtifacts.get(i);
                    logger.info("installing: " + installArtifact);
                    bundle = pepperOSGiConnector.installAndCopy(installArtifact.getFile().toURI());
                    if (i != 0) {// the module itself must not be put on the
                                 // blacklist
                        putOnBlacklist(installArtifact);
                    } else if (installedBundle != null) {
                        pepperOSGiConnector.remove(installedBundle.getSymbolicName());
                        logger.info(
                                "Successfully removed version ".concat(installedBundle.getVersion().toString())
                                        .concat(" of ").concat(artifactId));
                    }
                    if (bundle != null) {
                        bundle.start();
                    }
                } catch (IOException | BundleException e) {
                    if (logger.isTraceEnabled()) {
                        logger.trace("File could not be installed: " + installArtifact + " ("
                                + installArtifact.getFile() + "); " + e.getClass().getSimpleName());
                    } else {
                        logger.warn("File could not be installed: " + installArtifact.getFile());
                    }
                }
            }
            /*
             * btw: root is not supposed to be stored as forbidden
             * dependency. This makes the removal of a module much less
             * complicated. If a pepper module would be put onto the
             * blacklist and the bundle would be removed, we always had to
             * make sure, it its entry on the blacklist would be removed.
             * Assuming the entry would remain on the blacklist, the module
             * could be reinstalled, BUT(!) the dependencies would all be
             * dropped and never being installed again, since the modules
             * node dominates all other nodes in the dependency tree.
             */
            write2Blacklist();
        } else {
            logger.info("No (newer) version of " + artifactId + " could be found.");
        }
    } catch (VersionRangeResolutionException | InvalidVersionSpecificationException
            | DependencyCollectionException e) {
        if (e instanceof DependencyCollectionException) {
            Throwable t = e.getCause();
            while (t.getCause() != null) {
                t = t.getCause();
            }
            if (t instanceof ArtifactNotFoundException) {
                if (logger.isDebugEnabled()) {
                    logger.debug(t.getMessage(), e);
                } else {
                    logger.warn("Update of " + artifactId + " failed, could not resolve dependencies "/*
                                                                                                      * ,
                                                                                                      * e
                                                                                                      */);// TODO
                                                                                                           // decide
                }
            }
            if (t instanceof HttpResponseException) {
                logger.error("Dependency resolution failed!" + System.lineSeparator()
                        + "\tUnsatisfying http response: " + t.getMessage());
            }
        }
        update = false;
    }
    return update;
}

From source file:it.unimi.di.big.mg4j.document.WikipediaDocumentSequence.java

@Override
public DocumentIterator iterator() throws IOException {
    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final MutableString nameSpaceAccumulator = new MutableString();
    final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>();
    final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16);
    for (int i = freeFactories.remainingCapacity(); i-- != 0;)
        freeFactories.add(this.factory.copy());
    final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>(
            freeFactories.size());/* www  .j  av a 2s.  co m*/

    final SAXParser parser;
    try {
        parser = saxParserFactory.newSAXParser();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    final DefaultHandler handler = new DefaultHandler() {
        private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        private boolean inText;
        private boolean inTitle;
        private boolean inId;
        private boolean inTimestamp;
        private boolean inNamespaceDef;
        private boolean redirect;
        private MutableString text = new MutableString();
        private MutableString title = new MutableString();
        private MutableString id = new MutableString();
        private MutableString timestamp = new MutableString();
        private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>();
        {
            metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8");
            metadata.put(MetadataKeys.REDIRECT, redirectAnchors);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                redirect = inText = inTitle = inId = inTimestamp = false;
                text.length(0);
                title.length(0);
                id.length(0);
                timestamp.length(0);
            } else if ("text".equals(localName))
                inText = true;
            else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first id/title elements.
            else if ("id".equals(localName) && id.length() == 0)
                inId = true;
            else if ("timestamp".equals(localName) && timestamp.length() == 0)
                inTimestamp = true;
            else if ("redirect".equals(localName)) {
                redirect = true;
                if (attributes.getValue("title") != null)
                    // Accumulate the title of the page as virtual text of the redirect page.
                    synchronized (redirectAnchors) {
                        final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true);
                        redirectAnchors.add(
                                new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length())
                                        .append(baseURL).append(link), title.copy()));
                    }
            } else if ("namespace".equals(localName)) {
                // Found a new namespace
                inNamespaceDef = true;
                nameSpaceAccumulator.length(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("namespace".equals(localName)) { // Collecting a namespace
                if (nameSpaceAccumulator.length() != 0)
                    nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase());
                return;
            }

            if ("namespaces".equals(localName)) { // All namespaces collected
                nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator);
                return;
            }

            if (!redirect) {
                if ("title".equals(localName)) {
                    // Set basic metadata for the page
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy());
                    String link = Encoder.encodeTitleToUrl(title.toString(), true);
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI,
                            new MutableString(baseURL.length() + link.length()).append(baseURL).append(link));
                    inTitle = false;
                } else if ("id".equals(localName)) {
                    metadata.put(MetadataKeys.ID, Long.valueOf(id.toString()));
                    inId = false;
                } else if ("timestamp".equals(localName)) {
                    try {
                        metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString()));
                    } catch (ParseException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                    inTimestamp = false;
                } else if ("text".equals(localName)) {
                    inText = false;
                    if (!keepNamespaced) {
                        // Namespaces are case-insensitive and language-dependent
                        final int pos = title.indexOf(':');
                        if (pos != -1 && nameSpaces.contains(title.substring(0, pos).toLowerCase()))
                            return;
                    }
                    try {
                        final MutableString html = new MutableString();
                        DocumentFactory freeFactory;
                        try {
                            freeFactory = freeFactories.take();
                        } catch (InterruptedException e) {
                            throw new RuntimeException(e.getMessage(), e);
                        }
                        if (parseText) {
                            if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page.
                                /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */
                                final MutableString newLinks = new MutableString();
                                for (int start = 0, end; (start = BRACKETS_OPEN.search(text,
                                        start)) != -1; start = end) {
                                    end = start;
                                    final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start);
                                    // Note that we don't escape title because we are working at the Wikipedia raw text level.
                                    if (endOfLink != -1) {
                                        newLinks.append(text.array(), start, endOfLink - start).append('|')
                                                .append(title).append("]]\n");
                                        end = endOfLink;
                                    }
                                    end++;
                                }

                                text.append(newLinks);
                            }
                            // We separate categories by OXOXO, so we don't get overflowing phrases.
                            final MutableString category = new MutableString();
                            for (int start = 0, end; (start = CATEGORY_START.search(text,
                                    start)) != -1; start = end) {
                                end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length());
                                if (end != -1)
                                    category.append(text.subSequence(start, end)).append(" OXOXO ");
                                else
                                    break;
                            }
                            metadata.put(MetadataKeys.CATEGORY, category);

                            // Heuristics to get the first paragraph
                            metadata.put(MetadataKeys.FIRSTPAR, new MutableString());
                            String plainText = wikiModel.render(new PlainTextConverter(true), text.toString());
                            for (int start = 0; start < plainText.length(); start++) {
                                //System.err.println("Examining " + plainText.charAt( start )  );
                                if (Character.isWhitespace(plainText.charAt(start)))
                                    continue;
                                if (plainText.charAt(start) == '{') {
                                    //System.err.print( "Braces " + start + " text: \"" + plainText.subSequence( start, start + 10 )  + "\" -> " );
                                    start = BRACES_CLOSED.search(plainText, start);
                                    //System.err.println( start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\"" );
                                    if (start == -1)
                                        break;
                                    start++;
                                } else if (plainText.charAt(start) == '[') {
                                    start = BRACKETS_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else {
                                    final int end = plainText.indexOf('\n', start);
                                    if (end != -1)
                                        metadata.put(MetadataKeys.FIRSTPAR,
                                                new MutableString(plainText.substring(start, end)));
                                    break;
                                }
                            }

                            try {
                                wikiModel.render(new HTMLConverter(), text.toString(), html, false, true);
                                final Map<String, String> categories = wikiModel.getCategories();
                                // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering)
                                for (Entry<String, String> entry : categories.entrySet()) {
                                    final String key = entry.getKey();
                                    final String value = entry.getValue().trim();
                                    if (value.length() != 0) // There are empty such things
                                        html.append("\n<a href=\"").append(baseURL).append("Category:")
                                                .append(Encoder.encodeTitleToUrl(key, true)).append("\">")
                                                .append(HtmlEscapers.htmlEscaper().escape(key))
                                                .append("</a>\n");
                                }
                            } catch (Exception e) {
                                LOGGER.error("Unexpected exception while parsing " + title, e);
                            }
                        }
                        readyDocumentsAndFactories.put(new DocumentAndFactory(
                                freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8),
                                        new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)),
                                freeFactory));
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
            if (inId)
                id.append(ch, start, length);
            if (inTimestamp)
                timestamp.append(ch, start, length);
            if (inNamespaceDef) {
                nameSpaceAccumulator.append(ch, start, length);
                inNamespaceDef = false; // Dirty, but it works
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    final Thread parsingThread = new Thread() {
        public void run() {
            try {
                InputStream in = new FileInputStream(wikipediaXmlDump);
                if (bzipped)
                    in = new BZip2CompressorInputStream(in);
                parser.parse(
                        new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
                        handler);
                readyDocumentsAndFactories.put(END);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };

    parsingThread.start();

    return new AbstractDocumentIterator() {
        private DocumentFactory lastFactory;

        @Override
        public Document nextDocument() throws IOException {
            try {
                final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take();
                if (lastFactory != null)
                    freeFactories.put(lastFactory);
                if (documentAndFactory == END)
                    return null;
                lastFactory = documentAndFactory.factory;
                return documentAndFactory.document;
            } catch (InterruptedException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };
}