Example usage for org.xml.sax.helpers DefaultHandler DefaultHandler

List of usage examples for org.xml.sax.helpers DefaultHandler DefaultHandler

Introduction

In this page you can find the example usage for org.xml.sax.helpers DefaultHandler DefaultHandler.

Prototype

DefaultHandler

Source Link

Usage

From source file:co.anarquianegra.rockolappServidor.mundo.ListaReproductor.java

private void cargarCanciones(File archivo, String extension) throws IOException, SAXException, TikaException {
    cant++;/*from www  . j  av  a 2 s. c o m*/

    Parser parser = null;
    if (extension.equals("wav"))
        parser = new AudioParser();
    else if (extension.equals("mp3"))
        parser = new Mp3Parser();
    InputStream input = new FileInputStream(new File(archivo.toString()));
    ContentHandler handler = new DefaultHandler();
    Metadata metadata = new Metadata();
    ParseContext parseCtx = new ParseContext();
    parser.parse(input, handler, metadata, parseCtx);
    input.close();

    System.out.println("------------------------------------------");
    System.out.println(archivo.getCanonicalPath());

    System.out.println("Title: " + metadata.get("title"));
    System.out.println("Artists: " + metadata.get("xmpDM:artist"));
    System.out.println("Album: " + metadata.get("xmpDM:album"));
    if (metadata.get("title") != null && !metadata.get("title").equals("")
            && metadata.get("xmpDM:artist") != null && !metadata.get("xmpDM:artist").equals("")
            && metadata.get("xmpDM:album") != null && !metadata.get("xmpDM:album").equals("")) {
        Cancion c = new Cancion(metadata.get("title"), metadata.get("xmpDM:artist"),
                metadata.get("xmpDM:album"), archivo.getCanonicalPath());
        cancionesXnombre.agregar(c.darNombre(), c);
        cancionesXartista.agregar(c.darArtista(), c);
        cancionesXid.agregar(c.toString(), c);
    } else if (metadata.get("title") != null && !metadata.get("title").equals("")
            && metadata.get("xmpDM:artist") != null && !metadata.get("xmpDM:artist").equals("")) {
        Cancion c = new Cancion(metadata.get("title"), metadata.get("xmpDM:artist"),
                archivo.getCanonicalPath());
        cancionesXnombre.agregar(c.darNombre(), c);
        cancionesXartista.agregar(c.darArtista(), c);
        cancionesXid.agregar(c.toString(), c);

    } else if (metadata.get("title") != null && !metadata.get("title").equals("")) {
        Cancion c = new Cancion(metadata.get("title"), archivo.getCanonicalPath());
        cancionesXnombre.agregar(c.darNombre(), c);
        cancionesXartista.agregar(c.darArtista(), c);
        cancionesXid.agregar(c.toString(), c);
    } else {
        String[] sp = archivo.getName().split("\\.");

        if (sp.length == 2) {
            Cancion c = new Cancion(sp[0], archivo.getCanonicalPath());
            cancionesXnombre.agregar(c.darNombre(), c);
            cancionesXartista.agregar(c.darArtista(), c);
            cancionesXid.agregar(c.toString(), c);
            System.out.println("Nombre: " + c.darNombre());
        }

    }
}

From source file:com.centurylink.mdw.util.HttpHelper.java

/**
 * Use SAX for fastest parsing./*from w  w  w  .j a  v a 2  s  .  c om*/
 */
private URL parseResponseForHtmlMetaEquiv(String response)
        throws IOException, SAXException, ParserConfigurationException {
    final StringBuffer urlBuf = new StringBuffer();
    InputStream xmlStream = new ByteArrayInputStream(response.getBytes());
    InputSource src = new InputSource(xmlStream);
    SAXParserFactory parserFactory = SAXParserFactory.newInstance();
    SAXParser parser = parserFactory.newSAXParser();
    parser.parse(src, new DefaultHandler() {
        boolean inHtml;
        boolean inHead;
        boolean inMeta;

        public void startElement(String uri, String localName, String qName, Attributes attrs)
                throws SAXException {
            if (qName.equals("html"))
                inHtml = true;
            else if (qName.equals("head"))
                inHead = true;
            else if (qName.equals("meta"))
                inMeta = true;

            if (inHtml && inHead && inMeta) {
                if ("refresh".equals(attrs.getValue("http-equiv"))) {
                    String cAttr = attrs.getValue("content");
                    if (cAttr != null) {
                        int urlIdx = cAttr.indexOf("url=");
                        if (urlIdx >= 0)
                            urlBuf.append(cAttr.substring(urlIdx + 4));
                    }
                }
            }
        }

        public void endElement(String uri, String localName, String qName) throws SAXException {
            if (qName.equals("html"))
                inHtml = false;
            else if (qName.equals("head"))
                inHead = false;
            else if (qName.equals("meta"))
                inMeta = false;
        }
    });

    String str = urlBuf.toString().trim();
    if (str.isEmpty())
        return null;
    else
        return new URL(str);
}

From source file:self.philbrown.droidQuery.Ajax.java

protected TaskResponse doInBackground(Void... arg0) {
    if (this.isCancelled)
        return null;

    //if synchronous, block on the background thread until ready. Then call beforeSend, etc, before resuming.
    if (!beforeSendIsAsync) {
        try {//from  ww w . jav a 2s .  c om
            mutex.acquire();
        } catch (InterruptedException e) {
            Log.w("AjaxTask", "Synchronization Error. Running Task Async");
        }
        final Thread asyncThread = Thread.currentThread();
        isLocked = true;
        mHandler.post(new Runnable() {
            @Override
            public void run() {
                if (options.beforeSend() != null) {
                    if (options.context() != null)
                        options.beforeSend().invoke($.with(options.context()), options);
                    else
                        options.beforeSend().invoke(null, options);
                }

                if (options.isAborted()) {
                    cancel(true);
                    return;
                }

                if (options.global()) {
                    synchronized (globalTasks) {
                        if (globalTasks.isEmpty()) {
                            $.ajaxStart();
                        }
                        globalTasks.add(Ajax.this);
                    }
                    $.ajaxSend();
                } else {
                    synchronized (localTasks) {
                        localTasks.add(Ajax.this);
                    }
                }
                isLocked = false;
                LockSupport.unpark(asyncThread);
            }
        });
        if (isLocked)
            LockSupport.park();
    }

    //here is where to use the mutex

    //handle cached responses
    Object cachedResponse = AjaxCache.sharedCache().getCachedResponse(options);
    //handle ajax caching option
    if (cachedResponse != null && options.cache()) {
        Success s = new Success(cachedResponse);
        s.reason = "cached response";
        s.allHeaders = null;
        return s;

    }

    if (connection == null) {
        try {
            String type = options.type();
            URL url = new URL(options.url());
            if (type == null) {
                type = "GET";
            }
            if (type.equalsIgnoreCase("CUSTOM")) {

                try {
                    connection = options.customConnection();
                } catch (Exception e) {
                    connection = null;
                }

                if (connection == null) {
                    Log.w("droidQuery.ajax",
                            "CUSTOM type set, but AjaxOptions.customRequest is invalid. Defaulting to GET.");
                    connection = (HttpURLConnection) url.openConnection();
                    connection.setRequestMethod("GET");
                }
            } else {
                connection = (HttpURLConnection) url.openConnection();
                connection.setRequestMethod(type);
                if (type.equalsIgnoreCase("POST") || type.equalsIgnoreCase("PUT")) {
                    connection.setDoOutput(true);
                }
            }
        } catch (Throwable t) {
            if (options.debug())
                t.printStackTrace();
            Error e = new Error(null);
            AjaxError error = new AjaxError();
            error.connection = connection;
            error.options = options;
            e.status = 0;
            e.reason = "Bad Configuration";
            error.status = e.status;
            error.reason = e.reason;
            error.response = e.response;
            e.allHeaders = new Headers();
            e.error = error;
            return e;
        }

    }

    Map<String, Object> args = new HashMap<String, Object>();
    args.put("options", options);
    args.put("request", null);
    args.put("connection", connection);
    EventCenter.trigger("ajaxPrefilter", args, null);

    if (options.headers() != null) {
        if (options.headers().authorization() != null) {
            options.headers()
                    .authorization(options.headers().authorization() + " " + options.getEncodedCredentials());
        } else if (options.username() != null) {
            //guessing that authentication is basic
            options.headers().authorization("Basic " + options.getEncodedCredentials());
        }

        for (Entry<String, String> entry : options.headers().map().entrySet()) {
            connection.setRequestProperty(entry.getKey(), entry.getValue());
        }
    }

    if (options.data() != null) {
        try {
            OutputStream os = connection.getOutputStream();
            os.write(options.data().toString().getBytes());
            os.close();
        } catch (Throwable t) {
            Log.w("Ajax", "Could not post data");
        }
    }

    if (options.timeout() != 0) {
        connection.setConnectTimeout(options.timeout());
        connection.setReadTimeout(options.timeout());
    }

    if (options.trustedCertificate() != null) {

        Certificate ca = options.trustedCertificate();

        String keyStoreType = KeyStore.getDefaultType();
        KeyStore keyStore = null;
        try {
            keyStore = KeyStore.getInstance(keyStoreType);
            keyStore.load(null, null);
            keyStore.setCertificateEntry("ca", ca);
        } catch (KeyStoreException e) {
            if (options.debug())
                e.printStackTrace();
        } catch (NoSuchAlgorithmException e) {
            if (options.debug())
                e.printStackTrace();
        } catch (CertificateException e) {
            if (options.debug())
                e.printStackTrace();
        } catch (IOException e) {
            if (options.debug())
                e.printStackTrace();
        }

        if (keyStore == null) {
            Log.w("Ajax", "Could not configure trusted certificate");
        } else {
            try {
                //Create a TrustManager that trusts the CAs in our KeyStore
                String tmfAlgorithm = TrustManagerFactory.getDefaultAlgorithm();
                TrustManagerFactory tmf = TrustManagerFactory.getInstance(tmfAlgorithm);
                tmf.init(keyStore);

                //Create an SSLContext that uses our TrustManager
                SSLContext sslContext = SSLContext.getInstance("TLS");
                sslContext.init(null, tmf.getTrustManagers(), null);
                ((HttpsURLConnection) connection).setSSLSocketFactory(sslContext.getSocketFactory());
            } catch (KeyManagementException e) {
                if (options.debug())
                    e.printStackTrace();
            } catch (NoSuchAlgorithmException e) {
                if (options.debug())
                    e.printStackTrace();
            } catch (KeyStoreException e) {
                if (options.debug())
                    e.printStackTrace();
            }
        }
    }

    try {

        if (options.cookies() != null) {
            CookieManager cm = new CookieManager();
            CookieStore cookies = cm.getCookieStore();
            URI uri = URI.create(options.url());
            for (Entry<String, String> entry : options.cookies().entrySet()) {
                HttpCookie cookie = new HttpCookie(entry.getKey(), entry.getValue());
                cookies.add(uri, cookie);
            }
            connection.setRequestProperty("Cookie", TextUtils.join(",", cookies.getCookies()));
        }

        connection.connect();
        final int statusCode = connection.getResponseCode();
        final String message = connection.getResponseMessage();

        if (options.dataFilter() != null) {
            if (options.context() != null)
                options.dataFilter().invoke($.with(options.context()), connection, options.dataType());
            else
                options.dataFilter().invoke(null, connection, options.dataType());
        }

        final Function function = options.statusCode().get(statusCode);
        if (function != null) {
            mHandler.post(new Runnable() {

                @Override
                public void run() {
                    if (options.context() != null)
                        function.invoke($.with(options.context()), statusCode, options.clone());
                    else
                        function.invoke(null, statusCode, options.clone());
                }

            });

        }

        //handle dataType
        String dataType = options.dataType();
        if (dataType == null)
            dataType = "text";
        if (options.debug())
            Log.i("Ajax", "dataType = " + dataType);
        Object parsedResponse = null;
        InputStream stream = null;
        try {
            if (dataType.equalsIgnoreCase("text") || dataType.equalsIgnoreCase("html")) {
                if (options.debug())
                    Log.i("Ajax", "parsing text");
                stream = AjaxUtil.getInputStream(connection);
                parsedResponse = parseText(stream);
            } else if (dataType.equalsIgnoreCase("xml")) {
                if (options.debug())
                    Log.i("Ajax", "parsing xml");
                if (options.customXMLParser() != null) {
                    stream = AjaxUtil.getInputStream(connection);
                    if (options.SAXContentHandler() != null)
                        options.customXMLParser().parse(stream, options.SAXContentHandler());
                    else
                        options.customXMLParser().parse(stream, new DefaultHandler());
                    parsedResponse = "Response handled by custom SAX parser";
                } else if (options.SAXContentHandler() != null) {
                    stream = AjaxUtil.getInputStream(connection);
                    SAXParserFactory factory = SAXParserFactory.newInstance();

                    factory.setFeature("http://xml.org/sax/features/namespaces", false);
                    factory.setFeature("http://xml.org/sax/features/namespace-prefixes", true);

                    SAXParser parser = factory.newSAXParser();

                    XMLReader reader = parser.getXMLReader();
                    reader.setContentHandler(options.SAXContentHandler());
                    reader.parse(new InputSource(stream));
                    parsedResponse = "Response handled by custom SAX content handler";
                } else {
                    parsedResponse = parseXML(connection);
                }
            } else if (dataType.equalsIgnoreCase("json")) {
                if (options.debug())
                    Log.i("Ajax", "parsing json");
                parsedResponse = parseJSON(connection);
            } else if (dataType.equalsIgnoreCase("script")) {
                if (options.debug())
                    Log.i("Ajax", "parsing script");
                parsedResponse = parseScript(connection);
            } else if (dataType.equalsIgnoreCase("image")) {
                if (options.debug())
                    Log.i("Ajax", "parsing image");
                stream = AjaxUtil.getInputStream(connection);
                parsedResponse = parseImage(stream);
            } else if (dataType.equalsIgnoreCase("raw")) {
                if (options.debug())
                    Log.i("Ajax", "parsing raw data");
                parsedResponse = parseRawContent(connection);
            }
        } catch (ClientProtocolException cpe) {
            if (options.debug())
                cpe.printStackTrace();
            Error e = new Error(parsedResponse);
            AjaxError error = new AjaxError();
            error.connection = connection;
            error.options = options;
            e.status = statusCode;
            e.reason = message;
            error.status = e.status;
            error.reason = e.reason;
            error.response = e.response;
            e.allHeaders = Headers.createHeaders(connection.getHeaderFields());
            e.error = error;
            return e;
        } catch (Exception ioe) {
            if (options.debug())
                ioe.printStackTrace();
            Error e = new Error(parsedResponse);
            AjaxError error = new AjaxError();
            error.connection = connection;
            error.options = options;
            e.status = statusCode;
            e.reason = message;
            error.status = e.status;
            error.reason = e.reason;
            error.response = e.response;
            e.allHeaders = Headers.createHeaders(connection.getHeaderFields());
            e.error = error;
            return e;
        } finally {
            connection.disconnect();
            try {
                if (stream != null) {
                    stream.close();
                }
            } catch (IOException e) {
            }
        }

        if (statusCode >= 300) {
            //an error occurred
            Error e = new Error(parsedResponse);
            Log.e("Ajax Test", parsedResponse.toString());
            //AjaxError error = new AjaxError();
            //error.request = request;
            //error.options = options;
            e.status = e.status;
            e.reason = e.reason;
            //error.status = e.status;
            //error.reason = e.reason;
            //error.response = e.response;
            e.allHeaders = Headers.createHeaders(connection.getHeaderFields());
            //e.error = error;
            if (options.debug())
                Log.i("Ajax", "Error " + e.status + ": " + e.reason);
            return e;
        } else {
            //handle ajax ifModified option
            List<String> lastModifiedHeaders = connection.getHeaderFields().get("last-modified");
            if (lastModifiedHeaders.size() >= 1) {
                try {
                    String h = lastModifiedHeaders.get(0);
                    SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US);
                    Date lastModified = format.parse(h);
                    if (options.ifModified() && lastModified != null) {
                        Date lastModifiedDate;
                        synchronized (lastModifiedUrls) {
                            lastModifiedDate = lastModifiedUrls.get(options.url());
                        }

                        if (lastModifiedDate != null && lastModifiedDate.compareTo(lastModified) == 0) {
                            //request response has not been modified. 
                            //Causes an error instead of a success.
                            Error e = new Error(parsedResponse);
                            AjaxError error = new AjaxError();
                            error.connection = connection;
                            error.options = options;
                            e.status = e.status;
                            e.reason = e.reason;
                            error.status = e.status;
                            error.reason = e.reason;
                            error.response = e.response;
                            e.allHeaders = Headers.createHeaders(connection.getHeaderFields());
                            e.error = error;
                            Function func = options.statusCode().get(304);
                            if (func != null) {
                                if (options.context() != null)
                                    func.invoke($.with(options.context()));
                                else
                                    func.invoke(null);
                            }
                            return e;
                        } else {
                            synchronized (lastModifiedUrls) {
                                lastModifiedUrls.put(options.url(), lastModified);
                            }
                        }
                    }
                } catch (Throwable t) {
                    Log.e("Ajax", "Could not parse Last-Modified Header", t);
                }

            }

            //Now handle a successful request

            Success s = new Success(parsedResponse);
            s.reason = message;
            s.allHeaders = Headers.createHeaders(connection.getHeaderFields());
            return s;
        }

    } catch (Throwable t) {
        if (options.debug())
            t.printStackTrace();
        if (t instanceof java.net.SocketTimeoutException) {
            Error e = new Error(null);
            AjaxError error = new AjaxError();
            error.connection = connection;
            error.options = options;
            error.response = e.response;
            e.status = 0;
            String reason = t.getMessage();
            if (reason == null)
                reason = "Socket Timeout";
            e.reason = reason;
            error.status = e.status;
            error.reason = e.reason;
            if (connection != null)
                e.allHeaders = Headers.createHeaders(connection.getHeaderFields());
            else
                e.allHeaders = new Headers();
            e.error = error;
            return e;
        }
        return null;
    }
}

From source file:com.flexive.core.storage.GenericDivisionImporter.java

/**
 * Import data from a zip archive to a database table
 *
 * @param stmt               statement to use
 * @param zip                zip archive containing the zip entry
 * @param ze                 zip entry within the archive
 * @param xpath              xpath containing the entries to import
 * @param table              name of the table
 * @param executeInsertPhase execute the insert phase?
 * @param executeUpdatePhase execute the update phase?
 * @param updateColumns      columns that should be set to <code>null</code> in a first pass (insert)
 *                           and updated to the provided values in a second pass (update),
 *                           columns that should be used in the where clause have to be prefixed
 *                           with "KEY:", to assign a default value use the expression "columnname:default value",
 *                           if the default value is "@", it will be a negative counter starting at 0, decreasing.
 *                           If the default value starts with "%", it will be set to the column following the "%"
 *                           character in the first pass
 * @throws Exception on errors//  w  w w .j av  a  2 s.c o  m
 */
protected void importTable(Statement stmt, final ZipFile zip, final ZipEntry ze, final String xpath,
        final String table, final boolean executeInsertPhase, final boolean executeUpdatePhase,
        final String... updateColumns) throws Exception {
    //analyze the table
    final ResultSet rs = stmt.executeQuery("SELECT * FROM " + table + " WHERE 1=2");
    StringBuilder sbInsert = new StringBuilder(500);
    StringBuilder sbUpdate = updateColumns.length > 0 ? new StringBuilder(500) : null;
    if (rs == null)
        throw new IllegalArgumentException("Can not analyze table [" + table + "]!");
    sbInsert.append("INSERT INTO ").append(table).append(" (");
    final ResultSetMetaData md = rs.getMetaData();
    final Map<String, ColumnInfo> updateClauseColumns = updateColumns.length > 0
            ? new HashMap<String, ColumnInfo>(md.getColumnCount())
            : null;
    final Map<String, ColumnInfo> updateSetColumns = updateColumns.length > 0
            ? new LinkedHashMap<String, ColumnInfo>(md.getColumnCount())
            : null;
    final Map<String, String> presetColumns = updateColumns.length > 0 ? new HashMap<String, String>(10) : null;
    //preset to a referenced column (%column syntax)
    final Map<String, String> presetRefColumns = updateColumns.length > 0 ? new HashMap<String, String>(10)
            : null;
    final Map<String, Integer> counters = updateColumns.length > 0 ? new HashMap<String, Integer>(10) : null;
    final Map<String, ColumnInfo> insertColumns = new HashMap<String, ColumnInfo>(
            md.getColumnCount() + (counters != null ? counters.size() : 0));
    int insertIndex = 1;
    int updateSetIndex = 1;
    int updateClauseIndex = 1;
    boolean first = true;
    for (int i = 0; i < md.getColumnCount(); i++) {
        final String currCol = md.getColumnName(i + 1).toLowerCase();
        if (updateColumns.length > 0) {
            boolean abort = false;
            for (String col : updateColumns) {
                if (col.indexOf(':') > 0 && !col.startsWith("KEY:")) {
                    String value = col.substring(col.indexOf(':') + 1);
                    col = col.substring(0, col.indexOf(':'));
                    if ("@".equals(value)) {
                        if (currCol.equalsIgnoreCase(col)) {
                            counters.put(col, 0);
                            insertColumns.put(col, new ColumnInfo(md.getColumnType(i + 1), insertIndex++));
                            sbInsert.append(',').append(currCol);
                        }
                    } else if (value.startsWith("%")) {
                        if (currCol.equalsIgnoreCase(col)) {
                            presetRefColumns.put(col, value.substring(1));
                            insertColumns.put(col, new ColumnInfo(md.getColumnType(i + 1), insertIndex++));
                            sbInsert.append(',').append(currCol);
                            //                                System.out.println("==> adding presetRefColumn "+col+" with value of "+value.substring(1));
                        }
                    } else if (!presetColumns.containsKey(col))
                        presetColumns.put(col, value);
                }
                if (currCol.equalsIgnoreCase(col)) {
                    abort = true;
                    updateSetColumns.put(currCol, new ColumnInfo(md.getColumnType(i + 1), updateSetIndex++));
                    break;
                }
            }
            if (abort)
                continue;
        }
        if (first) {
            first = false;
        } else
            sbInsert.append(',');
        sbInsert.append(currCol);
        insertColumns.put(currCol, new ColumnInfo(md.getColumnType(i + 1), insertIndex++));
    }
    if (updateColumns.length > 0 && executeUpdatePhase) {
        sbUpdate.append("UPDATE ").append(table).append(" SET ");
        int counter = 0;
        for (String updateColumn : updateSetColumns.keySet()) {
            if (counter++ > 0)
                sbUpdate.append(',');
            sbUpdate.append(updateColumn).append("=?");
        }
        sbUpdate.append(" WHERE ");
        boolean hasKeyColumn = false;
        for (String col : updateColumns) {
            if (!col.startsWith("KEY:"))
                continue;
            hasKeyColumn = true;
            String keyCol = col.substring(4);
            for (int i = 0; i < md.getColumnCount(); i++) {
                if (!md.getColumnName(i + 1).equalsIgnoreCase(keyCol))
                    continue;
                updateClauseColumns.put(keyCol, new ColumnInfo(md.getColumnType(i + 1), updateClauseIndex++));
                sbUpdate.append(keyCol).append("=? AND ");
                break;
            }

        }
        if (!hasKeyColumn)
            throw new IllegalArgumentException("Update columns require a KEY!");
        sbUpdate.delete(sbUpdate.length() - 5, sbUpdate.length()); //remove trailing " AND "
        //"shift" clause indices
        for (String col : updateClauseColumns.keySet()) {
            GenericDivisionImporter.ColumnInfo ci = updateClauseColumns.get(col);
            ci.index += (updateSetIndex - 1);
        }
    }
    if (presetColumns != null) {
        for (String key : presetColumns.keySet())
            sbInsert.append(',').append(key);
    }
    sbInsert.append(")VALUES(");
    for (int i = 0; i < insertColumns.size(); i++) {
        if (i > 0)
            sbInsert.append(',');
        sbInsert.append('?');
    }
    if (presetColumns != null) {
        for (String key : presetColumns.keySet())
            sbInsert.append(',').append(presetColumns.get(key));
    }
    sbInsert.append(')');
    if (DBG) {
        LOG.info("Insert statement:\n" + sbInsert.toString());
        if (updateColumns.length > 0)
            LOG.info("Update statement:\n" + sbUpdate.toString());
    }
    //build a map containing all nodes that require attributes
    //this allows for matching simple xpath queries like "flatstorages/storage[@name='FX_FLAT_STORAGE']/data"
    final Map<String, List<String>> queryAttributes = new HashMap<String, List<String>>(5);
    for (String pElem : xpath.split("/")) {
        if (!(pElem.indexOf('@') > 0 && pElem.indexOf('[') > 0))
            continue;
        List<String> att = new ArrayList<String>(5);
        for (String pAtt : pElem.split("@")) {
            if (!(pAtt.indexOf('=') > 0))
                continue;
            att.add(pAtt.substring(0, pAtt.indexOf('=')));
        }
        queryAttributes.put(pElem.substring(0, pElem.indexOf('[')), att);
    }
    final PreparedStatement psInsert = stmt.getConnection().prepareStatement(sbInsert.toString());
    final PreparedStatement psUpdate = updateColumns.length > 0 && executeUpdatePhase
            ? stmt.getConnection().prepareStatement(sbUpdate.toString())
            : null;
    try {
        final SAXParser parser = SAXParserFactory.newInstance().newSAXParser();
        final DefaultHandler handler = new DefaultHandler() {
            private String currentElement = null;
            private Map<String, String> data = new HashMap<String, String>(10);
            private StringBuilder sbData = new StringBuilder(10000);
            boolean inTag = false;
            boolean inElement = false;
            int counter;
            List<String> path = new ArrayList<String>(10);
            StringBuilder currPath = new StringBuilder(100);
            boolean insertMode = true;

            /**
             * {@inheritDoc}
             */
            @Override
            public void startDocument() throws SAXException {
                counter = 0;
                inTag = false;
                inElement = false;
                path.clear();
                currPath.setLength(0);
                sbData.setLength(0);
                data.clear();
                currentElement = null;
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void processingInstruction(String target, String data) throws SAXException {
                if (target != null && target.startsWith("fx_")) {
                    if (target.equals("fx_mode"))
                        insertMode = "insert".equals(data);
                } else
                    super.processingInstruction(target, data);
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void endDocument() throws SAXException {
                if (insertMode)
                    LOG.info("Imported [" + counter + "] entries into [" + table + "] for xpath [" + xpath
                            + "]");
                else
                    LOG.info("Updated [" + counter + "] entries in [" + table + "] for xpath [" + xpath + "]");
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                pushPath(qName, attributes);
                if (currPath.toString().equals(xpath)) {
                    inTag = true;
                    data.clear();
                    for (int i = 0; i < attributes.getLength(); i++) {
                        String name = attributes.getLocalName(i);
                        if (StringUtils.isEmpty(name))
                            name = attributes.getQName(i);
                        data.put(name, attributes.getValue(i));
                    }
                } else {
                    currentElement = qName;
                }
                inElement = true;
                sbData.setLength(0);
            }

            /**
             * Push a path element from the stack
             *
             * @param qName element name to push
             * @param att attributes
             */
            private void pushPath(String qName, Attributes att) {
                if (att.getLength() > 0 && queryAttributes.containsKey(qName)) {
                    String curr = qName + "[";
                    boolean first = true;
                    final List<String> attList = queryAttributes.get(qName);
                    for (int i = 0; i < att.getLength(); i++) {
                        if (!attList.contains(att.getQName(i)))
                            continue;
                        if (first)
                            first = false;
                        else
                            curr += ',';
                        curr += "@" + att.getQName(i) + "='" + att.getValue(i) + "'";
                    }
                    curr += ']';
                    path.add(curr);
                } else
                    path.add(qName);
                buildPath();
            }

            /**
             * Pop the top path element from the stack
             */
            private void popPath() {
                path.remove(path.size() - 1);
                buildPath();
            }

            /**
             * Rebuild the current path
             */
            private synchronized void buildPath() {
                currPath.setLength(0);
                for (String s : path)
                    currPath.append(s).append('/');
                if (currPath.length() > 1)
                    currPath.delete(currPath.length() - 1, currPath.length());
                //                    System.out.println("currPath: " + currPath);
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void endElement(String uri, String localName, String qName) throws SAXException {
                if (currPath.toString().equals(xpath)) {
                    if (DBG)
                        LOG.info("Insert [" + xpath + "]: [" + data + "]");
                    inTag = false;
                    try {
                        if (insertMode) {
                            if (executeInsertPhase) {
                                processColumnSet(insertColumns, psInsert);
                                counter += psInsert.executeUpdate();
                            }
                        } else {
                            if (executeUpdatePhase) {
                                if (processColumnSet(updateSetColumns, psUpdate)) {
                                    processColumnSet(updateClauseColumns, psUpdate);
                                    counter += psUpdate.executeUpdate();
                                }
                            }
                        }
                    } catch (SQLException e) {
                        throw new SAXException(e);
                    } catch (ParseException e) {
                        throw new SAXException(e);
                    }
                } else {
                    if (inTag) {
                        data.put(currentElement, sbData.toString());
                    }
                    currentElement = null;
                }
                popPath();
                inElement = false;
                sbData.setLength(0);
            }

            /**
             * Process a column set
             *
             * @param columns the columns to process
             * @param ps prepared statement to use
             * @return if data other than <code>null</code> has been set
             * @throws SQLException on errors
             * @throws ParseException on date/time conversion errors
             */
            private boolean processColumnSet(Map<String, ColumnInfo> columns, PreparedStatement ps)
                    throws SQLException, ParseException {
                boolean dataSet = false;
                for (String col : columns.keySet()) {
                    ColumnInfo ci = columns.get(col);
                    String value = data.get(col);
                    if (insertMode && counters != null && counters.get(col) != null) {
                        final int newVal = counters.get(col) - 1;
                        value = String.valueOf(newVal);
                        counters.put(col, newVal);
                        //                            System.out.println("new value for " + col + ": " + newVal);
                    }
                    if (insertMode && presetRefColumns != null && presetRefColumns.get(col) != null) {
                        value = data.get(presetRefColumns.get(col));
                        //                            System.out.println("Set presetRefColumn for "+col+" to ["+value+"] from column ["+presetRefColumns.get(col)+"]");
                    }

                    if (value == null)
                        ps.setNull(ci.index, ci.columnType);
                    else {
                        dataSet = true;
                        switch (ci.columnType) {
                        case Types.BIGINT:
                        case Types.NUMERIC:
                            if (DBG)
                                LOG.info("BigInt " + ci.index + "->" + new BigDecimal(value));
                            ps.setBigDecimal(ci.index, new BigDecimal(value));
                            break;
                        case java.sql.Types.DOUBLE:
                            if (DBG)
                                LOG.info("Double " + ci.index + "->" + Double.parseDouble(value));
                            ps.setDouble(ci.index, Double.parseDouble(value));
                            break;
                        case java.sql.Types.FLOAT:
                        case java.sql.Types.REAL:
                            if (DBG)
                                LOG.info("Float " + ci.index + "->" + Float.parseFloat(value));
                            ps.setFloat(ci.index, Float.parseFloat(value));
                            break;
                        case java.sql.Types.TIMESTAMP:
                        case java.sql.Types.DATE:
                            if (DBG)
                                LOG.info("Timestamp/Date " + ci.index + "->"
                                        + FxFormatUtils.getDateTimeFormat().parse(value));
                            ps.setTimestamp(ci.index,
                                    new Timestamp(FxFormatUtils.getDateTimeFormat().parse(value).getTime()));
                            break;
                        case Types.TINYINT:
                        case Types.SMALLINT:
                            if (DBG)
                                LOG.info("Integer " + ci.index + "->" + Integer.valueOf(value));
                            ps.setInt(ci.index, Integer.valueOf(value));
                            break;
                        case Types.INTEGER:
                        case Types.DECIMAL:
                            try {
                                if (DBG)
                                    LOG.info("Long " + ci.index + "->" + Long.valueOf(value));
                                ps.setLong(ci.index, Long.valueOf(value));
                            } catch (NumberFormatException e) {
                                //Fallback (temporary) for H2 if the reported long is a big decimal (tree...)
                                ps.setBigDecimal(ci.index, new BigDecimal(value));
                            }
                            break;
                        case Types.BIT:
                        case Types.CHAR:
                        case Types.BOOLEAN:
                            if (DBG)
                                LOG.info("Boolean " + ci.index + "->" + value);
                            if ("1".equals(value) || "true".equals(value))
                                ps.setBoolean(ci.index, true);
                            else
                                ps.setBoolean(ci.index, false);
                            break;
                        case Types.LONGVARBINARY:
                        case Types.VARBINARY:
                        case Types.BLOB:
                        case Types.BINARY:
                            ZipEntry bin = zip.getEntry(value);
                            if (bin == null) {
                                LOG.error("Failed to lookup binary [" + value + "]!");
                                ps.setNull(ci.index, ci.columnType);
                                break;
                            }
                            try {
                                ps.setBinaryStream(ci.index, zip.getInputStream(bin), (int) bin.getSize());
                            } catch (IOException e) {
                                LOG.error("IOException importing binary [" + value + "]: " + e.getMessage(), e);
                            }
                            break;
                        case Types.CLOB:
                        case Types.LONGVARCHAR:
                        case Types.VARCHAR:
                        case SQL_LONGNVARCHAR:
                        case SQL_NCHAR:
                        case SQL_NCLOB:
                        case SQL_NVARCHAR:
                            if (DBG)
                                LOG.info("String " + ci.index + "->" + value);
                            ps.setString(ci.index, value);
                            break;
                        default:
                            LOG.warn("Unhandled type [" + ci.columnType + "] for column [" + col + "]");
                        }
                    }
                }
                return dataSet;
            }

            /**
             * {@inheritDoc}
             */
            @Override
            public void characters(char[] ch, int start, int length) throws SAXException {
                if (inElement)
                    sbData.append(ch, start, length);
            }

        };
        handler.processingInstruction("fx_mode", "insert");
        parser.parse(zip.getInputStream(ze), handler);
        if (updateColumns.length > 0 && executeUpdatePhase) {
            handler.processingInstruction("fx_mode", "update");
            parser.parse(zip.getInputStream(ze), handler);
        }
    } finally {
        Database.closeObjects(GenericDivisionImporter.class, psInsert, psUpdate);
    }
}

From source file:efen.parsewiki.WikipediaDocumentSequence.java

@Override
public DocumentIterator iterator() throws IOException {
    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final MutableString nameSpaceAccumulator = new MutableString();
    final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>();
    final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16);
    for (int i = freeFactories.remainingCapacity(); i-- != 0;)
        freeFactories.add(this.factory.copy());
    final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>(
            freeFactories.size());//from  w  ww  .j a v a2s .  co m

    final SAXParser parser;
    try {
        parser = saxParserFactory.newSAXParser();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    final DefaultHandler handler = new DefaultHandler() {
        private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        private boolean inText;
        private boolean inTitle;
        private boolean inId;
        private boolean inTimestamp;
        private boolean inNamespaceDef;
        private boolean redirect;
        private MutableString text = new MutableString();
        private MutableString title = new MutableString();
        private MutableString id = new MutableString();
        private MutableString timestamp = new MutableString();
        private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>();
        {
            metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8");
            metadata.put(MetadataKeys.REDIRECT, redirectAnchors);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                redirect = inText = inTitle = inId = inTimestamp = false;
                text.length(0);
                title.length(0);
                id.length(0);
                timestamp.length(0);
            } else if ("text".equals(localName))
                inText = true;
            else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first id/title elements.
            else if ("id".equals(localName) && id.length() == 0)
                inId = true;
            else if ("timestamp".equals(localName) && timestamp.length() == 0)
                inTimestamp = true;
            else if ("redirect".equals(localName)) {
                redirect = true;
                if (attributes.getValue("title") != null)
                    // Accumulate the title of the page as virtual text of the redirect page.
                    synchronized (redirectAnchors) {
                        final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true);
                        redirectAnchors.add(
                                new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length())
                                        .append(baseURL).append(link), title.copy()));
                    }
            } else if ("namespace".equals(localName)) {
                // Found a new namespace
                inNamespaceDef = true;
                nameSpaceAccumulator.length(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("namespace".equals(localName)) { // Collecting a namespace
                if (nameSpaceAccumulator.length() != 0)
                    nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase());
                return;
            }

            if ("namespaces".equals(localName)) { // All namespaces collected
                nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator);
                return;
            }

            if (!redirect) {
                if ("title".equals(localName)) {
                    // Set basic metadata for the page
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy());
                    String link = Encoder.encodeTitleToUrl(title.toString(), true);
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI,
                            new MutableString(baseURL.length() + link.length()).append(baseURL).append(link));
                    inTitle = false;
                } else if ("id".equals(localName)) {
                    metadata.put(MetadataKeys.ID, Long.valueOf(id.toString()));
                    inId = false;
                } else if ("timestamp".equals(localName)) {
                    try {
                        metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString()));
                    } catch (ParseException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                    inTimestamp = false;
                } else if ("text".equals(localName)) {
                    inText = false;
                    if (!keepNamespaced) {
                        // Namespaces are case-insensitive and language-dependent
                        final int pos = title.indexOf(':');
                        if (pos != -1 && isATrueNamespace(title.substring(0, pos)))
                            return;
                    }
                    try {
                        final MutableString html = new MutableString();
                        DocumentFactory freeFactory;
                        try {
                            freeFactory = freeFactories.take();
                        } catch (InterruptedException e) {
                            throw new RuntimeException(e.getMessage(), e);
                        }
                        if (parseText) {
                            if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page.
                                /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */
                                final MutableString newLinks = new MutableString();
                                for (int start = 0, end; (start = BRACKETS_OPEN.search(text,
                                        start)) != -1; start = end) {
                                    end = start;
                                    final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start);
                                    // Note that we don't escape title because we are working at the Wikipedia raw text level.
                                    if (endOfLink != -1) {
                                        newLinks.append(text.array(), start, endOfLink - start).append('|')
                                                .append(title).append("]]\n");
                                        end = endOfLink;
                                    }
                                    end++;
                                }

                                text.append(newLinks);
                            }
                            // We separate categories by OXOXO, so we don't get overflowing phrases.
                            final MutableString category = new MutableString();
                            for (int start = 0, end; (start = CATEGORY_START.search(text,
                                    start)) != -1; start = end) {
                                end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length());
                                if (end != -1)
                                    category.append(text.subSequence(start, end)).append(" OXOXO ");
                                else
                                    break;
                            }
                            metadata.put(MetadataKeys.CATEGORY, category);

                            // Heuristics to get the first paragraph
                            metadata.put(MetadataKeys.FIRSTPAR, new MutableString());
                            String plainText = new WikiModel(imageBaseURL, linkBaseURL)
                                    .render(new PlainTextConverter(true), text.toString());
                            for (int start = 0; start < plainText.length(); start++) {
                                //System.err.println("Examining " + plainText.charAt( start )  );
                                if (Character.isWhitespace(plainText.charAt(start)))
                                    continue;
                                if (plainText.charAt(start) == '{') {
                                    //System.err.print( "Braces " + start + " text: \"" + plainText.subSequence( start, start + 10 )  + "\" -> " );
                                    start = BRACES_CLOSED.search(plainText, start);
                                    //System.err.println( start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\"" );
                                    if (start == -1)
                                        break;
                                    start++;
                                } else if (plainText.charAt(start) == '[') {
                                    start = BRACKETS_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else {
                                    final int end = plainText.indexOf('\n', start);
                                    if (end != -1)
                                        metadata.put(MetadataKeys.FIRSTPAR,
                                                new MutableString(plainText.substring(start, end)));//new MutableString( new WikiModel( imageBaseURL, linkBaseURL ).render( new PlainTextConverter( true ), text.substring( start, end ).toString() ) ) );
                                    break;
                                }
                            }

                            try {
                                WikiModel wikiModel = new WikiModel(imageBaseURL, linkBaseURL);
                                wikiModel.render(new HTMLConverter(), text.toString(), html, false, false);
                                final Map<String, String> categories = wikiModel.getCategories();
                                // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering)
                                for (Entry<String, String> entry : categories.entrySet()) {
                                    final String key = entry.getKey();
                                    final String value = entry.getValue().trim();
                                    if (value.length() != 0) // There are empty such things
                                        html.append("\n<a href=\"").append(baseURL).append("Category:")
                                                .append(Encoder.encodeTitleToUrl(key, true)).append("\">")
                                                .append(HtmlEscapers.htmlEscaper().escape(key))
                                                .append("</a>\n");
                                }
                            } catch (Exception e) {
                                LOGGER.error("Unexpected exception while parsing " + title, e);
                            }
                        }
                        readyDocumentsAndFactories.put(new DocumentAndFactory(
                                freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8),
                                        new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)),
                                freeFactory));
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
            if (inId)
                id.append(ch, start, length);
            if (inTimestamp)
                timestamp.append(ch, start, length);
            if (inNamespaceDef) {
                nameSpaceAccumulator.append(ch, start, length);
                inNamespaceDef = false; // Dirty, but it works
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    final Thread parsingThread = new Thread() {
        public void run() {
            try {
                InputStream in = new FileInputStream(wikipediaXmlDump);
                if (bzipped)
                    in = new BZip2CompressorInputStream(in);
                parser.parse(
                        new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
                        handler);
                readyDocumentsAndFactories.put(END);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };

    parsingThread.start();

    return new AbstractDocumentIterator() {
        private DocumentFactory lastFactory;

        @Override
        public Document nextDocument() throws IOException {
            try {
                final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take();
                if (lastFactory != null)
                    freeFactories.put(lastFactory);
                if (documentAndFactory == END)
                    return null;
                lastFactory = documentAndFactory.factory;
                return documentAndFactory.document;
            } catch (InterruptedException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };
}

From source file:it.unimi.di.big.mg4j.document.WikipediaDocumentSequence.java

@Override
public DocumentIterator iterator() throws IOException {
    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final MutableString nameSpaceAccumulator = new MutableString();
    final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>();
    final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16);
    for (int i = freeFactories.remainingCapacity(); i-- != 0;)
        freeFactories.add(this.factory.copy());
    final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>(
            freeFactories.size());//from   w  w  w . ja v a 2  s.c  o m

    final SAXParser parser;
    try {
        parser = saxParserFactory.newSAXParser();
    } catch (Exception e) {
        throw new RuntimeException(e.getMessage(), e);
    }
    final DefaultHandler handler = new DefaultHandler() {
        private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
        private boolean inText;
        private boolean inTitle;
        private boolean inId;
        private boolean inTimestamp;
        private boolean inNamespaceDef;
        private boolean redirect;
        private MutableString text = new MutableString();
        private MutableString title = new MutableString();
        private MutableString id = new MutableString();
        private MutableString timestamp = new MutableString();
        private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>();
        {
            metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8");
            metadata.put(MetadataKeys.REDIRECT, redirectAnchors);
        }

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                redirect = inText = inTitle = inId = inTimestamp = false;
                text.length(0);
                title.length(0);
                id.length(0);
                timestamp.length(0);
            } else if ("text".equals(localName))
                inText = true;
            else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first id/title elements.
            else if ("id".equals(localName) && id.length() == 0)
                inId = true;
            else if ("timestamp".equals(localName) && timestamp.length() == 0)
                inTimestamp = true;
            else if ("redirect".equals(localName)) {
                redirect = true;
                if (attributes.getValue("title") != null)
                    // Accumulate the title of the page as virtual text of the redirect page.
                    synchronized (redirectAnchors) {
                        final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true);
                        redirectAnchors.add(
                                new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length())
                                        .append(baseURL).append(link), title.copy()));
                    }
            } else if ("namespace".equals(localName)) {
                // Found a new namespace
                inNamespaceDef = true;
                nameSpaceAccumulator.length(0);
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("namespace".equals(localName)) { // Collecting a namespace
                if (nameSpaceAccumulator.length() != 0)
                    nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase());
                return;
            }

            if ("namespaces".equals(localName)) { // All namespaces collected
                nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator);
                return;
            }

            if (!redirect) {
                if ("title".equals(localName)) {
                    // Set basic metadata for the page
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy());
                    String link = Encoder.encodeTitleToUrl(title.toString(), true);
                    metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI,
                            new MutableString(baseURL.length() + link.length()).append(baseURL).append(link));
                    inTitle = false;
                } else if ("id".equals(localName)) {
                    metadata.put(MetadataKeys.ID, Long.valueOf(id.toString()));
                    inId = false;
                } else if ("timestamp".equals(localName)) {
                    try {
                        metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString()));
                    } catch (ParseException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                    inTimestamp = false;
                } else if ("text".equals(localName)) {
                    inText = false;
                    if (!keepNamespaced) {
                        // Namespaces are case-insensitive and language-dependent
                        final int pos = title.indexOf(':');
                        if (pos != -1 && nameSpaces.contains(title.substring(0, pos).toLowerCase()))
                            return;
                    }
                    try {
                        final MutableString html = new MutableString();
                        DocumentFactory freeFactory;
                        try {
                            freeFactory = freeFactories.take();
                        } catch (InterruptedException e) {
                            throw new RuntimeException(e.getMessage(), e);
                        }
                        if (parseText) {
                            if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page.
                                /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */
                                final MutableString newLinks = new MutableString();
                                for (int start = 0, end; (start = BRACKETS_OPEN.search(text,
                                        start)) != -1; start = end) {
                                    end = start;
                                    final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start);
                                    // Note that we don't escape title because we are working at the Wikipedia raw text level.
                                    if (endOfLink != -1) {
                                        newLinks.append(text.array(), start, endOfLink - start).append('|')
                                                .append(title).append("]]\n");
                                        end = endOfLink;
                                    }
                                    end++;
                                }

                                text.append(newLinks);
                            }
                            // We separate categories by OXOXO, so we don't get overflowing phrases.
                            final MutableString category = new MutableString();
                            for (int start = 0, end; (start = CATEGORY_START.search(text,
                                    start)) != -1; start = end) {
                                end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length());
                                if (end != -1)
                                    category.append(text.subSequence(start, end)).append(" OXOXO ");
                                else
                                    break;
                            }
                            metadata.put(MetadataKeys.CATEGORY, category);

                            // Heuristics to get the first paragraph
                            metadata.put(MetadataKeys.FIRSTPAR, new MutableString());
                            String plainText = wikiModel.render(new PlainTextConverter(true), text.toString());
                            for (int start = 0; start < plainText.length(); start++) {
                                //System.err.println("Examining " + plainText.charAt( start )  );
                                if (Character.isWhitespace(plainText.charAt(start)))
                                    continue;
                                if (plainText.charAt(start) == '{') {
                                    //System.err.print( "Braces " + start + " text: \"" + plainText.subSequence( start, start + 10 )  + "\" -> " );
                                    start = BRACES_CLOSED.search(plainText, start);
                                    //System.err.println( start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\"" );
                                    if (start == -1)
                                        break;
                                    start++;
                                } else if (plainText.charAt(start) == '[') {
                                    start = BRACKETS_CLOSED.search(plainText, start);
                                    if (start == -1)
                                        break;
                                    start++;
                                } else {
                                    final int end = plainText.indexOf('\n', start);
                                    if (end != -1)
                                        metadata.put(MetadataKeys.FIRSTPAR,
                                                new MutableString(plainText.substring(start, end)));
                                    break;
                                }
                            }

                            try {
                                wikiModel.render(new HTMLConverter(), text.toString(), html, false, true);
                                final Map<String, String> categories = wikiModel.getCategories();
                                // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering)
                                for (Entry<String, String> entry : categories.entrySet()) {
                                    final String key = entry.getKey();
                                    final String value = entry.getValue().trim();
                                    if (value.length() != 0) // There are empty such things
                                        html.append("\n<a href=\"").append(baseURL).append("Category:")
                                                .append(Encoder.encodeTitleToUrl(key, true)).append("\">")
                                                .append(HtmlEscapers.htmlEscaper().escape(key))
                                                .append("</a>\n");
                                }
                            } catch (Exception e) {
                                LOGGER.error("Unexpected exception while parsing " + title, e);
                            }
                        }
                        readyDocumentsAndFactories.put(new DocumentAndFactory(
                                freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8),
                                        new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)),
                                freeFactory));
                    } catch (InterruptedException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    } catch (IOException e) {
                        throw new RuntimeException(e.getMessage(), e);
                    }
                }
            }
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
            if (inId)
                id.append(ch, start, length);
            if (inTimestamp)
                timestamp.append(ch, start, length);
            if (inNamespaceDef) {
                nameSpaceAccumulator.append(ch, start, length);
                inNamespaceDef = false; // Dirty, but it works
            }
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inText && parseText)
                text.append(ch, start, length);
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    final Thread parsingThread = new Thread() {
        public void run() {
            try {
                InputStream in = new FileInputStream(wikipediaXmlDump);
                if (bzipped)
                    in = new BZip2CompressorInputStream(in);
                parser.parse(
                        new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
                        handler);
                readyDocumentsAndFactories.put(END);
            } catch (Exception e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };

    parsingThread.start();

    return new AbstractDocumentIterator() {
        private DocumentFactory lastFactory;

        @Override
        public Document nextDocument() throws IOException {
            try {
                final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take();
                if (lastFactory != null)
                    freeFactories.put(lastFactory);
                if (documentAndFactory == END)
                    return null;
                lastFactory = documentAndFactory.factory;
                return documentAndFactory.document;
            } catch (InterruptedException e) {
                throw new RuntimeException(e.getMessage(), e);
            }
        }
    };
}

From source file:io.datalayer.conf.XmlConfigurationTest.java

/**
 * Creates a validating document builder.
 * /*  w  w w  .  j a  v a 2 s .  c o m*/
 * @return the document builder
 * @throws ParserConfigurationException
 *             if an error occurs
 */
private DocumentBuilder createValidatingDocBuilder() throws ParserConfigurationException {
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    factory.setValidating(true);
    DocumentBuilder builder = factory.newDocumentBuilder();
    builder.setErrorHandler(new DefaultHandler() {
        @Override
        public void error(SAXParseException ex) throws SAXException {
            throw ex;
        }
    });
    return builder;
}

From source file:org.hil.webservice.mobile.impl.ChildrenWebServiceImpl.java

private void parseXml(String xml) {
    list = new ArrayList<Children>();
    tmpAuth = "";
    tmpAuthor = "";
    SAXParserFactory factory = SAXParserFactory.newInstance();
    try {//from   w  ww . j ava2  s  . c om
        SAXParser saxParser = factory.newSAXParser();

        DefaultHandler handler = new DefaultHandler() {

            Children tempChild;
            String tempVal = "";

            public void startElement(String uri, String localName, String qName, Attributes attributes)
                    throws SAXException {
                if (qName.equalsIgnoreCase("Children")) {
                    tmpAuth = attributes.getValue("sessionAuth");

                    if (!tmpAuth.equalsIgnoreCase("sessionAuth"))
                        return;
                    tmpAuthor = attributes.getValue("author");
                    force = Boolean.parseBoolean(attributes.getValue("force"));
                }
                tempVal = "";
                if (qName.equalsIgnoreCase("Child")) {
                    tempChild = new Children();
                }
            }

            public void endElement(String uri, String localName, String qName) throws SAXException {
                if (qName.equalsIgnoreCase("Child")) {
                    //add it to the list
                    list.add(tempChild);
                } else if (qName.equalsIgnoreCase("id")) {
                    if (tempVal.length() > 0)
                        tempChild.setId(Long.parseLong(tempVal));
                } else if (qName.equalsIgnoreCase("fullName")) {
                    if (tempVal.length() > 0)
                        tempChild.setFullName(tempVal);
                } else if (qName.equalsIgnoreCase("dateOfBirth")) {
                    if (tempVal.length() > 0) {
                        SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy");
                        try {
                            Date bdate = format.parse(tempVal);
                            tempChild.setDateOfBirth(bdate);
                        } catch (ParseException e) {
                            e.printStackTrace();
                        }
                    }
                } else if (qName.equalsIgnoreCase("gender")) {
                    if (tempVal.length() > 0)
                        tempChild.setGender(Boolean.parseBoolean(tempVal));
                } else if (qName.equalsIgnoreCase("childCode")) {
                    if (tempVal.length() > 0)
                        tempChild.setChildCode(tempVal);
                } else if (qName.equalsIgnoreCase("fatherName")) {
                    if (tempVal.length() > 0)
                        tempChild.setFatherName(tempVal);
                } else if (qName.equalsIgnoreCase("fatherBirthYear")) {
                    if (tempVal.length() > 0)
                        tempChild.setFatherBirthYear(Integer.parseInt(tempVal));
                } else if (qName.equalsIgnoreCase("fatherID")) {
                    if (tempVal.length() > 0)
                        tempChild.setFatherID(tempVal);
                } else if (qName.equalsIgnoreCase("fatherMobile")) {
                    if (tempVal.length() > 0)
                        tempChild.setFatherMobile(tempVal);
                } else if (qName.equalsIgnoreCase("motherName")) {
                    if (tempVal.length() > 0)
                        tempChild.setMotherName(tempVal);
                } else if (qName.equalsIgnoreCase("motherBirthYear")) {
                    if (tempVal.length() > 0)
                        tempChild.setMotherBirthYear(Integer.parseInt(tempVal));
                } else if (qName.equalsIgnoreCase("motherID")) {
                    if (tempVal.length() > 0)
                        tempChild.setMotherID(tempVal);
                } else if (qName.equalsIgnoreCase("motherMobile")) {
                    if (tempVal.length() > 0)
                        tempChild.setMotherMobile(tempVal);
                } else if (qName.equalsIgnoreCase("caretakerName")) {
                    if (tempVal.length() > 0)
                        tempChild.setCaretakerName(tempVal);
                } else if (qName.equalsIgnoreCase("caretakerBirthYear")) {
                    if (tempVal.length() > 0)
                        tempChild.setCaretakerBirthYear(Integer.parseInt(tempVal));
                } else if (qName.equalsIgnoreCase("caretakerID")) {
                    if (tempVal.length() > 0)
                        tempChild.setCaretakerID(tempVal);
                } else if (qName.equalsIgnoreCase("caretakerMobile")) {
                    if (tempVal.length() > 0)
                        tempChild.setCaretakerMobile(tempVal);
                } else if (qName.equalsIgnoreCase("currentCaretaker")) {
                    if (tempVal.length() > 0)
                        tempChild.setCurrentCaretaker(Short.parseShort(tempVal));
                } else if (qName.equalsIgnoreCase("villageId")) {
                    if (tempVal.length() > 0) {
                        tempChild.setVillage(villageDao.get(Long.parseLong(tempVal)));
                    }
                }
            }

            public void characters(char ch[], int start, int length) throws SAXException {
                tempVal = new String(ch, start, length);
            }

        };
        saxParser.parse(new InputSource(new StringReader(xml)), handler);
    } catch (ParserConfigurationException e) {
        e.printStackTrace();
    } catch (SAXException e) {
        e.printStackTrace();
    } catch (IOException e) {
        // TODO Auto-generated catch block
        e.printStackTrace();
    }

}

From source file:efen.parsewiki.WikipediaDocumentSequence.java

public static void main(final String arg[])
        throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException {
    SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(),
            "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.",
            new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"),
                    new Switch("iso", 'i', "iso",
                            "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."),
                    new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE),
                            JSAP.NOT_REQUIRED, 'w', "width",
                            "The width, in bits, of the signatures used to sign the function from URIs to their rank."),
                    new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The file containing the Wikipedia dump."),
                    new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."),
                    new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The URIs of the documents in the collection (generated by ScanMetadata)."),
                    new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of a precomputed virtual document resolver for the collection."),
                    new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED,
                            "The name of the resulting virtual document resolver.") });

    JSAPResult jsapResult = jsap.parse(arg);
    if (jsap.messagePrinted())
        return;/* w w w  .j a  v  a  2 s.  c  om*/

    final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance();
    saxParserFactory.setNamespaceAware(true);
    final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>();
    final String baseURL = jsapResult.getString("baseURL");
    final ProgressLogger progressLogger = new ProgressLogger(LOGGER);
    progressLogger.itemsName = "redirects";
    progressLogger.start("Extracting redirects...");

    final SAXParser parser = saxParserFactory.newSAXParser();
    final DefaultHandler handler = new DefaultHandler() {
        private boolean inTitle;
        private MutableString title = new MutableString();

        @Override
        public void startElement(String uri, String localName, String qName, Attributes attributes)
                throws SAXException {
            if ("page".equals(localName)) {
                inTitle = false;
                title.length(0);
            } else if ("title".equals(localName) && title.length() == 0)
                inTitle = true; // We catch only the first title element.
            else if ("redirect".equals(localName) && attributes.getValue("title") != null) {
                progressLogger.update();
                redirects.put(title.copy(), attributes.getValue("title"));
            }
        }

        @Override
        public void endElement(String uri, String localName, String qName) throws SAXException {
            if ("title".equals(localName))
                inTitle = false;
        }

        @Override
        public void characters(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);
        }

        @Override
        public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
            if (inTitle)
                title.append(ch, start, length);
        }
    };

    InputStream in = new FileInputStream(jsapResult.getString("file"));
    if (jsapResult.userSpecified("bzip2"))
        in = new BZip2CompressorInputStream(in);
    parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)),
            handler);
    progressLogger.done();

    final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>();
    final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr"));

    progressLogger.expectedUpdates = redirects.size();
    progressLogger.start("Examining redirects...");

    for (Map.Entry<MutableString, String> e : redirects.entrySet()) {
        final MutableString start = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true));
        final MutableString end = new MutableString().append(baseURL)
                .append(Encoder.encodeTitleToUrl(e.getValue(), true));
        final long s = vdr.resolve(start);
        if (s == -1) {
            final long t = vdr.resolve(end);
            if (t != -1)
                resolved.put(start.copy(), t);
            else
                LOGGER.warn("Failed redirect: " + start + " -> " + end);
        } else
            LOGGER.warn("URL " + start + " is already known to the virtual document resolver");

        progressLogger.lightUpdate();
    }

    progressLogger.done();

    //System.err.println(resolved);

    final Iterable<MutableString> allURIs = Iterables
            .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet());
    final long numberOfDocuments = vdr.numberOfDocuments();

    final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso")
            ? TransformationStrategies.iso()
            : TransformationStrategies.utf16();

    BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments,
            new ShiftAddXorSignedStringMap(allURIs.iterator(),
                    new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy)
                            .build(),
                    jsapResult.getInt("width")),
            resolved.values().toLongArray())), jsapResult.getString("redvdr"));
}