List of usage examples for org.xml.sax.helpers DefaultHandler DefaultHandler
DefaultHandler
From source file:co.anarquianegra.rockolappServidor.mundo.ListaReproductor.java
private void cargarCanciones(File archivo, String extension) throws IOException, SAXException, TikaException { cant++;/*from www . j av a 2 s. c o m*/ Parser parser = null; if (extension.equals("wav")) parser = new AudioParser(); else if (extension.equals("mp3")) parser = new Mp3Parser(); InputStream input = new FileInputStream(new File(archivo.toString())); ContentHandler handler = new DefaultHandler(); Metadata metadata = new Metadata(); ParseContext parseCtx = new ParseContext(); parser.parse(input, handler, metadata, parseCtx); input.close(); System.out.println("------------------------------------------"); System.out.println(archivo.getCanonicalPath()); System.out.println("Title: " + metadata.get("title")); System.out.println("Artists: " + metadata.get("xmpDM:artist")); System.out.println("Album: " + metadata.get("xmpDM:album")); if (metadata.get("title") != null && !metadata.get("title").equals("") && metadata.get("xmpDM:artist") != null && !metadata.get("xmpDM:artist").equals("") && metadata.get("xmpDM:album") != null && !metadata.get("xmpDM:album").equals("")) { Cancion c = new Cancion(metadata.get("title"), metadata.get("xmpDM:artist"), metadata.get("xmpDM:album"), archivo.getCanonicalPath()); cancionesXnombre.agregar(c.darNombre(), c); cancionesXartista.agregar(c.darArtista(), c); cancionesXid.agregar(c.toString(), c); } else if (metadata.get("title") != null && !metadata.get("title").equals("") && metadata.get("xmpDM:artist") != null && !metadata.get("xmpDM:artist").equals("")) { Cancion c = new Cancion(metadata.get("title"), metadata.get("xmpDM:artist"), archivo.getCanonicalPath()); cancionesXnombre.agregar(c.darNombre(), c); cancionesXartista.agregar(c.darArtista(), c); cancionesXid.agregar(c.toString(), c); } else if (metadata.get("title") != null && !metadata.get("title").equals("")) { Cancion c = new Cancion(metadata.get("title"), archivo.getCanonicalPath()); cancionesXnombre.agregar(c.darNombre(), c); cancionesXartista.agregar(c.darArtista(), c); cancionesXid.agregar(c.toString(), c); } else { String[] sp = archivo.getName().split("\\."); if (sp.length == 2) { Cancion c = new Cancion(sp[0], archivo.getCanonicalPath()); cancionesXnombre.agregar(c.darNombre(), c); cancionesXartista.agregar(c.darArtista(), c); cancionesXid.agregar(c.toString(), c); System.out.println("Nombre: " + c.darNombre()); } } }
From source file:com.centurylink.mdw.util.HttpHelper.java
/** * Use SAX for fastest parsing./*from w w w .j a v a 2 s . c om*/ */ private URL parseResponseForHtmlMetaEquiv(String response) throws IOException, SAXException, ParserConfigurationException { final StringBuffer urlBuf = new StringBuffer(); InputStream xmlStream = new ByteArrayInputStream(response.getBytes()); InputSource src = new InputSource(xmlStream); SAXParserFactory parserFactory = SAXParserFactory.newInstance(); SAXParser parser = parserFactory.newSAXParser(); parser.parse(src, new DefaultHandler() { boolean inHtml; boolean inHead; boolean inMeta; public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { if (qName.equals("html")) inHtml = true; else if (qName.equals("head")) inHead = true; else if (qName.equals("meta")) inMeta = true; if (inHtml && inHead && inMeta) { if ("refresh".equals(attrs.getValue("http-equiv"))) { String cAttr = attrs.getValue("content"); if (cAttr != null) { int urlIdx = cAttr.indexOf("url="); if (urlIdx >= 0) urlBuf.append(cAttr.substring(urlIdx + 4)); } } } } public void endElement(String uri, String localName, String qName) throws SAXException { if (qName.equals("html")) inHtml = false; else if (qName.equals("head")) inHead = false; else if (qName.equals("meta")) inMeta = false; } }); String str = urlBuf.toString().trim(); if (str.isEmpty()) return null; else return new URL(str); }
From source file:self.philbrown.droidQuery.Ajax.java
protected TaskResponse doInBackground(Void... arg0) { if (this.isCancelled) return null; //if synchronous, block on the background thread until ready. Then call beforeSend, etc, before resuming. if (!beforeSendIsAsync) { try {//from ww w . jav a 2s . c om mutex.acquire(); } catch (InterruptedException e) { Log.w("AjaxTask", "Synchronization Error. Running Task Async"); } final Thread asyncThread = Thread.currentThread(); isLocked = true; mHandler.post(new Runnable() { @Override public void run() { if (options.beforeSend() != null) { if (options.context() != null) options.beforeSend().invoke($.with(options.context()), options); else options.beforeSend().invoke(null, options); } if (options.isAborted()) { cancel(true); return; } if (options.global()) { synchronized (globalTasks) { if (globalTasks.isEmpty()) { $.ajaxStart(); } globalTasks.add(Ajax.this); } $.ajaxSend(); } else { synchronized (localTasks) { localTasks.add(Ajax.this); } } isLocked = false; LockSupport.unpark(asyncThread); } }); if (isLocked) LockSupport.park(); } //here is where to use the mutex //handle cached responses Object cachedResponse = AjaxCache.sharedCache().getCachedResponse(options); //handle ajax caching option if (cachedResponse != null && options.cache()) { Success s = new Success(cachedResponse); s.reason = "cached response"; s.allHeaders = null; return s; } if (connection == null) { try { String type = options.type(); URL url = new URL(options.url()); if (type == null) { type = "GET"; } if (type.equalsIgnoreCase("CUSTOM")) { try { connection = options.customConnection(); } catch (Exception e) { connection = null; } if (connection == null) { Log.w("droidQuery.ajax", "CUSTOM type set, but AjaxOptions.customRequest is invalid. Defaulting to GET."); connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod("GET"); } } else { connection = (HttpURLConnection) url.openConnection(); connection.setRequestMethod(type); if (type.equalsIgnoreCase("POST") || type.equalsIgnoreCase("PUT")) { connection.setDoOutput(true); } } } catch (Throwable t) { if (options.debug()) t.printStackTrace(); Error e = new Error(null); AjaxError error = new AjaxError(); error.connection = connection; error.options = options; e.status = 0; e.reason = "Bad Configuration"; error.status = e.status; error.reason = e.reason; error.response = e.response; e.allHeaders = new Headers(); e.error = error; return e; } } Map<String, Object> args = new HashMap<String, Object>(); args.put("options", options); args.put("request", null); args.put("connection", connection); EventCenter.trigger("ajaxPrefilter", args, null); if (options.headers() != null) { if (options.headers().authorization() != null) { options.headers() .authorization(options.headers().authorization() + " " + options.getEncodedCredentials()); } else if (options.username() != null) { //guessing that authentication is basic options.headers().authorization("Basic " + options.getEncodedCredentials()); } for (Entry<String, String> entry : options.headers().map().entrySet()) { connection.setRequestProperty(entry.getKey(), entry.getValue()); } } if (options.data() != null) { try { OutputStream os = connection.getOutputStream(); os.write(options.data().toString().getBytes()); os.close(); } catch (Throwable t) { Log.w("Ajax", "Could not post data"); } } if (options.timeout() != 0) { connection.setConnectTimeout(options.timeout()); connection.setReadTimeout(options.timeout()); } if (options.trustedCertificate() != null) { Certificate ca = options.trustedCertificate(); String keyStoreType = KeyStore.getDefaultType(); KeyStore keyStore = null; try { keyStore = KeyStore.getInstance(keyStoreType); keyStore.load(null, null); keyStore.setCertificateEntry("ca", ca); } catch (KeyStoreException e) { if (options.debug()) e.printStackTrace(); } catch (NoSuchAlgorithmException e) { if (options.debug()) e.printStackTrace(); } catch (CertificateException e) { if (options.debug()) e.printStackTrace(); } catch (IOException e) { if (options.debug()) e.printStackTrace(); } if (keyStore == null) { Log.w("Ajax", "Could not configure trusted certificate"); } else { try { //Create a TrustManager that trusts the CAs in our KeyStore String tmfAlgorithm = TrustManagerFactory.getDefaultAlgorithm(); TrustManagerFactory tmf = TrustManagerFactory.getInstance(tmfAlgorithm); tmf.init(keyStore); //Create an SSLContext that uses our TrustManager SSLContext sslContext = SSLContext.getInstance("TLS"); sslContext.init(null, tmf.getTrustManagers(), null); ((HttpsURLConnection) connection).setSSLSocketFactory(sslContext.getSocketFactory()); } catch (KeyManagementException e) { if (options.debug()) e.printStackTrace(); } catch (NoSuchAlgorithmException e) { if (options.debug()) e.printStackTrace(); } catch (KeyStoreException e) { if (options.debug()) e.printStackTrace(); } } } try { if (options.cookies() != null) { CookieManager cm = new CookieManager(); CookieStore cookies = cm.getCookieStore(); URI uri = URI.create(options.url()); for (Entry<String, String> entry : options.cookies().entrySet()) { HttpCookie cookie = new HttpCookie(entry.getKey(), entry.getValue()); cookies.add(uri, cookie); } connection.setRequestProperty("Cookie", TextUtils.join(",", cookies.getCookies())); } connection.connect(); final int statusCode = connection.getResponseCode(); final String message = connection.getResponseMessage(); if (options.dataFilter() != null) { if (options.context() != null) options.dataFilter().invoke($.with(options.context()), connection, options.dataType()); else options.dataFilter().invoke(null, connection, options.dataType()); } final Function function = options.statusCode().get(statusCode); if (function != null) { mHandler.post(new Runnable() { @Override public void run() { if (options.context() != null) function.invoke($.with(options.context()), statusCode, options.clone()); else function.invoke(null, statusCode, options.clone()); } }); } //handle dataType String dataType = options.dataType(); if (dataType == null) dataType = "text"; if (options.debug()) Log.i("Ajax", "dataType = " + dataType); Object parsedResponse = null; InputStream stream = null; try { if (dataType.equalsIgnoreCase("text") || dataType.equalsIgnoreCase("html")) { if (options.debug()) Log.i("Ajax", "parsing text"); stream = AjaxUtil.getInputStream(connection); parsedResponse = parseText(stream); } else if (dataType.equalsIgnoreCase("xml")) { if (options.debug()) Log.i("Ajax", "parsing xml"); if (options.customXMLParser() != null) { stream = AjaxUtil.getInputStream(connection); if (options.SAXContentHandler() != null) options.customXMLParser().parse(stream, options.SAXContentHandler()); else options.customXMLParser().parse(stream, new DefaultHandler()); parsedResponse = "Response handled by custom SAX parser"; } else if (options.SAXContentHandler() != null) { stream = AjaxUtil.getInputStream(connection); SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setFeature("http://xml.org/sax/features/namespaces", false); factory.setFeature("http://xml.org/sax/features/namespace-prefixes", true); SAXParser parser = factory.newSAXParser(); XMLReader reader = parser.getXMLReader(); reader.setContentHandler(options.SAXContentHandler()); reader.parse(new InputSource(stream)); parsedResponse = "Response handled by custom SAX content handler"; } else { parsedResponse = parseXML(connection); } } else if (dataType.equalsIgnoreCase("json")) { if (options.debug()) Log.i("Ajax", "parsing json"); parsedResponse = parseJSON(connection); } else if (dataType.equalsIgnoreCase("script")) { if (options.debug()) Log.i("Ajax", "parsing script"); parsedResponse = parseScript(connection); } else if (dataType.equalsIgnoreCase("image")) { if (options.debug()) Log.i("Ajax", "parsing image"); stream = AjaxUtil.getInputStream(connection); parsedResponse = parseImage(stream); } else if (dataType.equalsIgnoreCase("raw")) { if (options.debug()) Log.i("Ajax", "parsing raw data"); parsedResponse = parseRawContent(connection); } } catch (ClientProtocolException cpe) { if (options.debug()) cpe.printStackTrace(); Error e = new Error(parsedResponse); AjaxError error = new AjaxError(); error.connection = connection; error.options = options; e.status = statusCode; e.reason = message; error.status = e.status; error.reason = e.reason; error.response = e.response; e.allHeaders = Headers.createHeaders(connection.getHeaderFields()); e.error = error; return e; } catch (Exception ioe) { if (options.debug()) ioe.printStackTrace(); Error e = new Error(parsedResponse); AjaxError error = new AjaxError(); error.connection = connection; error.options = options; e.status = statusCode; e.reason = message; error.status = e.status; error.reason = e.reason; error.response = e.response; e.allHeaders = Headers.createHeaders(connection.getHeaderFields()); e.error = error; return e; } finally { connection.disconnect(); try { if (stream != null) { stream.close(); } } catch (IOException e) { } } if (statusCode >= 300) { //an error occurred Error e = new Error(parsedResponse); Log.e("Ajax Test", parsedResponse.toString()); //AjaxError error = new AjaxError(); //error.request = request; //error.options = options; e.status = e.status; e.reason = e.reason; //error.status = e.status; //error.reason = e.reason; //error.response = e.response; e.allHeaders = Headers.createHeaders(connection.getHeaderFields()); //e.error = error; if (options.debug()) Log.i("Ajax", "Error " + e.status + ": " + e.reason); return e; } else { //handle ajax ifModified option List<String> lastModifiedHeaders = connection.getHeaderFields().get("last-modified"); if (lastModifiedHeaders.size() >= 1) { try { String h = lastModifiedHeaders.get(0); SimpleDateFormat format = new SimpleDateFormat("EEE, dd MMM yyyy HH:mm:ss zzz", Locale.US); Date lastModified = format.parse(h); if (options.ifModified() && lastModified != null) { Date lastModifiedDate; synchronized (lastModifiedUrls) { lastModifiedDate = lastModifiedUrls.get(options.url()); } if (lastModifiedDate != null && lastModifiedDate.compareTo(lastModified) == 0) { //request response has not been modified. //Causes an error instead of a success. Error e = new Error(parsedResponse); AjaxError error = new AjaxError(); error.connection = connection; error.options = options; e.status = e.status; e.reason = e.reason; error.status = e.status; error.reason = e.reason; error.response = e.response; e.allHeaders = Headers.createHeaders(connection.getHeaderFields()); e.error = error; Function func = options.statusCode().get(304); if (func != null) { if (options.context() != null) func.invoke($.with(options.context())); else func.invoke(null); } return e; } else { synchronized (lastModifiedUrls) { lastModifiedUrls.put(options.url(), lastModified); } } } } catch (Throwable t) { Log.e("Ajax", "Could not parse Last-Modified Header", t); } } //Now handle a successful request Success s = new Success(parsedResponse); s.reason = message; s.allHeaders = Headers.createHeaders(connection.getHeaderFields()); return s; } } catch (Throwable t) { if (options.debug()) t.printStackTrace(); if (t instanceof java.net.SocketTimeoutException) { Error e = new Error(null); AjaxError error = new AjaxError(); error.connection = connection; error.options = options; error.response = e.response; e.status = 0; String reason = t.getMessage(); if (reason == null) reason = "Socket Timeout"; e.reason = reason; error.status = e.status; error.reason = e.reason; if (connection != null) e.allHeaders = Headers.createHeaders(connection.getHeaderFields()); else e.allHeaders = new Headers(); e.error = error; return e; } return null; } }
From source file:com.flexive.core.storage.GenericDivisionImporter.java
/** * Import data from a zip archive to a database table * * @param stmt statement to use * @param zip zip archive containing the zip entry * @param ze zip entry within the archive * @param xpath xpath containing the entries to import * @param table name of the table * @param executeInsertPhase execute the insert phase? * @param executeUpdatePhase execute the update phase? * @param updateColumns columns that should be set to <code>null</code> in a first pass (insert) * and updated to the provided values in a second pass (update), * columns that should be used in the where clause have to be prefixed * with "KEY:", to assign a default value use the expression "columnname:default value", * if the default value is "@", it will be a negative counter starting at 0, decreasing. * If the default value starts with "%", it will be set to the column following the "%" * character in the first pass * @throws Exception on errors// w w w .j av a 2 s.c o m */ protected void importTable(Statement stmt, final ZipFile zip, final ZipEntry ze, final String xpath, final String table, final boolean executeInsertPhase, final boolean executeUpdatePhase, final String... updateColumns) throws Exception { //analyze the table final ResultSet rs = stmt.executeQuery("SELECT * FROM " + table + " WHERE 1=2"); StringBuilder sbInsert = new StringBuilder(500); StringBuilder sbUpdate = updateColumns.length > 0 ? new StringBuilder(500) : null; if (rs == null) throw new IllegalArgumentException("Can not analyze table [" + table + "]!"); sbInsert.append("INSERT INTO ").append(table).append(" ("); final ResultSetMetaData md = rs.getMetaData(); final Map<String, ColumnInfo> updateClauseColumns = updateColumns.length > 0 ? new HashMap<String, ColumnInfo>(md.getColumnCount()) : null; final Map<String, ColumnInfo> updateSetColumns = updateColumns.length > 0 ? new LinkedHashMap<String, ColumnInfo>(md.getColumnCount()) : null; final Map<String, String> presetColumns = updateColumns.length > 0 ? new HashMap<String, String>(10) : null; //preset to a referenced column (%column syntax) final Map<String, String> presetRefColumns = updateColumns.length > 0 ? new HashMap<String, String>(10) : null; final Map<String, Integer> counters = updateColumns.length > 0 ? new HashMap<String, Integer>(10) : null; final Map<String, ColumnInfo> insertColumns = new HashMap<String, ColumnInfo>( md.getColumnCount() + (counters != null ? counters.size() : 0)); int insertIndex = 1; int updateSetIndex = 1; int updateClauseIndex = 1; boolean first = true; for (int i = 0; i < md.getColumnCount(); i++) { final String currCol = md.getColumnName(i + 1).toLowerCase(); if (updateColumns.length > 0) { boolean abort = false; for (String col : updateColumns) { if (col.indexOf(':') > 0 && !col.startsWith("KEY:")) { String value = col.substring(col.indexOf(':') + 1); col = col.substring(0, col.indexOf(':')); if ("@".equals(value)) { if (currCol.equalsIgnoreCase(col)) { counters.put(col, 0); insertColumns.put(col, new ColumnInfo(md.getColumnType(i + 1), insertIndex++)); sbInsert.append(',').append(currCol); } } else if (value.startsWith("%")) { if (currCol.equalsIgnoreCase(col)) { presetRefColumns.put(col, value.substring(1)); insertColumns.put(col, new ColumnInfo(md.getColumnType(i + 1), insertIndex++)); sbInsert.append(',').append(currCol); // System.out.println("==> adding presetRefColumn "+col+" with value of "+value.substring(1)); } } else if (!presetColumns.containsKey(col)) presetColumns.put(col, value); } if (currCol.equalsIgnoreCase(col)) { abort = true; updateSetColumns.put(currCol, new ColumnInfo(md.getColumnType(i + 1), updateSetIndex++)); break; } } if (abort) continue; } if (first) { first = false; } else sbInsert.append(','); sbInsert.append(currCol); insertColumns.put(currCol, new ColumnInfo(md.getColumnType(i + 1), insertIndex++)); } if (updateColumns.length > 0 && executeUpdatePhase) { sbUpdate.append("UPDATE ").append(table).append(" SET "); int counter = 0; for (String updateColumn : updateSetColumns.keySet()) { if (counter++ > 0) sbUpdate.append(','); sbUpdate.append(updateColumn).append("=?"); } sbUpdate.append(" WHERE "); boolean hasKeyColumn = false; for (String col : updateColumns) { if (!col.startsWith("KEY:")) continue; hasKeyColumn = true; String keyCol = col.substring(4); for (int i = 0; i < md.getColumnCount(); i++) { if (!md.getColumnName(i + 1).equalsIgnoreCase(keyCol)) continue; updateClauseColumns.put(keyCol, new ColumnInfo(md.getColumnType(i + 1), updateClauseIndex++)); sbUpdate.append(keyCol).append("=? AND "); break; } } if (!hasKeyColumn) throw new IllegalArgumentException("Update columns require a KEY!"); sbUpdate.delete(sbUpdate.length() - 5, sbUpdate.length()); //remove trailing " AND " //"shift" clause indices for (String col : updateClauseColumns.keySet()) { GenericDivisionImporter.ColumnInfo ci = updateClauseColumns.get(col); ci.index += (updateSetIndex - 1); } } if (presetColumns != null) { for (String key : presetColumns.keySet()) sbInsert.append(',').append(key); } sbInsert.append(")VALUES("); for (int i = 0; i < insertColumns.size(); i++) { if (i > 0) sbInsert.append(','); sbInsert.append('?'); } if (presetColumns != null) { for (String key : presetColumns.keySet()) sbInsert.append(',').append(presetColumns.get(key)); } sbInsert.append(')'); if (DBG) { LOG.info("Insert statement:\n" + sbInsert.toString()); if (updateColumns.length > 0) LOG.info("Update statement:\n" + sbUpdate.toString()); } //build a map containing all nodes that require attributes //this allows for matching simple xpath queries like "flatstorages/storage[@name='FX_FLAT_STORAGE']/data" final Map<String, List<String>> queryAttributes = new HashMap<String, List<String>>(5); for (String pElem : xpath.split("/")) { if (!(pElem.indexOf('@') > 0 && pElem.indexOf('[') > 0)) continue; List<String> att = new ArrayList<String>(5); for (String pAtt : pElem.split("@")) { if (!(pAtt.indexOf('=') > 0)) continue; att.add(pAtt.substring(0, pAtt.indexOf('='))); } queryAttributes.put(pElem.substring(0, pElem.indexOf('[')), att); } final PreparedStatement psInsert = stmt.getConnection().prepareStatement(sbInsert.toString()); final PreparedStatement psUpdate = updateColumns.length > 0 && executeUpdatePhase ? stmt.getConnection().prepareStatement(sbUpdate.toString()) : null; try { final SAXParser parser = SAXParserFactory.newInstance().newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private String currentElement = null; private Map<String, String> data = new HashMap<String, String>(10); private StringBuilder sbData = new StringBuilder(10000); boolean inTag = false; boolean inElement = false; int counter; List<String> path = new ArrayList<String>(10); StringBuilder currPath = new StringBuilder(100); boolean insertMode = true; /** * {@inheritDoc} */ @Override public void startDocument() throws SAXException { counter = 0; inTag = false; inElement = false; path.clear(); currPath.setLength(0); sbData.setLength(0); data.clear(); currentElement = null; } /** * {@inheritDoc} */ @Override public void processingInstruction(String target, String data) throws SAXException { if (target != null && target.startsWith("fx_")) { if (target.equals("fx_mode")) insertMode = "insert".equals(data); } else super.processingInstruction(target, data); } /** * {@inheritDoc} */ @Override public void endDocument() throws SAXException { if (insertMode) LOG.info("Imported [" + counter + "] entries into [" + table + "] for xpath [" + xpath + "]"); else LOG.info("Updated [" + counter + "] entries in [" + table + "] for xpath [" + xpath + "]"); } /** * {@inheritDoc} */ @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { pushPath(qName, attributes); if (currPath.toString().equals(xpath)) { inTag = true; data.clear(); for (int i = 0; i < attributes.getLength(); i++) { String name = attributes.getLocalName(i); if (StringUtils.isEmpty(name)) name = attributes.getQName(i); data.put(name, attributes.getValue(i)); } } else { currentElement = qName; } inElement = true; sbData.setLength(0); } /** * Push a path element from the stack * * @param qName element name to push * @param att attributes */ private void pushPath(String qName, Attributes att) { if (att.getLength() > 0 && queryAttributes.containsKey(qName)) { String curr = qName + "["; boolean first = true; final List<String> attList = queryAttributes.get(qName); for (int i = 0; i < att.getLength(); i++) { if (!attList.contains(att.getQName(i))) continue; if (first) first = false; else curr += ','; curr += "@" + att.getQName(i) + "='" + att.getValue(i) + "'"; } curr += ']'; path.add(curr); } else path.add(qName); buildPath(); } /** * Pop the top path element from the stack */ private void popPath() { path.remove(path.size() - 1); buildPath(); } /** * Rebuild the current path */ private synchronized void buildPath() { currPath.setLength(0); for (String s : path) currPath.append(s).append('/'); if (currPath.length() > 1) currPath.delete(currPath.length() - 1, currPath.length()); // System.out.println("currPath: " + currPath); } /** * {@inheritDoc} */ @Override public void endElement(String uri, String localName, String qName) throws SAXException { if (currPath.toString().equals(xpath)) { if (DBG) LOG.info("Insert [" + xpath + "]: [" + data + "]"); inTag = false; try { if (insertMode) { if (executeInsertPhase) { processColumnSet(insertColumns, psInsert); counter += psInsert.executeUpdate(); } } else { if (executeUpdatePhase) { if (processColumnSet(updateSetColumns, psUpdate)) { processColumnSet(updateClauseColumns, psUpdate); counter += psUpdate.executeUpdate(); } } } } catch (SQLException e) { throw new SAXException(e); } catch (ParseException e) { throw new SAXException(e); } } else { if (inTag) { data.put(currentElement, sbData.toString()); } currentElement = null; } popPath(); inElement = false; sbData.setLength(0); } /** * Process a column set * * @param columns the columns to process * @param ps prepared statement to use * @return if data other than <code>null</code> has been set * @throws SQLException on errors * @throws ParseException on date/time conversion errors */ private boolean processColumnSet(Map<String, ColumnInfo> columns, PreparedStatement ps) throws SQLException, ParseException { boolean dataSet = false; for (String col : columns.keySet()) { ColumnInfo ci = columns.get(col); String value = data.get(col); if (insertMode && counters != null && counters.get(col) != null) { final int newVal = counters.get(col) - 1; value = String.valueOf(newVal); counters.put(col, newVal); // System.out.println("new value for " + col + ": " + newVal); } if (insertMode && presetRefColumns != null && presetRefColumns.get(col) != null) { value = data.get(presetRefColumns.get(col)); // System.out.println("Set presetRefColumn for "+col+" to ["+value+"] from column ["+presetRefColumns.get(col)+"]"); } if (value == null) ps.setNull(ci.index, ci.columnType); else { dataSet = true; switch (ci.columnType) { case Types.BIGINT: case Types.NUMERIC: if (DBG) LOG.info("BigInt " + ci.index + "->" + new BigDecimal(value)); ps.setBigDecimal(ci.index, new BigDecimal(value)); break; case java.sql.Types.DOUBLE: if (DBG) LOG.info("Double " + ci.index + "->" + Double.parseDouble(value)); ps.setDouble(ci.index, Double.parseDouble(value)); break; case java.sql.Types.FLOAT: case java.sql.Types.REAL: if (DBG) LOG.info("Float " + ci.index + "->" + Float.parseFloat(value)); ps.setFloat(ci.index, Float.parseFloat(value)); break; case java.sql.Types.TIMESTAMP: case java.sql.Types.DATE: if (DBG) LOG.info("Timestamp/Date " + ci.index + "->" + FxFormatUtils.getDateTimeFormat().parse(value)); ps.setTimestamp(ci.index, new Timestamp(FxFormatUtils.getDateTimeFormat().parse(value).getTime())); break; case Types.TINYINT: case Types.SMALLINT: if (DBG) LOG.info("Integer " + ci.index + "->" + Integer.valueOf(value)); ps.setInt(ci.index, Integer.valueOf(value)); break; case Types.INTEGER: case Types.DECIMAL: try { if (DBG) LOG.info("Long " + ci.index + "->" + Long.valueOf(value)); ps.setLong(ci.index, Long.valueOf(value)); } catch (NumberFormatException e) { //Fallback (temporary) for H2 if the reported long is a big decimal (tree...) ps.setBigDecimal(ci.index, new BigDecimal(value)); } break; case Types.BIT: case Types.CHAR: case Types.BOOLEAN: if (DBG) LOG.info("Boolean " + ci.index + "->" + value); if ("1".equals(value) || "true".equals(value)) ps.setBoolean(ci.index, true); else ps.setBoolean(ci.index, false); break; case Types.LONGVARBINARY: case Types.VARBINARY: case Types.BLOB: case Types.BINARY: ZipEntry bin = zip.getEntry(value); if (bin == null) { LOG.error("Failed to lookup binary [" + value + "]!"); ps.setNull(ci.index, ci.columnType); break; } try { ps.setBinaryStream(ci.index, zip.getInputStream(bin), (int) bin.getSize()); } catch (IOException e) { LOG.error("IOException importing binary [" + value + "]: " + e.getMessage(), e); } break; case Types.CLOB: case Types.LONGVARCHAR: case Types.VARCHAR: case SQL_LONGNVARCHAR: case SQL_NCHAR: case SQL_NCLOB: case SQL_NVARCHAR: if (DBG) LOG.info("String " + ci.index + "->" + value); ps.setString(ci.index, value); break; default: LOG.warn("Unhandled type [" + ci.columnType + "] for column [" + col + "]"); } } } return dataSet; } /** * {@inheritDoc} */ @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inElement) sbData.append(ch, start, length); } }; handler.processingInstruction("fx_mode", "insert"); parser.parse(zip.getInputStream(ze), handler); if (updateColumns.length > 0 && executeUpdatePhase) { handler.processingInstruction("fx_mode", "update"); parser.parse(zip.getInputStream(ze), handler); } } finally { Database.closeObjects(GenericDivisionImporter.class, psInsert, psUpdate); } }
From source file:efen.parsewiki.WikipediaDocumentSequence.java
@Override public DocumentIterator iterator() throws IOException { final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final MutableString nameSpaceAccumulator = new MutableString(); final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>(); final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16); for (int i = freeFactories.remainingCapacity(); i-- != 0;) freeFactories.add(this.factory.copy()); final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>( freeFactories.size());//from w ww .j a v a2s . co m final SAXParser parser; try { parser = saxParserFactory.newSAXParser(); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } final DefaultHandler handler = new DefaultHandler() { private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); private boolean inText; private boolean inTitle; private boolean inId; private boolean inTimestamp; private boolean inNamespaceDef; private boolean redirect; private MutableString text = new MutableString(); private MutableString title = new MutableString(); private MutableString id = new MutableString(); private MutableString timestamp = new MutableString(); private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>(); { metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8"); metadata.put(MetadataKeys.REDIRECT, redirectAnchors); } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { redirect = inText = inTitle = inId = inTimestamp = false; text.length(0); title.length(0); id.length(0); timestamp.length(0); } else if ("text".equals(localName)) inText = true; else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first id/title elements. else if ("id".equals(localName) && id.length() == 0) inId = true; else if ("timestamp".equals(localName) && timestamp.length() == 0) inTimestamp = true; else if ("redirect".equals(localName)) { redirect = true; if (attributes.getValue("title") != null) // Accumulate the title of the page as virtual text of the redirect page. synchronized (redirectAnchors) { final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true); redirectAnchors.add( new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length()) .append(baseURL).append(link), title.copy())); } } else if ("namespace".equals(localName)) { // Found a new namespace inNamespaceDef = true; nameSpaceAccumulator.length(0); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("namespace".equals(localName)) { // Collecting a namespace if (nameSpaceAccumulator.length() != 0) nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase()); return; } if ("namespaces".equals(localName)) { // All namespaces collected nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator); return; } if (!redirect) { if ("title".equals(localName)) { // Set basic metadata for the page metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy()); String link = Encoder.encodeTitleToUrl(title.toString(), true); metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI, new MutableString(baseURL.length() + link.length()).append(baseURL).append(link)); inTitle = false; } else if ("id".equals(localName)) { metadata.put(MetadataKeys.ID, Long.valueOf(id.toString())); inId = false; } else if ("timestamp".equals(localName)) { try { metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString())); } catch (ParseException e) { throw new RuntimeException(e.getMessage(), e); } inTimestamp = false; } else if ("text".equals(localName)) { inText = false; if (!keepNamespaced) { // Namespaces are case-insensitive and language-dependent final int pos = title.indexOf(':'); if (pos != -1 && isATrueNamespace(title.substring(0, pos))) return; } try { final MutableString html = new MutableString(); DocumentFactory freeFactory; try { freeFactory = freeFactories.take(); } catch (InterruptedException e) { throw new RuntimeException(e.getMessage(), e); } if (parseText) { if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page. /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */ final MutableString newLinks = new MutableString(); for (int start = 0, end; (start = BRACKETS_OPEN.search(text, start)) != -1; start = end) { end = start; final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start); // Note that we don't escape title because we are working at the Wikipedia raw text level. if (endOfLink != -1) { newLinks.append(text.array(), start, endOfLink - start).append('|') .append(title).append("]]\n"); end = endOfLink; } end++; } text.append(newLinks); } // We separate categories by OXOXO, so we don't get overflowing phrases. final MutableString category = new MutableString(); for (int start = 0, end; (start = CATEGORY_START.search(text, start)) != -1; start = end) { end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length()); if (end != -1) category.append(text.subSequence(start, end)).append(" OXOXO "); else break; } metadata.put(MetadataKeys.CATEGORY, category); // Heuristics to get the first paragraph metadata.put(MetadataKeys.FIRSTPAR, new MutableString()); String plainText = new WikiModel(imageBaseURL, linkBaseURL) .render(new PlainTextConverter(true), text.toString()); for (int start = 0; start < plainText.length(); start++) { //System.err.println("Examining " + plainText.charAt( start ) ); if (Character.isWhitespace(plainText.charAt(start))) continue; if (plainText.charAt(start) == '{') { //System.err.print( "Braces " + start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\" -> " ); start = BRACES_CLOSED.search(plainText, start); //System.err.println( start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\"" ); if (start == -1) break; start++; } else if (plainText.charAt(start) == '[') { start = BRACKETS_CLOSED.search(plainText, start); if (start == -1) break; start++; } else { final int end = plainText.indexOf('\n', start); if (end != -1) metadata.put(MetadataKeys.FIRSTPAR, new MutableString(plainText.substring(start, end)));//new MutableString( new WikiModel( imageBaseURL, linkBaseURL ).render( new PlainTextConverter( true ), text.substring( start, end ).toString() ) ) ); break; } } try { WikiModel wikiModel = new WikiModel(imageBaseURL, linkBaseURL); wikiModel.render(new HTMLConverter(), text.toString(), html, false, false); final Map<String, String> categories = wikiModel.getCategories(); // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering) for (Entry<String, String> entry : categories.entrySet()) { final String key = entry.getKey(); final String value = entry.getValue().trim(); if (value.length() != 0) // There are empty such things html.append("\n<a href=\"").append(baseURL).append("Category:") .append(Encoder.encodeTitleToUrl(key, true)).append("\">") .append(HtmlEscapers.htmlEscaper().escape(key)) .append("</a>\n"); } } catch (Exception e) { LOGGER.error("Unexpected exception while parsing " + title, e); } } readyDocumentsAndFactories.put(new DocumentAndFactory( freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8), new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)), freeFactory)); } catch (InterruptedException e) { throw new RuntimeException(e.getMessage(), e); } catch (IOException e) { throw new RuntimeException(e.getMessage(), e); } } } } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inText && parseText) text.append(ch, start, length); if (inTitle) title.append(ch, start, length); if (inId) id.append(ch, start, length); if (inTimestamp) timestamp.append(ch, start, length); if (inNamespaceDef) { nameSpaceAccumulator.append(ch, start, length); inNamespaceDef = false; // Dirty, but it works } } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inText && parseText) text.append(ch, start, length); if (inTitle) title.append(ch, start, length); } }; final Thread parsingThread = new Thread() { public void run() { try { InputStream in = new FileInputStream(wikipediaXmlDump); if (bzipped) in = new BZip2CompressorInputStream(in); parser.parse( new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); readyDocumentsAndFactories.put(END); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } }; parsingThread.start(); return new AbstractDocumentIterator() { private DocumentFactory lastFactory; @Override public Document nextDocument() throws IOException { try { final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take(); if (lastFactory != null) freeFactories.put(lastFactory); if (documentAndFactory == END) return null; lastFactory = documentAndFactory.factory; return documentAndFactory.document; } catch (InterruptedException e) { throw new RuntimeException(e.getMessage(), e); } } }; }
From source file:it.unimi.di.big.mg4j.document.WikipediaDocumentSequence.java
@Override public DocumentIterator iterator() throws IOException { final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final MutableString nameSpaceAccumulator = new MutableString(); final ObjectOpenHashSet<MutableString> nameSpacesAccumulator = new ObjectOpenHashSet<MutableString>(); final ArrayBlockingQueue<DocumentFactory> freeFactories = new ArrayBlockingQueue<DocumentFactory>(16); for (int i = freeFactories.remainingCapacity(); i-- != 0;) freeFactories.add(this.factory.copy()); final ArrayBlockingQueue<DocumentAndFactory> readyDocumentsAndFactories = new ArrayBlockingQueue<DocumentAndFactory>( freeFactories.size());//from w w w . ja v a 2 s.c o m final SAXParser parser; try { parser = saxParserFactory.newSAXParser(); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } final DefaultHandler handler = new DefaultHandler() { private final SimpleDateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'"); private boolean inText; private boolean inTitle; private boolean inId; private boolean inTimestamp; private boolean inNamespaceDef; private boolean redirect; private MutableString text = new MutableString(); private MutableString title = new MutableString(); private MutableString id = new MutableString(); private MutableString timestamp = new MutableString(); private final Reference2ObjectMap<Enum<?>, Object> metadata = new Reference2ObjectOpenHashMap<Enum<?>, Object>(); { metadata.put(PropertyBasedDocumentFactory.MetadataKeys.ENCODING, "UTF-8"); metadata.put(MetadataKeys.REDIRECT, redirectAnchors); } @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { redirect = inText = inTitle = inId = inTimestamp = false; text.length(0); title.length(0); id.length(0); timestamp.length(0); } else if ("text".equals(localName)) inText = true; else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first id/title elements. else if ("id".equals(localName) && id.length() == 0) inId = true; else if ("timestamp".equals(localName) && timestamp.length() == 0) inTimestamp = true; else if ("redirect".equals(localName)) { redirect = true; if (attributes.getValue("title") != null) // Accumulate the title of the page as virtual text of the redirect page. synchronized (redirectAnchors) { final String link = Encoder.encodeTitleToUrl(attributes.getValue("title"), true); redirectAnchors.add( new AnchorExtractor.Anchor(new MutableString(baseURL.length() + link.length()) .append(baseURL).append(link), title.copy())); } } else if ("namespace".equals(localName)) { // Found a new namespace inNamespaceDef = true; nameSpaceAccumulator.length(0); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("namespace".equals(localName)) { // Collecting a namespace if (nameSpaceAccumulator.length() != 0) nameSpacesAccumulator.add(nameSpaceAccumulator.copy().toLowerCase()); return; } if ("namespaces".equals(localName)) { // All namespaces collected nameSpaces = ImmutableSet.copyOf(nameSpacesAccumulator); return; } if (!redirect) { if ("title".equals(localName)) { // Set basic metadata for the page metadata.put(PropertyBasedDocumentFactory.MetadataKeys.TITLE, title.copy()); String link = Encoder.encodeTitleToUrl(title.toString(), true); metadata.put(PropertyBasedDocumentFactory.MetadataKeys.URI, new MutableString(baseURL.length() + link.length()).append(baseURL).append(link)); inTitle = false; } else if ("id".equals(localName)) { metadata.put(MetadataKeys.ID, Long.valueOf(id.toString())); inId = false; } else if ("timestamp".equals(localName)) { try { metadata.put(MetadataKeys.LASTEDIT, dateFormat.parse(timestamp.toString())); } catch (ParseException e) { throw new RuntimeException(e.getMessage(), e); } inTimestamp = false; } else if ("text".equals(localName)) { inText = false; if (!keepNamespaced) { // Namespaces are case-insensitive and language-dependent final int pos = title.indexOf(':'); if (pos != -1 && nameSpaces.contains(title.substring(0, pos).toLowerCase())) return; } try { final MutableString html = new MutableString(); DocumentFactory freeFactory; try { freeFactory = freeFactories.take(); } catch (InterruptedException e) { throw new RuntimeException(e.getMessage(), e); } if (parseText) { if (DISAMBIGUATION.search(text) != -1) { // It's a disambiguation page. /* Roi's hack: duplicate links using the page title, so the generic name will end up as anchor text. */ final MutableString newLinks = new MutableString(); for (int start = 0, end; (start = BRACKETS_OPEN.search(text, start)) != -1; start = end) { end = start; final int endOfLink = text.indexOfAnyOf(END_OF_DISAMBIGUATION_LINK, start); // Note that we don't escape title because we are working at the Wikipedia raw text level. if (endOfLink != -1) { newLinks.append(text.array(), start, endOfLink - start).append('|') .append(title).append("]]\n"); end = endOfLink; } end++; } text.append(newLinks); } // We separate categories by OXOXO, so we don't get overflowing phrases. final MutableString category = new MutableString(); for (int start = 0, end; (start = CATEGORY_START.search(text, start)) != -1; start = end) { end = BRACKETS_CLOSED.search(text, start += CATEGORY_START.length()); if (end != -1) category.append(text.subSequence(start, end)).append(" OXOXO "); else break; } metadata.put(MetadataKeys.CATEGORY, category); // Heuristics to get the first paragraph metadata.put(MetadataKeys.FIRSTPAR, new MutableString()); String plainText = wikiModel.render(new PlainTextConverter(true), text.toString()); for (int start = 0; start < plainText.length(); start++) { //System.err.println("Examining " + plainText.charAt( start ) ); if (Character.isWhitespace(plainText.charAt(start))) continue; if (plainText.charAt(start) == '{') { //System.err.print( "Braces " + start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\" -> " ); start = BRACES_CLOSED.search(plainText, start); //System.err.println( start + " text: \"" + plainText.subSequence( start, start + 10 ) + "\"" ); if (start == -1) break; start++; } else if (plainText.charAt(start) == '[') { start = BRACKETS_CLOSED.search(plainText, start); if (start == -1) break; start++; } else { final int end = plainText.indexOf('\n', start); if (end != -1) metadata.put(MetadataKeys.FIRSTPAR, new MutableString(plainText.substring(start, end))); break; } } try { wikiModel.render(new HTMLConverter(), text.toString(), html, false, true); final Map<String, String> categories = wikiModel.getCategories(); // Put back category links in the page (they have been parsed by bliki and to not appear anymore in the HTML rendering) for (Entry<String, String> entry : categories.entrySet()) { final String key = entry.getKey(); final String value = entry.getValue().trim(); if (value.length() != 0) // There are empty such things html.append("\n<a href=\"").append(baseURL).append("Category:") .append(Encoder.encodeTitleToUrl(key, true)).append("\">") .append(HtmlEscapers.htmlEscaper().escape(key)) .append("</a>\n"); } } catch (Exception e) { LOGGER.error("Unexpected exception while parsing " + title, e); } } readyDocumentsAndFactories.put(new DocumentAndFactory( freeFactory.getDocument(IOUtils.toInputStream(html, Charsets.UTF_8), new Reference2ObjectOpenHashMap<Enum<?>, Object>(metadata)), freeFactory)); } catch (InterruptedException e) { throw new RuntimeException(e.getMessage(), e); } catch (IOException e) { throw new RuntimeException(e.getMessage(), e); } } } } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inText && parseText) text.append(ch, start, length); if (inTitle) title.append(ch, start, length); if (inId) id.append(ch, start, length); if (inTimestamp) timestamp.append(ch, start, length); if (inNamespaceDef) { nameSpaceAccumulator.append(ch, start, length); inNamespaceDef = false; // Dirty, but it works } } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inText && parseText) text.append(ch, start, length); if (inTitle) title.append(ch, start, length); } }; final Thread parsingThread = new Thread() { public void run() { try { InputStream in = new FileInputStream(wikipediaXmlDump); if (bzipped) in = new BZip2CompressorInputStream(in); parser.parse( new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); readyDocumentsAndFactories.put(END); } catch (Exception e) { throw new RuntimeException(e.getMessage(), e); } } }; parsingThread.start(); return new AbstractDocumentIterator() { private DocumentFactory lastFactory; @Override public Document nextDocument() throws IOException { try { final DocumentAndFactory documentAndFactory = readyDocumentsAndFactories.take(); if (lastFactory != null) freeFactories.put(lastFactory); if (documentAndFactory == END) return null; lastFactory = documentAndFactory.factory; return documentAndFactory.document; } catch (InterruptedException e) { throw new RuntimeException(e.getMessage(), e); } } }; }
From source file:io.datalayer.conf.XmlConfigurationTest.java
/** * Creates a validating document builder. * /* w w w . j a v a 2 s . c o m*/ * @return the document builder * @throws ParserConfigurationException * if an error occurs */ private DocumentBuilder createValidatingDocBuilder() throws ParserConfigurationException { DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); factory.setValidating(true); DocumentBuilder builder = factory.newDocumentBuilder(); builder.setErrorHandler(new DefaultHandler() { @Override public void error(SAXParseException ex) throws SAXException { throw ex; } }); return builder; }
From source file:org.hil.webservice.mobile.impl.ChildrenWebServiceImpl.java
private void parseXml(String xml) { list = new ArrayList<Children>(); tmpAuth = ""; tmpAuthor = ""; SAXParserFactory factory = SAXParserFactory.newInstance(); try {//from w ww . j ava2 s . c om SAXParser saxParser = factory.newSAXParser(); DefaultHandler handler = new DefaultHandler() { Children tempChild; String tempVal = ""; public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if (qName.equalsIgnoreCase("Children")) { tmpAuth = attributes.getValue("sessionAuth"); if (!tmpAuth.equalsIgnoreCase("sessionAuth")) return; tmpAuthor = attributes.getValue("author"); force = Boolean.parseBoolean(attributes.getValue("force")); } tempVal = ""; if (qName.equalsIgnoreCase("Child")) { tempChild = new Children(); } } public void endElement(String uri, String localName, String qName) throws SAXException { if (qName.equalsIgnoreCase("Child")) { //add it to the list list.add(tempChild); } else if (qName.equalsIgnoreCase("id")) { if (tempVal.length() > 0) tempChild.setId(Long.parseLong(tempVal)); } else if (qName.equalsIgnoreCase("fullName")) { if (tempVal.length() > 0) tempChild.setFullName(tempVal); } else if (qName.equalsIgnoreCase("dateOfBirth")) { if (tempVal.length() > 0) { SimpleDateFormat format = new SimpleDateFormat("dd/MM/yyyy"); try { Date bdate = format.parse(tempVal); tempChild.setDateOfBirth(bdate); } catch (ParseException e) { e.printStackTrace(); } } } else if (qName.equalsIgnoreCase("gender")) { if (tempVal.length() > 0) tempChild.setGender(Boolean.parseBoolean(tempVal)); } else if (qName.equalsIgnoreCase("childCode")) { if (tempVal.length() > 0) tempChild.setChildCode(tempVal); } else if (qName.equalsIgnoreCase("fatherName")) { if (tempVal.length() > 0) tempChild.setFatherName(tempVal); } else if (qName.equalsIgnoreCase("fatherBirthYear")) { if (tempVal.length() > 0) tempChild.setFatherBirthYear(Integer.parseInt(tempVal)); } else if (qName.equalsIgnoreCase("fatherID")) { if (tempVal.length() > 0) tempChild.setFatherID(tempVal); } else if (qName.equalsIgnoreCase("fatherMobile")) { if (tempVal.length() > 0) tempChild.setFatherMobile(tempVal); } else if (qName.equalsIgnoreCase("motherName")) { if (tempVal.length() > 0) tempChild.setMotherName(tempVal); } else if (qName.equalsIgnoreCase("motherBirthYear")) { if (tempVal.length() > 0) tempChild.setMotherBirthYear(Integer.parseInt(tempVal)); } else if (qName.equalsIgnoreCase("motherID")) { if (tempVal.length() > 0) tempChild.setMotherID(tempVal); } else if (qName.equalsIgnoreCase("motherMobile")) { if (tempVal.length() > 0) tempChild.setMotherMobile(tempVal); } else if (qName.equalsIgnoreCase("caretakerName")) { if (tempVal.length() > 0) tempChild.setCaretakerName(tempVal); } else if (qName.equalsIgnoreCase("caretakerBirthYear")) { if (tempVal.length() > 0) tempChild.setCaretakerBirthYear(Integer.parseInt(tempVal)); } else if (qName.equalsIgnoreCase("caretakerID")) { if (tempVal.length() > 0) tempChild.setCaretakerID(tempVal); } else if (qName.equalsIgnoreCase("caretakerMobile")) { if (tempVal.length() > 0) tempChild.setCaretakerMobile(tempVal); } else if (qName.equalsIgnoreCase("currentCaretaker")) { if (tempVal.length() > 0) tempChild.setCurrentCaretaker(Short.parseShort(tempVal)); } else if (qName.equalsIgnoreCase("villageId")) { if (tempVal.length() > 0) { tempChild.setVillage(villageDao.get(Long.parseLong(tempVal))); } } } public void characters(char ch[], int start, int length) throws SAXException { tempVal = new String(ch, start, length); } }; saxParser.parse(new InputSource(new StringReader(xml)), handler); } catch (ParserConfigurationException e) { e.printStackTrace(); } catch (SAXException e) { e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } }
From source file:efen.parsewiki.WikipediaDocumentSequence.java
public static void main(final String arg[]) throws ParserConfigurationException, SAXException, IOException, JSAPException, ClassNotFoundException { SimpleJSAP jsap = new SimpleJSAP(WikipediaDocumentSequence.class.getName(), "Computes the redirects of a Wikipedia dump and integrate them into an existing virtual document resolver for the dump.", new Parameter[] { new Switch("bzip2", 'b', "bzip2", "The file is compressed with bzip2"), new Switch("iso", 'i', "iso", "Use ISO-8859-1 coding internally (i.e., just use the lower eight bits of each character)."), new FlaggedOption("width", JSAP.INTEGER_PARSER, Integer.toString(Long.SIZE), JSAP.NOT_REQUIRED, 'w', "width", "The width, in bits, of the signatures used to sign the function from URIs to their rank."), new UnflaggedOption("file", JSAP.STRING_PARSER, JSAP.REQUIRED, "The file containing the Wikipedia dump."), new UnflaggedOption("baseURL", JSAP.STRING_PARSER, JSAP.REQUIRED, "The base URL for the collection (e.g., http://en.wikipedia.org/wiki/)."), new UnflaggedOption("uris", JSAP.STRING_PARSER, JSAP.REQUIRED, "The URIs of the documents in the collection (generated by ScanMetadata)."), new UnflaggedOption("vdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of a precomputed virtual document resolver for the collection."), new UnflaggedOption("redvdr", JSAP.STRING_PARSER, JSAP.REQUIRED, "The name of the resulting virtual document resolver.") }); JSAPResult jsapResult = jsap.parse(arg); if (jsap.messagePrinted()) return;/* w w w .j a v a 2 s. c om*/ final SAXParserFactory saxParserFactory = SAXParserFactory.newInstance(); saxParserFactory.setNamespaceAware(true); final Object2ObjectOpenHashMap<MutableString, String> redirects = new Object2ObjectOpenHashMap<MutableString, String>(); final String baseURL = jsapResult.getString("baseURL"); final ProgressLogger progressLogger = new ProgressLogger(LOGGER); progressLogger.itemsName = "redirects"; progressLogger.start("Extracting redirects..."); final SAXParser parser = saxParserFactory.newSAXParser(); final DefaultHandler handler = new DefaultHandler() { private boolean inTitle; private MutableString title = new MutableString(); @Override public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException { if ("page".equals(localName)) { inTitle = false; title.length(0); } else if ("title".equals(localName) && title.length() == 0) inTitle = true; // We catch only the first title element. else if ("redirect".equals(localName) && attributes.getValue("title") != null) { progressLogger.update(); redirects.put(title.copy(), attributes.getValue("title")); } } @Override public void endElement(String uri, String localName, String qName) throws SAXException { if ("title".equals(localName)) inTitle = false; } @Override public void characters(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } @Override public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException { if (inTitle) title.append(ch, start, length); } }; InputStream in = new FileInputStream(jsapResult.getString("file")); if (jsapResult.userSpecified("bzip2")) in = new BZip2CompressorInputStream(in); parser.parse(new InputSource(new InputStreamReader(new FastBufferedInputStream(in), Charsets.UTF_8)), handler); progressLogger.done(); final Object2LongLinkedOpenHashMap<MutableString> resolved = new Object2LongLinkedOpenHashMap<MutableString>(); final VirtualDocumentResolver vdr = (VirtualDocumentResolver) BinIO.loadObject(jsapResult.getString("vdr")); progressLogger.expectedUpdates = redirects.size(); progressLogger.start("Examining redirects..."); for (Map.Entry<MutableString, String> e : redirects.entrySet()) { final MutableString start = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getKey().toString(), true)); final MutableString end = new MutableString().append(baseURL) .append(Encoder.encodeTitleToUrl(e.getValue(), true)); final long s = vdr.resolve(start); if (s == -1) { final long t = vdr.resolve(end); if (t != -1) resolved.put(start.copy(), t); else LOGGER.warn("Failed redirect: " + start + " -> " + end); } else LOGGER.warn("URL " + start + " is already known to the virtual document resolver"); progressLogger.lightUpdate(); } progressLogger.done(); //System.err.println(resolved); final Iterable<MutableString> allURIs = Iterables .concat(new FileLinesCollection(jsapResult.getString("uris"), "UTF-8"), resolved.keySet()); final long numberOfDocuments = vdr.numberOfDocuments(); final TransformationStrategy<CharSequence> transformationStrategy = jsapResult.userSpecified("iso") ? TransformationStrategies.iso() : TransformationStrategies.utf16(); BinIO.storeObject(new URLMPHVirtualDocumentResolver(new SignedRedirectedStringMap(numberOfDocuments, new ShiftAddXorSignedStringMap(allURIs.iterator(), new MWHCFunction.Builder<CharSequence>().keys(allURIs).transform(transformationStrategy) .build(), jsapResult.getInt("width")), resolved.values().toLongArray())), jsapResult.getString("redvdr")); }