List of usage examples for com.google.gson.stream.JsonReader.close()
public void close() throws IOException
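JsonReader implements java.io.Closeable, so close() releases the reader and the underlying Reader it wraps. Before the real-world examples, here is a minimal sketch of the idiomatic pattern (the file name and JSON shape are hypothetical, not taken from any of the sources below); on Java 7+ a try-with-resources block calls close() automatically:

import com.google.gson.stream.JsonReader;
import java.io.FileReader;
import java.io.IOException;

public class JsonReaderCloseSketch {
    public static void main(String[] args) throws IOException {
        // "settings.json" is a hypothetical file holding a flat object of
        // string values, e.g. {"theme": "dark", "lang": "en"}
        try (JsonReader reader = new JsonReader(new FileReader("settings.json"))) {
            reader.beginObject();
            while (reader.hasNext()) {
                String key = reader.nextName();
                String value = reader.nextString();
                System.out.println(key + " = " + value);
            }
            reader.endObject();
        } // reader.close() runs here automatically, even if parsing threw
    }
}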
From source file: com.hichinaschool.flashcards.async.DeckTask.java
License: Open Source License

private TaskData doInBackgroundImportReplace(TaskData... params) {
    // Log.i(AnkiDroidApp.TAG, "doInBackgroundImportReplace");
    Collection col = params[0].getCollection();
    String path = params[0].getString();
    Resources res = AnkiDroidApp.getInstance().getBaseContext().getResources();

    // extract the deck from the zip file
    String fileDir = AnkiDroidApp.getCurrentAnkiDroidDirectory() + "/tmpzip";
    File dir = new File(fileDir);
    if (dir.exists()) {
        BackupManager.removeDir(dir);
    }
    publishProgress(new TaskData(res.getString(R.string.import_unpacking)));
    // from anki2.py
    String colFile = fileDir + "/collection.anki2";
    ZipFile zip;
    try {
        zip = new ZipFile(new File(path), ZipFile.OPEN_READ);
    } catch (IOException e) {
        Log.e(AnkiDroidApp.TAG, "doInBackgroundImportReplace - Error while unzipping: ", e);
        AnkiDroidApp.saveExceptionReportFile(e, "doInBackgroundImportReplace0");
        return new TaskData(false);
    }
    if (!Utils.unzipFiles(zip, fileDir, new String[] { "collection.anki2", "media" }, null)
            || !(new File(colFile)).exists()) {
        return new TaskData(-2, null, false);
    }
    Collection tmpCol = null;
    try {
        tmpCol = Storage.Collection(colFile);
        if (!tmpCol.validCollection()) {
            tmpCol.close();
            return new TaskData(-2, null, false);
        }
    } finally {
        if (tmpCol != null) {
            tmpCol.close();
        }
    }
    publishProgress(new TaskData(res.getString(R.string.importing_collection)));
    String colPath;
    if (col != null) {
        // unload collection and trigger a backup
        colPath = col.getPath();
        AnkiDroidApp.closeCollection(true);
        BackupManager.performBackup(colPath, true);
    }
    // overwrite collection
    colPath = AnkiDroidApp.getCollectionPath();
    File f = new File(colFile);
    f.renameTo(new File(colPath));
    int addedCount = -1;
    try {
        col = AnkiDroidApp.openCollection(colPath);
        // because users don't have a backup of media, it's safer to import new
        // data and rely on them running a media db check to get rid of any
        // unwanted media. in the future we might also want to duplicate this step
        // import media
        HashMap<String, String> nameToNum = new HashMap<String, String>();
        HashMap<String, String> numToName = new HashMap<String, String>();
        File mediaMapFile = new File(fileDir, "media");
        if (mediaMapFile.exists()) {
            JsonReader jr = new JsonReader(new FileReader(mediaMapFile));
            jr.beginObject();
            String name;
            String num;
            while (jr.hasNext()) {
                num = jr.nextName();
                name = jr.nextString();
                nameToNum.put(name, num);
                numToName.put(num, name);
            }
            jr.endObject();
            jr.close();
        }
        String mediaDir = col.getMedia().getDir();
        int total = nameToNum.size();
        int i = 0;
        for (Map.Entry<String, String> entry : nameToNum.entrySet()) {
            String file = entry.getKey();
            String c = entry.getValue();
            File of = new File(mediaDir, file);
            if (!of.exists()) {
                Utils.unzipFiles(zip, mediaDir, new String[] { c }, numToName);
            }
            ++i;
            publishProgress(new TaskData(res.getString(R.string.import_media_count, (i + 1) * 100 / total)));
        }
        zip.close();
        // delete tmp dir
        BackupManager.removeDir(dir);
        publishProgress(new TaskData(res.getString(R.string.import_update_counts)));
        // Update the counts
        DeckTask.TaskData result = doInBackgroundLoadDeckCounts(new TaskData(col));
        if (result == null) {
            return null;
        }
        return new TaskData(addedCount, result.getObjArray(), true);
    } catch (RuntimeException e) {
        Log.e(AnkiDroidApp.TAG, "doInBackgroundImportReplace - RuntimeException: ", e);
        AnkiDroidApp.saveExceptionReportFile(e, "doInBackgroundImportReplace1");
        return new TaskData(false);
    } catch (FileNotFoundException e) {
        Log.e(AnkiDroidApp.TAG, "doInBackgroundImportReplace - FileNotFoundException: ", e);
        AnkiDroidApp.saveExceptionReportFile(e, "doInBackgroundImportReplace2");
        return new TaskData(false);
    } catch (IOException e) {
        Log.e(AnkiDroidApp.TAG, "doInBackgroundImportReplace - IOException: ", e);
        AnkiDroidApp.saveExceptionReportFile(e, "doInBackgroundImportReplace3");
        return new TaskData(false);
    }
}
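The media-map block above — open a JsonReader over the "media" file, walk one JSON object, then close() — reappears in most of the examples below. A distilled sketch of just that pattern (the helper name and surrounding class are hypothetical); unlike the original, it moves close() into a finally block so the FileReader is released even on malformed input:

import com.google.gson.stream.JsonReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;

final class MediaMapSketch {
    // Reads an apkg-style "media" file: a single JSON object mapping
    // zip entry numbers to media file names, e.g. {"0": "a.jpg", "1": "b.png"}
    static HashMap<String, String> readNumToName(File mediaMapFile) throws IOException {
        HashMap<String, String> numToName = new HashMap<>();
        JsonReader jr = new JsonReader(new FileReader(mediaMapFile));
        try {
            jr.beginObject();
            while (jr.hasNext()) {
                String num = jr.nextName();
                String name = jr.nextString();
                numToName.put(num, name);
            }
            jr.endObject();
        } finally {
            jr.close(); // also closes the wrapped FileReader
        }
        return numToName;
    }
}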
From source file: com.hichinaschool.flashcards.libanki.importer.Anki2Importer.java
License: Open Source License

public int run() {
    publishProgress(false, 0, 0, false);
    try {
        // extract the deck from the zip file
        String tempDir = AnkiDroidApp.getCurrentAnkiDroidDirectory() + "/tmpzip";
        // from anki2.py
        String colFile = tempDir + "/collection.anki2";
        if (!Utils.unzipFiles(mZip, tempDir, new String[] { "collection.anki2", "media" }, null)
                || !(new File(colFile)).exists() || !Storage.Collection(colFile).validCollection()) {
            return -2;
        }
        // we need the media dict in advance, and we'll need a map of fname number to use during the import
        File mediaMapFile = new File(tempDir, "media");
        HashMap<String, String> numToName = new HashMap<String, String>();
        if (mediaMapFile.exists()) {
            JsonReader jr = new JsonReader(new FileReader(mediaMapFile));
            jr.beginObject();
            String name;
            String num;
            while (jr.hasNext()) {
                num = jr.nextName();
                name = jr.nextString();
                nameToNum.put(name, num);
                numToName.put(num, name);
            }
            jr.endObject();
            jr.close();
        }
        _prepareFiles(colFile);
        publishProgress(true, 0, 0, false);
        int cnt = -1;
        try {
            cnt = _import();
        } finally {
            // do not close collection but close only db (in order not to confuse access counting in storage.java
            // Note that the media database is still open and needs to be closed below.
            AnkiDatabaseManager.closeDatabase(mSrc.getPath());
        }
        // import static media
        String mediaDir = mCol.getMedia().getDir();
        if (nameToNum.size() != 0) {
            for (Map.Entry<String, String> entry : nameToNum.entrySet()) {
                String file = entry.getKey();
                String c = entry.getValue();
                if (!file.startsWith("_") && !file.startsWith("latex-")) {
                    continue;
                }
                File of = new File(mediaDir, file);
                if (!of.exists()) {
                    Utils.unzipFiles(mZip, mediaDir, new String[] { c }, numToName);
                }
            }
        }
        mZip.close();
        mSrc.getMedia().close();
        // delete tmp dir
        File dir = new File(tempDir);
        BackupManager.removeDir(dir);
        publishProgress(true, 100, 100, true);
        return cnt;
    } catch (RuntimeException e) {
        Log.e(AnkiDroidApp.TAG, "RuntimeException while importing ", e);
        return -1;
    } catch (IOException e) {
        Log.e(AnkiDroidApp.TAG, "IOException while importing ", e);
        return -1;
    }
}
From source file: com.ichi2.anki.AnkiDroidProxy.java
License: Open Source License

/**
 * Parses a JSON InputStream to a list of SharedDecks efficiently
 *
 * @param is InputStream to parse and convert. It only works with the reply from
 *           getSharedDecks request
 * @return List of SharedDecks that were parsed
 * @throws Exception
 */
public static List<SharedDeck> parseGetSharedDecksResponce(InputStream is) throws Exception {
    BufferedReader rd = new BufferedReader(new InputStreamReader(is), 409600);
    JsonReader reader = new JsonReader(rd);
    List<SharedDeck> sharedDecks = new ArrayList<SharedDeck>();
    try {
        reader.beginArray();
        int count = 0;
        while (reader.hasNext()) {
            SharedDeck sd = new SharedDeck();
            reader.beginArray();
            sd.setId(reader.nextInt());       // SD_ID
            reader.skipValue();               // SD_USERNAME
            sd.setTitle(reader.nextString()); // SD_TITLE
            reader.skipValue();               // SD_DESCRIPTION
            reader.skipValue();               // SD_TAGS
            reader.skipValue();               // SD_VERSION
            sd.setFacts(reader.nextInt());    // SD_FACTS
            sd.setSize(reader.nextInt());     // SD_SIZE
            reader.skipValue();               // SD_COUNT
            reader.skipValue();               // SD_MODIFIED
            reader.skipValue();               // SD_FNAME
            reader.endArray();
            sharedDecks.add(sd);
            count++;
        }
        reader.endArray();
        reader.close();
        Log.d(AnkiDroidApp.TAG, "parseGetSharedDecksResponce: found " + count + " shared decks");
    } catch (Exception e) {
        Log.e(AnkiDroidApp.TAG, Log.getStackTraceString(e));
        sharedDecks.clear();
        throw e;
    }
    return sharedDecks;
}
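Note that reader.close() above sits inside the try block, so the reader (and the 400 KB BufferedReader behind it) is never closed if parsing throws. A sketch of the safer shape (not the project's actual code, and with skipValue() standing in for the per-deck parsing), with close() in finally:

import com.google.gson.stream.JsonReader;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;

final class SafeArrayParseSketch {
    static int countElements(InputStream is) throws IOException {
        JsonReader reader = new JsonReader(new BufferedReader(new InputStreamReader(is), 409600));
        try {
            int count = 0;
            reader.beginArray();
            while (reader.hasNext()) {
                reader.skipValue(); // stand-in for the per-deck parsing above
                count++;
            }
            reader.endArray();
            return count;
        } finally {
            reader.close(); // runs on both the success and exception paths
        }
    }
}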
From source file: com.ichi2.async.DeckTask.java
License: Open Source License

private TaskData doInBackgroundImportReplace(TaskData... params) {
    Timber.d("doInBackgroundImportReplace");
    Collection col = params[0].getCollection();
    String path = params[0].getString();
    Resources res = AnkiDroidApp.getInstance().getBaseContext().getResources();

    // extract the deck from the zip file
    String fileDir = AnkiDroidApp.getCurrentAnkiDroidDirectory() + "/tmpzip";
    File dir = new File(fileDir);
    if (dir.exists()) {
        BackupManager.removeDir(dir);
    }
    publishProgress(new TaskData(res.getString(R.string.import_unpacking)));
    // from anki2.py
    String colFile = fileDir + "/collection.anki2";
    ZipFile zip;
    try {
        zip = new ZipFile(new File(path), ZipFile.OPEN_READ);
    } catch (IOException e) {
        Timber.e(e, "doInBackgroundImportReplace - Error while unzipping");
        AnkiDroidApp.sendExceptionReport(e, "doInBackgroundImportReplace0");
        return new TaskData(false);
    }
    if (!Utils.unzipFiles(zip, fileDir, new String[] { "collection.anki2", "media" }, null)
            || !(new File(colFile)).exists()) {
        return new TaskData(-2, null, false);
    }
    Collection tmpCol = null;
    try {
        tmpCol = Storage.Collection(colFile);
        if (!tmpCol.validCollection()) {
            tmpCol.close();
            return new TaskData(-2, null, false);
        }
    } catch (Exception e) {
        Timber.e("Error opening new collection file... probably it's invalid");
        try {
            tmpCol.close();
        } catch (Exception e2) {
            // do nothing
        }
        return new TaskData(-2, null, false);
    } finally {
        if (tmpCol != null) {
            tmpCol.close();
        }
    }
    publishProgress(new TaskData(res.getString(R.string.importing_collection)));
    String colPath;
    if (col != null) {
        // unload collection and trigger a backup
        colPath = col.getPath();
        AnkiDroidApp.closeCollection(true);
        BackupManager.performBackupInBackground(colPath, true);
    }
    // overwrite collection
    colPath = AnkiDroidApp.getCollectionPath();
    File f = new File(colFile);
    if (!f.renameTo(new File(colPath))) {
        // Exit early if this didn't work
        return new TaskData(-2, null, false);
    }
    int addedCount = -1;
    try {
        // open using force close of old collection, as background loader may have reopened the col
        col = AnkiDroidApp.openCollection(colPath, true);
        // because users don't have a backup of media, it's safer to import new
        // data and rely on them running a media db check to get rid of any
        // unwanted media. in the future we might also want to duplicate this step
        // import media
        HashMap<String, String> nameToNum = new HashMap<String, String>();
        HashMap<String, String> numToName = new HashMap<String, String>();
        File mediaMapFile = new File(fileDir, "media");
        if (mediaMapFile.exists()) {
            JsonReader jr = new JsonReader(new FileReader(mediaMapFile));
            jr.beginObject();
            String name;
            String num;
            while (jr.hasNext()) {
                num = jr.nextName();
                name = jr.nextString();
                nameToNum.put(name, num);
                numToName.put(num, name);
            }
            jr.endObject();
            jr.close();
        }
        String mediaDir = col.getMedia().dir();
        int total = nameToNum.size();
        int i = 0;
        for (Map.Entry<String, String> entry : nameToNum.entrySet()) {
            String file = entry.getKey();
            String c = entry.getValue();
            File of = new File(mediaDir, file);
            if (!of.exists()) {
                Utils.unzipFiles(zip, mediaDir, new String[] { c }, numToName);
            }
            ++i;
            publishProgress(new TaskData(res.getString(R.string.import_media_count, (i + 1) * 100 / total)));
        }
        zip.close();
        // delete tmp dir
        BackupManager.removeDir(dir);
        publishProgress(new TaskData(res.getString(R.string.import_update_counts)));
        // Update the counts
        DeckTask.TaskData result = doInBackgroundLoadDeckCounts(new TaskData(col));
        if (result == null) {
            return null;
        }
        return new TaskData(addedCount, result.getObjArray(), true);
    } catch (RuntimeException e) {
        Timber.e(e, "doInBackgroundImportReplace - RuntimeException");
        AnkiDroidApp.sendExceptionReport(e, "doInBackgroundImportReplace1");
        return new TaskData(false);
    } catch (FileNotFoundException e) {
        Timber.e(e, "doInBackgroundImportReplace - FileNotFoundException");
        AnkiDroidApp.sendExceptionReport(e, "doInBackgroundImportReplace2");
        return new TaskData(false);
    } catch (IOException e) {
        Timber.e(e, "doInBackgroundImportReplace - IOException");
        AnkiDroidApp.sendExceptionReport(e, "doInBackgroundImportReplace3");
        return new TaskData(false);
    }
}
From source file: com.ichi2.libanki.importer.Anki2Importer.java
License: Open Source License

public int run() {
    publishProgress(false, 0, 0, false);
    try {
        // extract the deck from the zip file
        String tempDir = AnkiDroidApp.getCurrentAnkiDroidDirectory() + "/tmpzip";
        // from anki2.py
        String colFile = tempDir + "/collection.anki2";
        if (!Utils.unzipFiles(mZip, tempDir, new String[] { "collection.anki2", "media" }, null)
                || !(new File(colFile)).exists() || !Storage.Collection(colFile).validCollection()) {
            return -2;
        }
        // we need the media dict in advance, and we'll need a map of fname number to use during the import
        File mediaMapFile = new File(tempDir, "media");
        HashMap<String, String> numToName = new HashMap<String, String>();
        if (mediaMapFile.exists()) {
            JsonReader jr = new JsonReader(new FileReader(mediaMapFile));
            jr.beginObject();
            String name;
            String num;
            while (jr.hasNext()) {
                num = jr.nextName();
                name = jr.nextString();
                nameToNum.put(name, num);
                numToName.put(num, name);
            }
            jr.endObject();
            jr.close();
        }
        _prepareFiles(colFile);
        publishProgress(true, 0, 0, false);
        int cnt = -1;
        try {
            cnt = _import();
        } finally {
            // do not close collection but close only db (in order not to confuse access counting in storage.java
            // Note that the media database is still open and needs to be closed below.
            AnkiDatabaseManager.closeDatabase(mSrc.getPath());
        }
        // import static media
        String mediaDir = mCol.getMedia().dir();
        if (nameToNum.size() != 0) {
            for (Map.Entry<String, String> entry : nameToNum.entrySet()) {
                String file = entry.getKey();
                String c = entry.getValue();
                if (!file.startsWith("_") && !file.startsWith("latex-")) {
                    continue;
                }
                File of = new File(mediaDir, file);
                if (!of.exists()) {
                    Utils.unzipFiles(mZip, mediaDir, new String[] { c }, numToName);
                }
            }
        }
        mZip.close();
        mSrc.getMedia().close();
        // delete tmp dir
        File dir = new File(tempDir);
        BackupManager.removeDir(dir);
        publishProgress(true, 100, 100, true);
        return cnt;
    } catch (RuntimeException e) {
        Timber.e(e, "RuntimeException while importing");
        return -1;
    } catch (IOException e) {
        Timber.e(e, "IOException while importing");
        return -1;
    }
}
From source file: com.ichi2.libanki.importer.AnkiPackageImporter.java
License: Open Source License

@Override
public void run() {
    publishProgress(0, 0, 0);
    File tempDir = new File(new File(mCol.getPath()).getParent(), "tmpzip");
    Collection tmpCol;
    try {
        // We extract the zip contents into a temporary directory and do a little more
        // validation than the desktop client to ensure the extracted collection is an apkg.
        try {
            // extract the deck from the zip file
            mZip = new ZipFile(new File(mFile), ZipFile.OPEN_READ);
            Utils.unzipFiles(mZip, tempDir.getAbsolutePath(), new String[] { "collection.anki2", "media" }, null);
        } catch (IOException e) {
            Timber.e(e, "Failed to unzip apkg.");
            mLog.add(getRes().getString(R.string.import_log_no_apkg));
            return;
        }
        String colpath = new File(tempDir, "collection.anki2").getAbsolutePath();
        if (!(new File(colpath)).exists()) {
            mLog.add(getRes().getString(R.string.import_log_no_apkg));
            return;
        }
        tmpCol = Storage.Collection(mContext, colpath);
        try {
            if (!tmpCol.validCollection()) {
                mLog.add(getRes().getString(R.string.import_log_no_apkg));
                return;
            }
        } finally {
            if (tmpCol != null) {
                tmpCol.close();
            }
        }
        mFile = colpath;
        // we need the media dict in advance, and we'll need a map of fname ->
        // number to use during the import
        File mediaMapFile = new File(tempDir, "media");
        mNameToNum = new HashMap<>();
        // We need the opposite mapping in AnkiDroid since our extraction method requires it.
        Map<String, String> numToName = new HashMap<>();
        try {
            JsonReader jr = new JsonReader(new FileReader(mediaMapFile));
            jr.beginObject();
            String name;
            String num;
            while (jr.hasNext()) {
                num = jr.nextName();
                name = jr.nextString();
                mNameToNum.put(name, num);
                numToName.put(num, name);
            }
            jr.endObject();
            jr.close();
        } catch (FileNotFoundException e) {
            Timber.e("Apkg did not contain a media dict. No media will be imported.");
        } catch (IOException e) {
            Timber.e("Malformed media dict. Media import will be incomplete.");
        }
        // run anki2 importer
        super.run();
        // import static media
        for (Map.Entry<String, String> entry : mNameToNum.entrySet()) {
            String file = entry.getKey();
            String c = entry.getValue();
            if (!file.startsWith("_") && !file.startsWith("latex-")) {
                continue;
            }
            File path = new File(mCol.getMedia().dir(), Utils.nfcNormalized(file));
            if (!path.exists()) {
                try {
                    Utils.unzipFiles(mZip, mCol.getMedia().dir(), new String[] { c }, numToName);
                } catch (IOException e) {
                    Timber.e("Failed to extract static media file. Ignoring.");
                }
            }
        }
    } finally {
        // Clean up our temporary files
        if (tempDir.exists()) {
            BackupManager.removeDir(tempDir);
        }
    }
    publishProgress(100, 100, 100);
}
From source file: com.ikanow.infinit.e.harvest.enrichment.custom.UnstructuredAnalysisHarvester.java
License: Open Source License

/**
 * processMeta - handle an individual field
 */
private void processMeta(DocumentPojo f, metaField m, String text, SourcePojo source,
        UnstructuredAnalysisConfigPojo uap) {
    boolean bAllowDuplicates = false;
    if ((null != m.flags) && m.flags.contains("U")) {
        bAllowDuplicates = true;
    }
    if ((null == m.scriptlang) || m.scriptlang.equalsIgnoreCase("regex")) {
        Pattern metaPattern = createRegex(m.script, m.flags);
        int timesToRun = 1;
        Object[] currField = null;
        if ((null != m.flags) && m.flags.contains("c")) {
            currField = f.getMetadata().get(m.fieldName);
        }
        if (null != currField) { // chained metadata
            timesToRun = currField.length;
            text = (String) currField[0];
        } //TESTED
        Matcher matcher = metaPattern.matcher(text);
        LinkedList<String> Llist = null;
        for (int ii = 0; ii < timesToRun; ++ii) {
            if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above)
                text = (String) currField[ii];
                matcher = metaPattern.matcher(text);
            } //TESTED
            StringBuffer prefix = new StringBuffer(m.fieldName).append(':');
            int nFieldNameLen = m.fieldName.length() + 1;
            try {
                while (matcher.find()) {
                    if (null == Llist) {
                        Llist = new LinkedList<String>();
                    }
                    if (null == m.groupNum) {
                        m.groupNum = 0;
                    }
                    String toAdd = matcher.group(m.groupNum);
                    if (null != m.replace) {
                        toAdd = metaPattern.matcher(toAdd).replaceFirst(m.replace);
                    }
                    if ((null != m.flags) && m.flags.contains("H")) {
                        toAdd = StringEscapeUtils.unescapeHtml(toAdd);
                    }
                    prefix.setLength(nFieldNameLen);
                    prefix.append(toAdd);
                    String dupCheck = prefix.toString();
                    if (!regexDuplicates.contains(dupCheck)) {
                        Llist.add(toAdd);
                        if (!bAllowDuplicates) {
                            regexDuplicates.add(dupCheck);
                        }
                    }
                }
            } catch (Exception e) {
                this._context.getHarvestStatus().logMessage("processMeta1: " + e.getMessage(), true);
            }
        } //(end metadata chaining handling)
        if (null != Llist) {
            if (null != currField) { // (overwrite)
                f.getMetadata().put(m.fieldName, Llist.toArray());
            } else {
                f.addToMetadata(m.fieldName, Llist.toArray());
            }
        } //TESTED
    } else if (m.scriptlang.equalsIgnoreCase("javascript")) {
        if (null == f.getMetadata()) {
            f.setMetadata(new LinkedHashMap<String, Object[]>());
        }
        // set the script engine up if necessary
        if ((null != source) && (null != uap)) {
            // (these are null if called from new processing pipeline vs legacy code)
            intializeScriptEngine(source, uap);
        }
        try {
            //TODO (INF-2488): in new format, this should only happen in between contentMeta blocks/docs
            // (also should be able to use SAH _document object I think?)
            // Javascript: the user passes in
            Object[] currField = f.getMetadata().get(m.fieldName);
            if ((null == m.flags) || m.flags.isEmpty()) {
                if (null == currField) {
                    engine.put("text", text);
                    engine.put("_iterator", null);
                }
                //(otherwise will just pass the current fields in there)
            } else { // flags specified
                if (m.flags.contains("t")) { // text
                    engine.put("text", text);
                }
                if (m.flags.contains("d")) { // entire document (minus ents and assocs)
                    GsonBuilder gb = new GsonBuilder();
                    Gson g = gb.create();
                    List<EntityPojo> ents = f.getEntities();
                    List<AssociationPojo> assocs = f.getAssociations();
                    try {
                        f.setEntities(null);
                        f.setAssociations(null);
                        engine.put("document", g.toJson(f));
                        securityManager.eval(engine, JavaScriptUtils.initScript);
                    } finally {
                        f.setEntities(ents);
                        f.setAssociations(assocs);
                    }
                }
                if (m.flags.contains("m")) { // metadata
                    GsonBuilder gb = new GsonBuilder();
                    Gson g = gb.create();
                    engine.put("_metadata", g.toJson(f.getMetadata()));
                    securityManager.eval(engine, JavaScriptUtils.iteratorMetaScript);
                }
            } //(end flags processing)
            if (null != currField) {
                f.getMetadata().remove(m.fieldName);
                GsonBuilder gb = new GsonBuilder();
                Gson g = gb.create();
                engine.put("_iterator", g.toJson(currField));
                securityManager.eval(engine, JavaScriptUtils.iteratorDocScript);
            }
            //TESTED (handling of flags, and replacing of existing fields, including when field is null but specified)
            Object returnVal = securityManager.eval(engine, m.script);
            if (null != returnVal) {
                if (returnVal instanceof String) { // The only easy case
                    Object[] array = new Object[1];
                    if ((null != m.flags) && m.flags.contains("H")) {
                        returnVal = StringEscapeUtils.unescapeHtml((String) returnVal);
                    }
                    array[0] = returnVal;
                    f.addToMetadata(m.fieldName, array);
                } else {
                    // complex object or array - in either case the engine turns these into
                    // internal.NativeArray or internal.NativeObject
                    BasicDBList outList = JavaScriptUtils.parseNativeJsObject(returnVal, engine);
                    f.addToMetadata(m.fieldName, outList.toArray());
                }
            }
        } catch (ScriptException e) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            // Just do nothing and log
            // e.printStackTrace(); //DEBUG (don't output log messages per doc)
            // logger.error(e.getMessage());
        } catch (Exception e) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            // Just do nothing and log
            // e.printStackTrace(); //DEBUG (don't output log messages per doc)
            // logger.error(e.getMessage());
        }
    } else if (m.scriptlang.equalsIgnoreCase("xpath")) {
        String xpath = m.script;
        try {
            createHtmlCleanerIfNeeded();
            int timesToRun = 1;
            Object[] currField = null;
            if ((null != m.flags) && m.flags.contains("c")) {
                currField = f.getMetadata().get(m.fieldName);
            }
            if (null != currField) { // chained metadata
                f.getMetadata().remove(m.fieldName); // (so will add to the end)
                timesToRun = currField.length;
                text = (String) currField[0];
            } //TESTED
            for (int ii = 0; ii < timesToRun; ++ii) {
                if (ii > 0) { // (else either just text, or in the above "chained metadata" initialization above)
                    text = (String) currField[ii];
                } //TESTED
                TagNode node = cleaner.clean(new ByteArrayInputStream(text.getBytes()));
                //NewCode : Only use html cleaner for cleansing
                //use JAXP for full Xpath lib
                Document doc = new DomSerializer(new CleanerProperties()).createDOM(node);
                String extraRegex = extractRegexFromXpath(xpath);
                if (extraRegex != null)
                    xpath = xpath.replace(extraRegex, "");
                XPath xpa = XPathFactory.newInstance().newXPath();
                NodeList res = (NodeList) xpa.evaluate(xpath, doc, XPathConstants.NODESET);
                if (res.getLength() > 0) {
                    if ((null != m.flags) && (m.flags.contains("o"))) { // "o" for object
                        m.groupNum = -1; // (see bConvertToObject below)
                    }
                    StringBuffer prefix = new StringBuffer(m.fieldName).append(':');
                    int nFieldNameLen = m.fieldName.length() + 1;
                    ArrayList<Object> Llist = new ArrayList<Object>(res.getLength());
                    boolean bConvertToObject = ((m.groupNum != null) && (m.groupNum == -1));
                    boolean convertToXml = ((null != m.flags) && (m.flags.contains("x")));
                    for (int i = 0; i < res.getLength(); i++) {
                        Node info_node = res.item(i);
                        if ((null != m.flags) && (m.flags.contains("g"))) {
                            Llist.add(parseHtmlTable(info_node, m.replace));
                        } else if (bConvertToObject || convertToXml) {
                            // Try to create a JSON object out of this
                            StringWriter writer = new StringWriter();
                            try {
                                Transformer transformer = TransformerFactory.newInstance().newTransformer();
                                transformer.transform(new DOMSource(info_node), new StreamResult(writer));
                            } catch (TransformerException e1) {
                                continue;
                            }
                            if (bConvertToObject) {
                                try {
                                    JSONObject subObj = XML.toJSONObject(writer.toString());
                                    if (xpath.endsWith("*")) { // (can have any number of different names here)
                                        Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj));
                                    } //TESTED
                                    else {
                                        String[] rootNames = JSONObject.getNames(subObj);
                                        if (1 == rootNames.length) {
                                            // (don't think it can't be any other number in fact)
                                            subObj = subObj.getJSONObject(rootNames[0]);
                                        }
                                        boolean bUnescapeHtml = ((null != m.flags) && m.flags.contains("H"));
                                        Llist.add(XmlToMetadataParser.convertJsonObjectToLinkedHashMap(subObj, bUnescapeHtml));
                                    } //TESTED
                                } catch (JSONException e) {
                                    // Just carry on
                                    continue;
                                } //TESTED
                            } else { // leave in XML form
                                Llist.add(writer.toString().substring(38));
                                // +38: (step over <?xml version="1.0" encoding="UTF-8"?>)
                            } //TESTED (xpath_test.json)
                        } else { // Treat this as string, either directly or via regex
                            String info = info_node.getTextContent().trim();
                            if (extraRegex == null || extraRegex.isEmpty()) {
                                prefix.setLength(nFieldNameLen);
                                prefix.append(info);
                                String dupCheck = prefix.toString();
                                if (!regexDuplicates.contains(dupCheck)) {
                                    if ((null != m.flags) && m.flags.contains("H")) {
                                        info = StringEscapeUtils.unescapeHtml(info);
                                    }
                                    Llist.add(info);
                                    if (!bAllowDuplicates) {
                                        regexDuplicates.add(dupCheck);
                                    }
                                }
                            } else { // Apply regex to the string
                                Pattern dataRegex = createRegex(extraRegex, m.flags);
                                Matcher dataMatcher = dataRegex.matcher(info);
                                boolean result = dataMatcher.find();
                                while (result) {
                                    String toAdd;
                                    if (m.groupNum != null)
                                        toAdd = dataMatcher.group(m.groupNum);
                                    else
                                        toAdd = dataMatcher.group();
                                    prefix.setLength(nFieldNameLen);
                                    prefix.append(toAdd);
                                    String dupCheck = prefix.toString();
                                    if (!regexDuplicates.contains(dupCheck)) {
                                        if ((null != m.flags) && m.flags.contains("H")) {
                                            toAdd = StringEscapeUtils.unescapeHtml(toAdd);
                                        }
                                        Llist.add(toAdd);
                                        if (!bAllowDuplicates) {
                                            regexDuplicates.add(dupCheck);
                                        }
                                    }
                                    result = dataMatcher.find();
                                }
                            } //(regex vs no regex)
                        } //(end string vs object)
                    }
                    if (Llist.size() > 0) {
                        f.addToMetadata(m.fieldName, Llist.toArray());
                    }
                }
            } //(end loop over metadata objects if applicable)
        } catch (IOException ioe) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(ioe).toString(), true);
            // Just do nothing and log
            //DEBUG (don't output log messages per doc)
            //logger.error(ioe.getMessage());
        } catch (ParserConfigurationException e1) {
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            // Just do nothing and log
            //DEBUG (don't output log messages per doc)
            //logger.error(e1.getMessage());
        } catch (XPathExpressionException e1) {
            _context.getHarvestStatus().logMessage("Error evaluating xpath expression: " + xpath, true);
        }
    } else if (m.scriptlang.equalsIgnoreCase("stream")) { // XML or JSON streaming interface
        // which one?
        try {
            boolean json = false;
            boolean xml = false;
            for (int i = 0; i < 128; ++i) {
                if ('<' == text.charAt(i)) {
                    xml = true;
                    break;
                }
                if ('{' == text.charAt(i) || '[' == text.charAt(i)) {
                    json = true;
                    break;
                }
                if (!Character.isSpaceChar(text.charAt(i))) {
                    break;
                }
            } //TESTED (too many spaces: meta_stream_test, test4; incorrect chars: test3, xml: test1, json: test2)
            boolean textNotObject = m.flags == null || !m.flags.contains("o");
            List<DocumentPojo> docs = new LinkedList<DocumentPojo>();
            List<String> levelOneFields = null;
            if (null != m.script) {
                levelOneFields = Arrays.asList(m.script.split("\\s*,\\s*"));
                if ((1 == levelOneFields.size()) && levelOneFields.get(0).isEmpty()) {
                    // convert [""] to null
                    levelOneFields = null;
                }
            } //TESTED (json and xml)
            if (xml) {
                XmlToMetadataParser parser = new XmlToMetadataParser(levelOneFields, null, null, null, null,
                        null, Integer.MAX_VALUE);
                XMLInputFactory factory = XMLInputFactory.newInstance();
                factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                XMLStreamReader reader = null;
                try {
                    reader = factory.createXMLStreamReader(new ByteArrayInputStream(text.getBytes()));
                    docs = parser.parseDocument(reader, textNotObject);
                } finally {
                    if (null != reader)
                        reader.close();
                }
            } //TESTED (meta_stream_test, test1)
            if (json) {
                JsonReader jsonReader = null;
                try {
                    JsonToMetadataParser parser = new JsonToMetadataParser(null, levelOneFields, null, null,
                            Integer.MAX_VALUE);
                    jsonReader = new JsonReader(
                            new InputStreamReader(new ByteArrayInputStream(text.getBytes()), "UTF-8"));
                    jsonReader.setLenient(true);
                    docs = parser.parseDocument(jsonReader, textNotObject);
                } finally {
                    if (null != jsonReader)
                        jsonReader.close();
                }
            } //TESTED (meta_stream_test test2)
            if (!docs.isEmpty()) {
                ArrayList<String> Llist = null;
                ArrayList<Object> LlistObj = null;
                if (textNotObject) {
                    Llist = new ArrayList<String>(docs.size());
                } else {
                    LlistObj = new ArrayList<Object>(docs.size());
                }
                for (DocumentPojo doc : docs) {
                    if ((null != doc.getFullText()) || (null != doc.getMetadata())) {
                        if (textNotObject) {
                            Llist.add(doc.getFullText());
                        } //TESTED
                        else if (xml) {
                            LlistObj.add(doc.getMetadata());
                        } //TESTED
                        else if (json) {
                            Object o = doc.getMetadata();
                            if (null != o) {
                                o = doc.getMetadata().get("json");
                                if (o instanceof Object[]) {
                                    LlistObj.addAll(Arrays.asList((Object[]) o));
                                } else if (null != o) {
                                    LlistObj.add(o);
                                } //TESTED
                            }
                        } //TESTED
                    }
                } //TESTED
                if ((null != Llist) && !Llist.isEmpty()) {
                    f.addToMetadata(m.fieldName, Llist.toArray());
                } //TESTED
                if ((null != LlistObj) && !LlistObj.isEmpty()) {
                    f.addToMetadata(m.fieldName, LlistObj.toArray());
                } //TESTED
            } //TESTED (meta_stream_test test1,test2)
        } //(end try)
        catch (Exception e) { // various parsing errors
            _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
        }
    } //TESTED (meta_stream_test)
    // (don't currently support other script types)
}
From source file: com.ikanow.infinit.e.harvest.extraction.document.file.FileHarvester.java
License: Open Source License

private void parse(InfiniteFile f, SourcePojo source) throws MalformedURLException, URISyntaxException {
    //NOTE: we only ever break out of here because of max docs in standalone mode
    // (because we don't know how to continue reading)
    DocumentPojo doc = null;
    //Determine File Extension
    String fileName = f.getName().toString();
    int mid = fileName.lastIndexOf(".");
    String extension = fileName.substring(mid + 1, fileName.length());
    //Checked to save processing time
    long fileTimestamp = (f.getDate() / 1000) * 1000;
    // (ensure truncated to seconds, since some operation somewhere hear does this...)
    Date modDate = new Date(fileTimestamp);
    //XML Data gets placed into MetaData
    boolean bIsXml = false;
    boolean bIsJson = false;
    boolean bIsLineOriented = false;
    if ((null != source.getFileConfig()) && (null != source.getFileConfig().type)) {
        extension = source.getFileConfig().type;
    }
    bIsXml = extension.equalsIgnoreCase("xml");
    bIsJson = extension.equalsIgnoreCase("json");
    bIsLineOriented = extension.endsWith("sv");
    if (bIsXml || bIsJson || bIsLineOriented) {
        int debugMaxDocs = Integer.MAX_VALUE; // by default don't set this, it's only for debug mode
        if (_context.isStandalone()) { // debug mode
            debugMaxDocs = maxDocsPerCycle;
        }
        //fast check to see if the file has changed before processing (or if it never existed)
        if (needsUpdated_SourceUrl(modDate, f.getUrlString(), source)) {
            if (0 != modDate.getTime()) {
                // if it ==0 then sourceUrl doesn't exist at all, no need to delete
                // This file already exists - in normal/managed mode will re-create
                // In streaming mode, simple skip over
                if (_streaming) {
                    return;
                } //TESTED
                DocumentPojo docRepresentingSrcUrl = new DocumentPojo();
                docRepresentingSrcUrl.setSourceUrl(f.getUrlString());
                docRepresentingSrcUrl.setSourceKey(source.getKey());
                docRepresentingSrcUrl.setCommunityId(source.getCommunityIds().iterator().next());
                sourceUrlsGettingUpdated.add(docRepresentingSrcUrl.getSourceUrl());
                this.docsToRemove.add(docRepresentingSrcUrl);
                // (can add documents with just source URL, are treated differently in the core libraries)
            }
            SourceFileConfigPojo fileSystem = source.getFileConfig();
            if ((null == fileSystem) && (bIsXml || bIsJson)) {
                fileSystem = new SourceFileConfigPojo();
            }
            XmlToMetadataParser xmlParser = null;
            JsonToMetadataParser jsonParser = null;
            String urlType = extension;
            if (bIsXml) {
                xmlParser = new XmlToMetadataParser(fileSystem.XmlRootLevelValues, fileSystem.XmlIgnoreValues,
                        fileSystem.XmlSourceName, fileSystem.XmlPrimaryKey, fileSystem.XmlAttributePrefix,
                        fileSystem.XmlPreserveCase, debugMaxDocs);
            } //TESTED
            else if (bIsJson) {
                jsonParser = new JsonToMetadataParser(fileSystem.XmlSourceName, fileSystem.XmlRootLevelValues,
                        fileSystem.XmlPrimaryKey, fileSystem.XmlIgnoreValues, debugMaxDocs);
            } //TESTED
            List<DocumentPojo> partials = null;
            try {
                if (bIsXml) {
                    XMLStreamReader xmlStreamReader = null;
                    XMLInputFactory factory = XMLInputFactory.newInstance();
                    factory.setProperty(XMLInputFactory.IS_COALESCING, true);
                    factory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
                    try {
                        xmlStreamReader = factory.createXMLStreamReader(f.getInputStream());
                        partials = xmlParser.parseDocument(xmlStreamReader);
                        long memUsage = xmlParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != xmlStreamReader)
                            xmlStreamReader.close();
                    }
                } //TESTED
                else if (bIsJson) {
                    JsonReader jsonReader = null;
                    try {
                        jsonReader = new JsonReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        jsonReader.setLenient(true);
                        partials = jsonParser.parseDocument(jsonReader);
                        long memUsage = jsonParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != jsonReader)
                            jsonReader.close();
                    }
                } //TESTED
                else if (bIsLineOriented) { // Just generate a document for every line
                    BufferedReader lineReader = null;
                    try {
                        lineReader = new BufferedReader(new InputStreamReader(f.getInputStream(), "UTF-8"));
                        CsvToMetadataParser lineParser = new CsvToMetadataParser(debugMaxDocs);
                        partials = lineParser.parseDocument(lineReader, source);
                        long memUsage = lineParser.getMemUsage();
                        _memUsage += memUsage;
                        _totalMemUsage.addAndGet(memUsage);
                    } finally {
                        if (null != lineReader)
                            lineReader.close();
                    }
                } //TESTED
                MessageDigest md5 = null; // (generates unique urls if the user doesn't below)
                try {
                    md5 = MessageDigest.getInstance("MD5");
                } catch (NoSuchAlgorithmException e) {
                    // Do nothing, unlikely to happen...
                }
                int nIndex = 0;
                int numPartials = partials.size();
                for (DocumentPojo doctoAdd : partials) {
                    nIndex++;
                    doctoAdd.setSource(source.getTitle());
                    doctoAdd.setSourceKey(source.getKey());
                    doctoAdd.setMediaType(source.getMediaType());
                    doctoAdd.setModified(new Date(fileTimestamp));
                    doctoAdd.setCreated(new Date());
                    if (null == doctoAdd.getUrl()) { // Can be set in the parser or here
                        doctoAdd.setHasDefaultUrl(true); // (ie cannot occur in a different src URL)
                        if (1 == numPartials) {
                            String urlString = f.getUrlString();
                            if (urlString.endsWith(urlType)) {
                                doctoAdd.setUrl(urlString);
                            } else {
                                doctoAdd.setUrl(new StringBuffer(urlString).append('.').append(urlType).toString());
                            }
                            // (we always set sourceUrl as the true url of the file, so want to differentiate the URL with
                            // some useful information)
                        } else if (null == doctoAdd.getMetadata()) { // Line oriented case
                            doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/").append(nIndex)
                                    .append('.').append(urlType).toString());
                        } else {
                            if (null == md5) { // Will never happen, MD5 always exists
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(doctoAdd.getMetadata().hashCode()).append('.').append(urlType)
                                        .toString());
                            } else { // This is the standard call if the XML parser has not been configured to build the URL
                                doctoAdd.setUrl(new StringBuffer(f.getUrlString()).append("/")
                                        .append(DigestUtils.md5Hex(doctoAdd.getMetadata().toString()))
                                        .append('.').append(urlType).toString());
                            }
                        } //TESTED
                    }
                    doctoAdd.setTitle(f.getName().toString());
                    doctoAdd.setPublishedDate(new Date(fileTimestamp));
                    doctoAdd.setSourceUrl(f.getUrlString());
                    // Always add to files because I'm deleting the source URL
                    files.add(doctoAdd);
                } //TESTED
            } catch (XMLStreamException e1) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (FactoryConfigurationError e1) {
                errors++;
                _context.getHarvestStatus().logMessage(e1.getMessage(), true);
            } catch (IOException e1) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            } catch (Exception e1) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e1).toString(), true);
            }
        } //(end if needs updated)
    } else { //Tika supports Excel,Word,Powerpoint,Visio, & Outlook Documents
        // (This dedup tells me if it's an add/update vs ignore - qr.isDuplicate higher up tells me if I need to add or update)
        if (needsUpdated_Url(modDate, f.getUrlString(), source)) {
            Metadata metadata = null;
            InputStream in = null;
            try {
                doc = new DocumentPojo();
                // Create a tika object (first time only)
                if (null == _tika) {
                    this.initializeTika(_context, source);
                }
                // BUGGERY
                // NEED TO LIKELY SET LIMIT TO BE 30MB or 50MB and BYPASS ANYTHING OVER THAT BELOW IS THE CODE TO DO THAT
                // tika.setMaxStringLength(30*1024*1024);
                // Disable the string length limit
                _tika.setMaxStringLength(-1);
                //input = new FileInputStream(new File(resourceLocation));
                // Create a metadata object to contain the metadata
                metadata = new Metadata();
                // Parse the file and get the text of the file
                doc.setSource(source.getTitle());
                doc.setSourceKey(source.getKey());
                doc.setMediaType(source.getMediaType());
                String fullText = "";
                in = f.getInputStream();
                try {
                    if (null == _tikaOutputFormat) { // text only
                        fullText = _tika.parseToString(in, metadata);
                    } //TESTED
                    else { // XML/HMTL
                        _tika.getParser().parse(in, _tikaOutputFormat, metadata, _tikaOutputParseContext);
                        fullText = _tikaXmlFormatWriter.toString();
                        _tikaXmlFormatWriter.getBuffer().setLength(0);
                    } //TESTED
                } finally {
                    if (null != in)
                        in.close();
                }
                int descCap = 500;
                doc.setFullText(fullText);
                if (descCap > fullText.length()) {
                    descCap = fullText.length();
                }
                doc.setDescription(fullText.substring(0, descCap));
                doc.setModified(new Date(fileTimestamp));
                doc.setCreated(new Date());
                doc.setUrl(f.getUrlString());
                doc.setTitle(f.getName().toString());
                doc.setPublishedDate(new Date(fileTimestamp));
                long memUsage = (250L * (doc.getFullText().length() + doc.getDescription().length())) / 100L;
                // 25% overhead, 2x for string->byte
                _memUsage += memUsage;
                _totalMemUsage.addAndGet(memUsage);
                // If the metadata contains a more plausible date then use that
                try {
                    String title = metadata.get(Metadata.TITLE);
                    if (null != title) {
                        doc.setTitle(title);
                    }
                } catch (Exception e) {
                    // Fine just carry on
                }
                try {
                    Date date = metadata.getDate(Metadata.CREATION_DATE); // MS Word
                    if (null != date) {
                        doc.setPublishedDate(date);
                    } else {
                        date = metadata.getDate(Metadata.DATE); // Dublin
                        if (null != date) {
                            doc.setPublishedDate(date);
                        } else {
                            date = metadata.getDate(Metadata.ORIGINAL_DATE);
                            if (null != date) {
                                doc.setPublishedDate(date);
                            }
                        }
                    }
                } catch (Exception e) {
                    // Fine just carry on
                } //TESTED
                // If the metadata contains a geotag then apply that:
                try {
                    String lat = metadata.get(Metadata.LATITUDE);
                    String lon = metadata.get(Metadata.LONGITUDE);
                    if ((null != lat) && (null != lon)) {
                        GeoPojo gt = new GeoPojo();
                        gt.lat = Double.parseDouble(lat);
                        gt.lon = Double.parseDouble(lon);
                        doc.setDocGeo(gt);
                    }
                } catch (Exception e) {
                    // Fine just carry on
                }
                // Save the entire metadata:
                doc.addToMetadata("_FILE_METADATA_", metadata);
                for (ObjectId communityId : source.getCommunityIds()) {
                    doc.setCommunityId(communityId);
                }
                files.add(doc);
                // Close the input stream
                in.close();
                in = null;
                //TESTED
            } catch (SmbException e) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (MalformedURLException e) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (UnknownHostException e) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } catch (IOException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (TikaException e) {
                errors++;
                _context.getHarvestStatus().logMessage(e.getMessage(), true);
            } catch (Exception e) {
                errors++;
                _context.getHarvestStatus().logMessage(HarvestExceptionUtils.createExceptionMessage(e).toString(), true);
            } finally {
                // Close the input stream if an error occurs
                if (null != in) {
                    try {
                        in.close();
                    } catch (IOException e) {
                        // All good, do nothing
                    }
                }
            } // end exception handling
        } // end dedup check
    } // end XML vs "office" app
    //DEBUG
    //System.out.println("FILE=" + files.size() + " / MEM=" + _memUsage + " VS " + Runtime.getRuntime().totalMemory());
}
From source file: com.imminentmeals.android.core._Parcer.java
License: Apache License

private T decode(String json) throws IOException {
    JsonReader reader = new JsonReader(new StringReader(json));
    //noinspection TryFinallyCanBeTryWithResources
    try {
        reader.beginObject();
        final Class<?> type = Class.forName(reader.nextName());
        return _gson.fromJson(reader, type);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    } finally {
        reader.close();
    }
}
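As the //noinspection comment hints, the try/finally above can collapse into try-with-resources on Java 7+ (API 19+ on Android). A sketch of that variant, assuming the same _gson field and JSON layout as the original:

private T decode(String json) throws IOException {
    // try-with-resources closes the JsonReader (and its StringReader) on every exit path
    try (JsonReader reader = new JsonReader(new StringReader(json))) {
        reader.beginObject();
        final Class<?> type = Class.forName(reader.nextName());
        return _gson.fromJson(reader, type);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}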
From source file: com.junichi11.netbeans.php.enhancements.editor.completion.Parameters.java
License: Open Source License

private static void buildParameters() {
    Gson gson = new Gson();
    for (Map.Entry<String, List<Parameter>> entry : PARAMETER_MAP.entrySet()) {
        String target = entry.getKey();
        List<Parameter> list = entry.getValue();
        list.clear();
        String filePath = String.format("resources/%s.json", target); // NOI18N
        URL resource = Function.class.getResource(filePath);
        if (resource == null) {
            continue;
        }
        try {
            InputStream inputStream = resource.openStream();
            JsonReader jsonReader = new JsonReader(
                    new BufferedReader(new InputStreamReader(inputStream, "UTF-8"))); // NOI18N
            try {
                Type type = new TypeToken<ArrayList<Parameter>>() {
                }.getType();
                ArrayList<Parameter> parameters = gson.fromJson(jsonReader, type);
                list.addAll(parameters);
            } finally {
                inputStream.close();
                jsonReader.close();
            }
        } catch (IOException ex) {
            LOGGER.log(Level.WARNING, ex.getMessage());
        }
    }
}
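One closing detail worth noting about this last example: since JsonReader.close() also closes the Reader it wraps, the jsonReader.close() call already closes the BufferedReader chain down to inputStream, so the explicit inputStream.close() in the finally block is redundant (though harmless).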