Usage examples for org.joda.time.format.ISODateTimeFormat#dateTimeParser()
public static DateTimeFormatter dateTimeParser()
From source file:org.traccar.protocol.OsmAndProtocolDecoder.java
License:Apache License
/**
 * Decodes one OsmAnd-protocol HTTP position report into a {@code Position}.
 * Parameters are read from the request's query string, falling back to the
 * POST body when the query string carries none.
 *
 * @return the decoded {@code Position}, or {@code null} when the device id
 *         cannot be identified
 */
@Override
protected Object decode(Channel channel, SocketAddress remoteAddress, Object msg) throws Exception {
    HttpRequest request = (HttpRequest) msg;
    // Try the query string first ...
    QueryStringDecoder decoder = new QueryStringDecoder(request.getUri());
    Map<String, List<String>> params = decoder.getParameters();
    if (params.isEmpty()) {
        // ... otherwise the parameters were form-posted in the body.
        decoder = new QueryStringDecoder(request.getContent().toString(Charset.defaultCharset()), false);
        params = decoder.getParameters();
    }
    Position position = new Position();
    position.setProtocol(getProtocolName());
    position.setValid(true); // valid unless an explicit "valid" parameter overrides it
    for (Map.Entry<String, List<String>> entry : params.entrySet()) {
        // Only the first value of each parameter is honoured.
        String value = entry.getValue().get(0);
        switch (entry.getKey()) {
        case "id":
        case "deviceid":
            // Unknown device aborts the whole decode.
            if (!identify(value, channel, remoteAddress)) {
                return null;
            }
            position.setDeviceId(getDeviceId());
            break;
        case "valid":
            position.setValid(Boolean.parseBoolean(value));
            break;
        case "timestamp":
            try {
                long timestamp = Long.parseLong(value);
                // Heuristic: a value that fits in 32 bits is taken to be
                // seconds since epoch and scaled to milliseconds.
                if (timestamp < Integer.MAX_VALUE) {
                    timestamp *= 1000;
                }
                position.setTime(new Date(timestamp));
            } catch (NumberFormatException error) {
                if (value.contains("T")) {
                    // ISO-8601 datetime string
                    position.setTime(new Date(ISODateTimeFormat.dateTimeParser().parseMillis(value)));
                } else {
                    // NOTE(review): SimpleDateFormat without an explicit zone
                    // parses in the JVM default timezone — confirm senders use
                    // server-local time.
                    DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
                    position.setTime(dateFormat.parse(value));
                }
            }
            break;
        case "lat":
            position.setLatitude(Double.parseDouble(value));
            break;
        case "lon":
            position.setLongitude(Double.parseDouble(value));
            break;
        case "speed":
            position.setSpeed(Double.parseDouble(value));
            break;
        case "bearing":
        case "heading":
            position.setCourse(Double.parseDouble(value));
            break;
        case "altitude":
            position.setAltitude(Double.parseDouble(value));
            break;
        case "hdop":
            position.set(Event.KEY_HDOP, Double.parseDouble(value));
            break;
        case "batt":
            position.set(Event.KEY_BATTERY, value);
            break;
        default:
            // Unrecognised parameters are preserved as generic attributes.
            position.set(entry.getKey(), value);
            break;
        }
    }
    // Reports with no usable time default to "now".
    if (position.getFixTime() == null) {
        position.setTime(new Date());
    }
    if (channel != null) {
        // Acknowledge the report and close the connection.
        HttpResponse response = new DefaultHttpResponse(HttpVersion.HTTP_1_1, HttpResponseStatus.OK);
        channel.write(response).addListener(ChannelFutureListener.CLOSE);
    }
    return position;
}
From source file:piecework.model.DateSearchFacet.java
License:Educational Community License
/**
 * Builds a query {@code Criteria} from the given date range: an exclusive
 * lower bound when "after" is set and an exclusive upper bound when "before"
 * is set. Both bounds are parsed as ISO-8601 datetimes; any parse failure is
 * logged and the criteria built so far is returned unchanged.
 */
public Criteria criteria(DateRange value) {
    Criteria result = where(query);
    DateTimeFormatter isoParser = ISODateTimeFormat.dateTimeParser();
    try {
        String after = value.getAfter();
        if (StringUtils.isNotEmpty(after)) {
            result.gt(isoParser.parseDateTime(after).toDate());
        }
        String before = value.getBefore();
        if (StringUtils.isNotEmpty(before)) {
            result.lt(isoParser.parseDateTime(before).toDate());
        }
    } catch (Exception e) {
        LOG.warn("Unable to parse " + value + " as a datetime object", e);
    }
    return result;
}
From source file:se.vgregion.pubsub.content.DateTimeUtils.java
License:Open Source License
/** * Parses common date time formats in feeds * @param value/*from ww w . java2 s . com*/ * @return */ public static DateTime parseDateTime(String value) { // Tue, 18 Jan 2011 07:42:14 +0000 DateTimeFormatter parser; if (Character.isDigit(value.charAt(0))) { // assume ISO parser = ISODateTimeFormat.dateTimeParser(); } else { // assume RSS datetime parser = new DateTimeFormatterBuilder().appendDayOfWeekShortText().appendLiteral(", ") .appendDayOfMonth(1).appendLiteral(" ").appendMonthOfYearShortText().appendLiteral(" ") .appendYear(4, 4).appendLiteral(" ").appendHourOfDay(2).appendLiteral(":").appendMinuteOfHour(2) .appendLiteral(":").appendSecondOfMinute(2).appendLiteral(" +0000").toFormatter(); } parser = parser.withZone(DateTimeZone.UTC).withLocale(Locale.US); return parser.parseDateTime(value); }
From source file:stormy.pythian.model.instance.DateFeature.java
License:Apache License
@Override public TextFeature toText() { if (value == null) { return new TextFeature(null); }//from w w w. j a v a2 s . c om return new TextFeature(ISODateTimeFormat.dateTimeParser().withOffsetParsed().print(new DateTime(value))); }
From source file:uk.ac.cam.db538.cryptosms.data.DbPendingAdapter.java
License:Apache License
/**
 * Materialises every row of the given cursor into a {@code Pending} entry.
 * Timestamps are stored as ISO-8601 text and parsed back with Joda-Time;
 * each entry also remembers its database row id.
 */
private ArrayList<Pending> getPending(Cursor cursor) {
    ArrayList<Pending> result = new ArrayList<Pending>(cursor.getCount());
    for (boolean hasRow = cursor.moveToFirst(); hasRow; hasRow = cursor.moveToNext()) {
        Pending entry = new Pending(
                cursor.getString(COLUMN_SENDER),
                ISODateTimeFormat.dateTimeParser().parseDateTime(cursor.getString(COLUMN_TIMESTAMP)),
                cursor.getBlob(COLUMN_DATA));
        entry.setRowIndex(cursor.getLong(COLUMN_ID));
        result.add(entry);
    }
    return result;
}
From source file:uk.ac.cam.db538.cryptosms.storage.MessageData.java
License:Apache License
/**
 * Constructor. Either deserialises an existing entry from encrypted storage
 * or initialises a fresh outgoing entry and persists it immediately. The new
 * instance is always registered in the shared {@code cacheMessageData}.
 *
 * @param index Which chunk of data should occupy in file
 * @param readFromFile Does this entry already exist in the file?
 * @throws StorageFileException
 */
private MessageData(long index, boolean readFromFile) throws StorageFileException {
    mEntryIndex = index;
    if (readFromFile) {
        byte[] dataEncrypted = Storage.getStorage().getEntry(index);
        byte[] dataPlain;
        try {
            dataPlain = Encryption.getEncryption().decryptSymmetricWithMasterKey(dataEncrypted);
        } catch (EncryptionException e) {
            // Surface decryption failures as storage-file errors, preserving the cause.
            throw new StorageFileException(e);
        }
        // Unpack the status flags, most significant bit first.
        byte flags = dataPlain[OFFSET_FLAGS];
        boolean deliveredPart = ((flags & (1 << 7)) == 0) ? false : true;
        boolean deliveredAll = ((flags & (1 << 6)) == 0) ? false : true;
        boolean messageOutgoing = ((flags & (1 << 5)) == 0) ? false : true;
        boolean unread = ((flags & (1 << 4)) == 0) ? false : true;
        boolean compressed = ((flags & (1 << 3)) == 0) ? false : true;
        boolean ascii = ((flags & (1 << 2)) == 0) ? false : true;
        // Timestamp is stored as ISO-8601 ASCII text at a fixed offset.
        String timeStamp = Charset.fromAscii8(dataPlain, OFFSET_TIMESTAMP, LENGTH_TIMESTAMP);
        setDeliveredPart(deliveredPart);
        setDeliveredAll(deliveredAll);
        setMessageType((messageOutgoing) ? MessageType.OUTGOING : MessageType.INCOMING);
        setUnread(unread);
        setCompressed(compressed);
        setAscii(ascii);
        setTimeStamp(ISODateTimeFormat.dateTimeParser().parseDateTime(timeStamp));
        // Body length is clamped to the fixed-size slot.
        int messageBodyLength = Math.min(LENGTH_MESSAGEBODY,
                LowLevel.getUnsignedShort(dataPlain, OFFSET_MESSAGEBODYLEN));
        setMessageBody(LowLevel.cutData(dataPlain, OFFSET_MESSAGEBODY, messageBodyLength));
        // Linked-list / parent indices into the storage file.
        setIndexParent(LowLevel.getUnsignedInt(dataPlain, OFFSET_PARENTINDEX));
        setIndexMessageParts(LowLevel.getUnsignedInt(dataPlain, OFFSET_MSGSINDEX));
        setIndexPrev(LowLevel.getUnsignedInt(dataPlain, OFFSET_PREVINDEX));
        setIndexNext(LowLevel.getUnsignedInt(dataPlain, OFFSET_NEXTINDEX));
    } else {
        // default values
        setDeliveredPart(false);
        setDeliveredAll(false);
        setMessageType(MessageType.OUTGOING);
        setUnread(false);
        setCompressed(false);
        setAscii(true);
        setTimeStamp(new DateTime());
        setMessageBody(new byte[0]);
        setIndexParent(0L);
        setIndexMessageParts(0L);
        setIndexPrev(0L);
        setIndexNext(0L);
        // New entries are persisted immediately.
        saveToFile();
    }
    synchronized (cacheMessageData) {
        cacheMessageData.add(this);
    }
}
From source file:uk.bl.wa.analyser.payload.TikaPayloadAnalyser.java
License:Open Source License
/**
 * Runs Tika detection and parsing over the payload and records the results
 * (content type, extracted text, selected metadata, generator hints) on the
 * given Solr record. Detection and parsing are both run under time limits so
 * a pathological payload cannot stall the pipeline.
 *
 * @param source the source of the input stream - typically a WARC file. Used for error logging.
 * @param solr record to populate; also the return value
 * @param is content to analyse.
 * @param url optional URL for the bytes in is.
 * @return the same {@code solr} record, populated
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public SolrRecord extract(String source, SolrRecord solr, InputStream is, String url) throws IOException {
    // Set up the TikaInputStream (optionally capped at maxBytesToParser;
    // CloseShield keeps Tika from closing the caller's stream):
    TikaInputStream tikainput = null;
    if (this.maxBytesToParser > 0) {
        tikainput = TikaInputStream
                .get(new BoundedInputStream(new CloseShieldInputStream(is), maxBytesToParser));
    } else {
        tikainput = TikaInputStream.get(new CloseShieldInputStream(is));
    }
    // Also pass URL as metadata to allow extension hints to work:
    Metadata metadata = new Metadata();
    if (url != null)
        metadata.set(Metadata.RESOURCE_NAME_KEY, url);
    final long detectStart = System.nanoTime();
    StringBuilder detected = new StringBuilder();
    try {
        // MIME detection, bounded to 10s.
        DetectRunner detect = new DetectRunner(source, tika, tikainput, detected, metadata);
        TimeLimiter.run(detect, 10000L, false);
    } catch (NoSuchFieldError e) {
        // TODO Is this an Apache POI version issue?
        log.error("Tika.detect(): " + e.getMessage() + " for " + url + " in " + source);
        addExceptionMetadata(metadata, new Exception("detect threw " + e.getClass().getCanonicalName()));
    } catch (Exception e) {
        log.error("Tika.detect(): " + e.getMessage() + " for " + url + " in " + source);
        addExceptionMetadata(metadata, e);
    }
    Instrument.timeRel("WARCPayloadAnalyzers.analyze#tikasolrextract", "TikaExtractor.extract#detect",
            detectStart);
    // Only proceed if we have a suitable type:
    if (!this.checkMime(detected.toString())) {
        if ("".equals(detected.toString())) {
            solr.addField(SolrFields.SOLR_CONTENT_TYPE, MediaType.APPLICATION_OCTET_STREAM.toString());
        } else {
            solr.addField(SolrFields.SOLR_CONTENT_TYPE, detected.toString());
        }
        return solr;
    }
    // Context
    ParseContext context = new ParseContext();
    StringWriter content = new StringWriter();
    // Override the recursive parsing:
    if (embedded == null)
        embedded = new NonRecursiveEmbeddedDocumentExtractor(context);
    context.set(EmbeddedDocumentExtractor.class, embedded);
    try {
        final long parseStart = System.nanoTime();
        ParseRunner runner = new ParseRunner(source, tika.getParser(), tikainput, this.getHandler(content),
                metadata, context);
        try {
            TimeLimiter.run(runner, parseTimeout, true);
        } catch (OutOfMemoryError o) {
            log.error("TikaExtractor.parse() - OutOfMemoryError: " + o.getMessage() + " for " + url + " in "
                    + source);
            addExceptionMetadata(metadata, new Exception("OutOfMemoryError"));
        } catch (RuntimeException r) {
            log.error("TikaExtractor.parse() - RuntimeException: " + r.getMessage() + " for " + url + " in "
                    + source);
            addExceptionMetadata(metadata, r);
        }
        Instrument.timeRel("WARCPayloadAnalyzers.analyze#tikasolrextract", "TikaExtractor.extract#parse",
                parseStart);
        // If there was a parse error, report it:
        String tikaException = metadata.get(TikaPayloadAnalyser.TIKA_PARSE_EXCEPTION);
        if (tikaException != null) {
            solr.addParseException(tikaException, new RuntimeException("Exception from Tika"));
        }
        final long extractStart = System.nanoTime();
        // Copy the body text, forcing a UTF-8 encoding:
        String output = new String(content.toString().getBytes("UTF-8"));
        if (runner.complete || !output.equals("")) {
            // Trim to the configured maximum before indexing.
            if (output.length() > this.max_text_length) {
                output = output.substring(0, this.max_text_length);
            }
            log.debug("Extracted text from: " + url + " in " + source);
            log.debug("Extracted text: " + StringUtils.left(output, 300));
            solr.setField(SolrFields.SOLR_EXTRACTED_TEXT, output);
            solr.setField(SolrFields.SOLR_EXTRACTED_TEXT_LENGTH, Integer.toString(output.length()));
        } else {
            //log.debug("Failed to extract any text from: "+url);
        }
        // Noisily report all metadata properties:
        /*
         * for( String m : metadata.names() ) {
         * log.info("For "+url.substring(url.length() - (int)
         * Math.pow(url.length(),0.85))+": "+m+" -> "+metadata.get(m)); }
         */
        // Attempt to record all metadata discovered:
        if (this.extractAllMetadata) {
            for (String m : metadata.names()) {
                // Ignore these as they are not very interesting:
                if (Metadata.RESOURCE_NAME_KEY.equalsIgnoreCase(m) || "dc:title".equalsIgnoreCase(m)
                        || "title".equalsIgnoreCase(m) || "description".equalsIgnoreCase(m)
                        || "keywords".equalsIgnoreCase(m) || Metadata.CONTENT_ENCODING.equalsIgnoreCase(m)
                        || Metadata.CONTENT_LOCATION.equalsIgnoreCase(m) || "ACTINICTITLE".equalsIgnoreCase(m)
                        || Metadata.CONTENT_TYPE.equalsIgnoreCase(m)) {
                    continue;
                }
                // Record in the document, but trim big ones:
                String value = metadata.get(m);
                if (value != null && value.length() > 100) {
                    value = value.substring(0, 100);
                }
                solr.addField(SolrFields.SOLR_TIKA_METADATA, m + "=" + value);
            }
        }
        // Also Pick out particular metadata:
        String contentType = metadata.get(Metadata.CONTENT_TYPE);
        solr.addField(SolrFields.SOLR_CONTENT_TYPE, contentType);
        solr.addField(SolrFields.SOLR_TITLE, metadata.get(DublinCore.TITLE));
        solr.addField(SolrFields.SOLR_DESCRIPTION, metadata.get(DublinCore.DESCRIPTION));
        solr.addField(SolrFields.SOLR_KEYWORDS, metadata.get("keywords"));
        solr.addField(SolrFields.SOLR_AUTHOR, metadata.get(DublinCore.CREATOR));
        solr.addField(SolrFields.CONTENT_ENCODING, metadata.get(Metadata.CONTENT_ENCODING));
        // Parse out any embedded date that can act as a created/modified date.
        // I was not able to find a single example where both created and
        // modified were defined and different. A single field is sufficient.
        // (Later assignments win: MODIFIED > DATE > CREATION_DATE.)
        String date = null;
        if (metadata.get(Metadata.CREATION_DATE) != null)
            date = metadata.get(Metadata.CREATION_DATE);
        if (metadata.get(Metadata.DATE) != null)
            date = metadata.get(Metadata.DATE);
        if (metadata.get(Metadata.MODIFIED) != null)
            date = metadata.get(Metadata.MODIFIED);
        if (date != null) {
            DateTimeFormatter df = ISODateTimeFormat.dateTimeParser();
            DateTime edate = null;
            try {
                edate = df.parseDateTime(date);
            } catch (IllegalArgumentException e) {
                log.error("Could not parse date: " + date + " from URL " + url + " in " + source);
            }
            if (edate == null) {
                // Fall back to heuristic date extraction when ISO parsing fails.
                Date javadate = Times.extractDate(date);
                if (javadate != null)
                    edate = new org.joda.time.DateTime(javadate);
            }
            if (edate != null) {
                solr.addField(SolrFields.LAST_MODIFIED_YEAR, "" + edate.getYear());
                DateTimeFormatter iso_df = ISODateTimeFormat.dateTimeNoMillis().withZone(DateTimeZone.UTC);
                // solr.getSolrDocument().setField(SolrFields.LAST_MODIFIED,
                // edate);
                solr.setField(SolrFields.LAST_MODIFIED, iso_df.print(edate));
            }
        }
        // Also look to record the software identifiers:
        // Look for generic xmp:CreatorTool
        solr.addField(SolrFields.GENERATOR, metadata.get("xmp:CreatorTool"));
        // For PDF, support other metadata tags:
        //solr.addField(SolrFields.GENERATOR, metadata.get( "creator" )); // This appears to be dc:creator i.e. author.
        solr.addField(SolrFields.GENERATOR, metadata.get("producer"));
        solr.addField(SolrFields.GENERATOR, metadata.get(Metadata.SOFTWARE));
        solr.addField(SolrFields.GENERATOR, metadata.get("software"));
        solr.addField(SolrFields.GENERATOR, metadata.get("Software"));
        solr.addField(SolrFields.GENERATOR, metadata.get("generator"));
        solr.addField(SolrFields.GENERATOR, metadata.get("Generator"));
        solr.addField(SolrFields.GENERATOR, metadata.get("ProgId"));
        //handle image EXIF metaformat
        String exifVersion = metadata.get("Exif Version");
        if (exifVersion != null) {
            solr.addField(SolrFields.EXIF_VERSION, exifVersion);
            String exif_artist = metadata.get("Artist");
            if (exif_artist != null) {
                // This is a better value for the author field
                // This potentially results in multiple author, which is valid
                solr.addField(SolrFields.SOLR_AUTHOR, exif_artist);
            }
            if (this.extractExifLocation) {
                String exif_latitude = metadata.get("GPS Latitude");
                String exif_longitude = metadata.get("GPS Longitude");
                if (exif_latitude != null && exif_longitude != null) {
                    // Convert degrees/minutes/seconds to decimal degrees.
                    double latitude = DMS2DG(exif_latitude);
                    double longitude = DMS2DG(exif_longitude);
                    try {
                        if (latitude != 0d && longitude != 0d) { // Sometimes they are defined but both 0
                            if (latitude <= 90 && latitude >= -90 && longitude <= 180 && longitude >= -180) {
                                solr.addField(SolrFields.EXIF_LOCATION, latitude + "," + longitude);
                            } else {
                                log.warn(
                                        "invalid gsp exif information:" + exif_latitude + "," + exif_longitude);
                            }
                        }
                    } catch (Exception e) {
                        //Just ignore. No GPS data added to solr
                        log.warn("error parsing exif gps data. latitude:" + exif_latitude + " longitude:"
                                + exif_longitude);
                    }
                }
            }
        } //End image exif metadata
        // Application ID, MS Office only AFAICT, and the VERSION is only doc
        String software = null;
        if (metadata.get(Metadata.APPLICATION_NAME) != null)
            software = metadata.get(Metadata.APPLICATION_NAME);
        if (metadata.get(Metadata.APPLICATION_VERSION) != null)
            software += " " + metadata.get(Metadata.APPLICATION_VERSION);
        // Images, e.g. JPEG and TIFF, can have 'Software', 'tiff:Software',
        // PNGs have a 'tEXt tEXtEntry: keyword=Software, value=GPL Ghostscript 8.71'
        String png_textentry = metadata.get("tEXt tEXtEntry");
        if (png_textentry != null && png_textentry.contains("keyword=Software, value="))
            software = png_textentry.replace("keyword=Software, value=", "");
        /* Some JPEGs have this:
         * Jpeg Comment: CREATOR: gd-jpeg v1.0 (using IJG JPEG v62), default quality
         * comment: CREATOR: gd-jpeg v1.0 (using IJG JPEG v62), default quality
         */
        if (software != null) {
            solr.addField(SolrFields.GENERATOR, software);
        }
        Instrument.timeRel("WARCPayloadAnalyzers.analyze#tikasolrextract", "TikaExtractor.extract#extract",
                extractStart);
    } catch (Exception e) {
        log.error("TikaExtractor.extract(): " + e.getMessage() + " for URL " + url + " in " + source);
    }
    // TODO: This should probably be wrapped in a method-spanning try-finally to guarantee close
    if (tikainput != null) {
        try {
            tikainput.close();
        } catch (IOException e) {
            log.warn("Exception closing TikaInputStream. This leaves tmp-files: " + e.getMessage() + " for "
                    + url + " in " + source);
        }
    }
    return solr;
}
From source file:uk.bl.wa.solr.TikaExtractor.java
License:Open Source License
/**
 * Runs Tika detection and parsing over the payload and records the results
 * (content type, extracted text, selected metadata, generator hints) on the
 * given Solr record. Detection and parsing run on separate worker threads
 * with join timeouts so a pathological payload cannot stall the caller.
 *
 * @param solr record to populate; also the return value
 * @param is content to analyse
 * @param url optional URL for the bytes in {@code is}
 * @return the same {@code solr} record, populated
 * @throws IOException
 */
@SuppressWarnings("deprecation")
public SolrRecord extract(SolrRecord solr, InputStream is, String url) throws IOException {
    // Set up the TikaInputStream (optionally capped at maxBytesToParser;
    // CloseShield keeps Tika from closing the caller's stream):
    TikaInputStream tikainput = null;
    if (this.maxBytesToParser > 0) {
        tikainput = TikaInputStream
                .get(new BoundedInputStream(new CloseShieldInputStream(is), maxBytesToParser));
    } else {
        tikainput = TikaInputStream.get(new CloseShieldInputStream(is));
    }
    // Also pass URL as metadata to allow extension hints to work:
    Metadata metadata = new Metadata();
    if (url != null)
        metadata.set(Metadata.RESOURCE_NAME_KEY, url);
    final long detectStart = System.nanoTime();
    StringBuilder detected = new StringBuilder();
    try {
        // Run detection on its own thread, waiting at most 10s.
        // NOTE(review): interrupt() after join() only signals a still-running
        // detector; it does not guarantee termination.
        DetectRunner detect = new DetectRunner(tika, tikainput, detected, metadata);
        Thread detectThread = new Thread(detect, Long.toString(System.currentTimeMillis()));
        detectThread.start();
        detectThread.join(10000L);
        detectThread.interrupt();
    } catch (NoSuchFieldError e) {
        // TODO Is this an Apache POI version issue?
        log.error("Tika.detect(): " + e.getMessage());
        addExceptionMetadata(metadata, new Exception("detect threw " + e.getClass().getCanonicalName()));
    } catch (Exception e) {
        log.error("Tika.detect(): " + e.getMessage());
        addExceptionMetadata(metadata, e);
    }
    Instrument.timeRel("WARCPayloadAnalyzers.analyze#tikasolrextract", "TikaExtractor.extract#detect",
            detectStart);
    // Only proceed if we have a suitable type:
    if (!this.checkMime(detected.toString())) {
        if ("".equals(detected.toString())) {
            solr.addField(SolrFields.SOLR_CONTENT_TYPE, MediaType.APPLICATION_OCTET_STREAM.toString());
        } else {
            solr.addField(SolrFields.SOLR_CONTENT_TYPE, detected.toString());
        }
        return solr;
    }
    // Context
    ParseContext context = new ParseContext();
    StringWriter content = new StringWriter();
    // Override the recursive parsing:
    if (embedded == null)
        embedded = new NonRecursiveEmbeddedDocumentExtractor(context);
    context.set(EmbeddedDocumentExtractor.class, embedded);
    try {
        final long parseStart = System.nanoTime();
        ParseRunner runner = new ParseRunner(tika.getParser(), tikainput, this.getHandler(content), metadata,
                context);
        Thread parseThread = new Thread(runner, Long.toString(System.currentTimeMillis()));
        try {
            // Parse on a worker thread; interrupt if the timeout elapses,
            // then give it one more timeout window to wind down.
            parseThread.start();
            parseThread.join(this.parseTimeout);
            parseThread.interrupt();
            parseThread.join(this.parseTimeout);
        } catch (OutOfMemoryError o) {
            log.error("TikaExtractor.parse() - OutOfMemoryError: " + o.getMessage());
            addExceptionMetadata(metadata, new Exception("OutOfMemoryError"));
        } catch (RuntimeException r) {
            log.error("TikaExtractor.parse() - RuntimeException: " + r.getMessage());
            addExceptionMetadata(metadata, r);
        }
        Instrument.timeRel("WARCPayloadAnalyzers.analyze#tikasolrextract", "TikaExtractor.extract#parse",
                parseStart);
        // If there was a parse error, report it:
        solr.addField(SolrFields.PARSE_ERROR, metadata.get(TikaExtractor.TIKA_PARSE_EXCEPTION));
        final long extractStart = System.nanoTime();
        // Copy the body text, forcing a UTF-8 encoding:
        String output = new String(content.toString().getBytes("UTF-8"));
        if (runner.complete || !output.equals("")) {
            // Trim to the configured maximum before indexing.
            if (output.length() > this.max_text_length) {
                output = output.substring(0, this.max_text_length);
            }
            log.debug("Extracted text from: " + url);
            log.debug("Extracted text: " + StringUtils.left(output, 300));
            solr.setField(SolrFields.SOLR_EXTRACTED_TEXT, output);
            solr.setField(SolrFields.SOLR_EXTRACTED_TEXT_LENGTH, Integer.toString(output.length()));
        } else {
            //log.debug("Failed to extract any text from: "+url);
        }
        // Noisily report all metadata properties:
        /*
         * for( String m : metadata.names() ) {
         * log.info("For "+url.substring(url.length() - (int)
         * Math.pow(url.length(),0.85))+": "+m+" -> "+metadata.get(m)); }
         */
        // Attempt to record all metadata discovered:
        if (this.extractAllMetadata) {
            for (String m : metadata.names()) {
                // Ignore these as they are not very interesting:
                if (Metadata.RESOURCE_NAME_KEY.equalsIgnoreCase(m) || "dc:title".equalsIgnoreCase(m)
                        || "title".equalsIgnoreCase(m) || "description".equalsIgnoreCase(m)
                        || "keywords".equalsIgnoreCase(m) || Metadata.CONTENT_ENCODING.equalsIgnoreCase(m)
                        || Metadata.CONTENT_LOCATION.equalsIgnoreCase(m) || "ACTINICTITLE".equalsIgnoreCase(m)
                        || Metadata.CONTENT_TYPE.equalsIgnoreCase(m)) {
                    continue;
                }
                // Record in the document, but trim big ones:
                String value = metadata.get(m);
                if (value != null && value.length() > 100) {
                    value = value.substring(0, 100);
                }
                solr.addField(SolrFields.SOLR_TIKA_METADATA, m + "=" + value);
            }
        }
        // Also Pick out particular metadata:
        String contentType = metadata.get(Metadata.CONTENT_TYPE);
        solr.addField(SolrFields.SOLR_CONTENT_TYPE, contentType);
        solr.addField(SolrFields.SOLR_TITLE, metadata.get(DublinCore.TITLE));
        solr.addField(SolrFields.SOLR_DESCRIPTION, metadata.get(DublinCore.DESCRIPTION));
        solr.addField(SolrFields.SOLR_KEYWORDS, metadata.get("keywords"));
        solr.addField(SolrFields.SOLR_AUTHOR, metadata.get(DublinCore.CREATOR));
        solr.addField(SolrFields.CONTENT_ENCODING, metadata.get(Metadata.CONTENT_ENCODING));
        // Parse out any embedded date that can act as a created/modified date.
        // Later assignments win: MODIFIED > DATE > CREATION_DATE.
        String date = null;
        if (metadata.get(Metadata.CREATION_DATE) != null)
            date = metadata.get(Metadata.CREATION_DATE);
        if (metadata.get(Metadata.DATE) != null)
            date = metadata.get(Metadata.DATE);
        if (metadata.get(Metadata.MODIFIED) != null)
            date = metadata.get(Metadata.MODIFIED);
        if (date != null) {
            DateTimeFormatter df = ISODateTimeFormat.dateTimeParser();
            DateTime edate = null;
            try {
                edate = df.parseDateTime(date);
            } catch (IllegalArgumentException e) {
                log.error("Could not parse: " + date);
            }
            if (edate == null) {
                // Fall back to heuristic date extraction when ISO parsing fails.
                Date javadate = Times.extractDate(date);
                if (javadate != null)
                    edate = new org.joda.time.DateTime(javadate);
            }
            if (edate != null) {
                solr.addField(SolrFields.LAST_MODIFIED_YEAR, "" + edate.getYear());
                DateTimeFormatter iso_df = ISODateTimeFormat.dateTimeNoMillis().withZone(DateTimeZone.UTC);
                // solr.getSolrDocument().setField(SolrFields.LAST_MODIFIED,
                // edate);
                solr.setField(SolrFields.LAST_MODIFIED, iso_df.print(edate));
            }
        }
        // Also look to record the software identifiers:
        // Look for generic xmp:CreatorTool
        solr.addField(SolrFields.GENERATOR, metadata.get("xmp:CreatorTool"));
        // For PDF, support other metadata tags:
        //solr.addField(SolrFields.GENERATOR, metadata.get( "creator" )); // This appears to be dc:creator i.e. author.
        solr.addField(SolrFields.GENERATOR, metadata.get("producer"));
        solr.addField(SolrFields.GENERATOR, metadata.get(Metadata.SOFTWARE));
        solr.addField(SolrFields.GENERATOR, metadata.get("generator"));
        solr.addField(SolrFields.GENERATOR, metadata.get("Software"));
        // Application ID, MS Office only AFAICT, and the VERSION is only doc
        String software = null;
        if (metadata.get(Metadata.APPLICATION_NAME) != null)
            software = metadata.get(Metadata.APPLICATION_NAME);
        if (metadata.get(Metadata.APPLICATION_VERSION) != null)
            software += " " + metadata.get(Metadata.APPLICATION_VERSION);
        // Images, e.g. JPEG and TIFF, can have 'Software', 'tiff:Software',
        // PNGs have a 'tEXt tEXtEntry: keyword=Software, value=GPL Ghostscript 8.71'
        String png_textentry = metadata.get("tEXt tEXtEntry");
        if (png_textentry != null && png_textentry.contains("keyword=Software, value="))
            software = png_textentry.replace("keyword=Software, value=", "");
        /* Some JPEGs have this:
         * Jpeg Comment: CREATOR: gd-jpeg v1.0 (using IJG JPEG v62), default quality
         * comment: CREATOR: gd-jpeg v1.0 (using IJG JPEG v62), default quality
         */
        if (software != null) {
            solr.addField(SolrFields.GENERATOR, software);
        }
        Instrument.timeRel("WARCPayloadAnalyzers.analyze#tikasolrextract", "TikaExtractor.extract#extract",
                extractStart);
    } catch (Exception e) {
        log.error("TikaExtractor.extract(): " + e.getMessage());
    }
    // TODO: This should probably be wrapped in a method-spanning try-finally to guarantee close
    if (tikainput != null) {
        try {
            tikainput.close();
        } catch (IOException e) {
            log.warn("Exception closing TikaInputStream. This leaves tmp-files: " + e.getMessage());
        }
    }
    return solr;
}
From source file:uk.co.visalia.brightpearl.apiclient.client.adaptors.DateTimeAdaptor.java
License:Apache License
@Override public DateTime read(JsonReader jsonReader) throws IOException { if (jsonReader.peek() == JsonToken.NULL) { jsonReader.nextNull();//from w w w . ja v a 2s .c o m return null; } String string = jsonReader.nextString(); return ISODateTimeFormat.dateTimeParser().withOffsetParsed().parseDateTime(string); }