List of usage examples for the org.apache.lucene.document.StoredField constructor
public StoredField(String name, double value)
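StoredField(String, double) only stores the value for later retrieval; it does not make it searchable, which is why every example below pairs it with a point field or a doc-values field. A minimal sketch of that pairing; the field name "price" and the surrounding IndexWriter are illustrative assumptions, not taken from the projects below:

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.document.StoredField;

    Document doc = new Document();
    double price = 19.99;                       // "price" is a hypothetical field name
    doc.add(new StoredField("price", price));   // retrievable from search results, not indexed
    doc.add(new DoublePoint("price", price));   // indexed for exact-match and range queries
    // indexWriter.addDocument(doc);            // assumes an existing IndexWriter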
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
public static Document addNumericField(Document doc, String propertyName, long propertyValue, boolean stored) {
    // StoredField is used if the property needs to be stored in the lucene document
    if (stored) {
        doc.add(new StoredField(propertyName, propertyValue));
    }

    // LongPoint adds an index field to the document that allows for efficient search
    // and range queries
    doc.add(new LongPoint(propertyName, propertyValue));

    // NumericDocValues allow for efficient group operations for a property.
    // TODO Investigate and revert code to use 'sort' to determine the type of DocValuesField
    doc.add(new NumericDocValuesField(propertyName, propertyValue));

    return doc;
}
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
public static Document addNumericField(Document doc, String propertyName, double propertyValue, boolean stored) {
    long longPropertyValue = NumericUtils.doubleToSortableLong(propertyValue);

    // StoredField is used if the property needs to be stored in the lucene document
    if (stored) {
        doc.add(new StoredField(propertyName, propertyValue));
    }

    // DoublePoint adds an index field to the document that allows for efficient search
    // and range queries
    doc.add(new DoublePoint(propertyName, propertyValue));

    // NumericDocValues allow for efficient group operations for a property.
    // TODO Investigate and revert code to use 'sort' to determine the type of DocValuesField
    doc.add(new NumericDocValuesField(propertyName, longPropertyValue));

    return doc;
}
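On the query side, a caller of the helper above can read the original double back from a hit only when the property was indexed with stored == true. A sketch of that retrieval, assuming an existing IndexSearcher named searcher and a hypothetical property name "latitude":

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.DoublePoint;
    import org.apache.lucene.index.IndexableField;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;

    // "latitude" and 'searcher' are illustrative assumptions.
    TopDocs hits = searcher.search(DoublePoint.newRangeQuery("latitude", 40.0, 50.0), 10);
    for (ScoreDoc sd : hits.scoreDocs) {
        Document hit = searcher.doc(sd.doc);
        IndexableField stored = hit.getField("latitude");
        if (stored != null) {                          // present only if indexed with stored == true
            double latitude = stored.numericValue().doubleValue();
        }
    }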
From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java
License:Open Source License
void addNumericField(String propertyName, long propertyValue, boolean isStored, boolean isCollectionItem, boolean sorted) {
    if (isStored) {
        Field field = isCollectionItem
                ? new StoredField(propertyName, propertyValue)
                : getAndSetStoredField(propertyName, propertyValue);
        this.doc.add(field);
    }

    // LongPoint adds an index field to the document that allows for efficient search
    // and range queries
    if (isCollectionItem) {
        this.doc.add(new LongPoint(propertyName, propertyValue));
    } else {
        LongPoint lpField = this.longPointFields.computeIfAbsent(propertyName, (k) -> {
            return new LongPoint(propertyName, propertyValue);
        });
        lpField.setLongValue(propertyValue);
        this.doc.add(lpField);
    }

    // NumericDocValues allow for efficient group operations for a property.
    NumericDocValuesField ndField = getAndSetNumericField(propertyName, propertyValue, isCollectionItem);
    this.doc.add(ndField);

    if (sorted) {
        // special handling for groupBy queries: a docValuesField can not be added twice,
        // so we suffix the property name with "_group" and add a SortedDocValuesField
        Field sdField = getAndSetSortedStoredField(propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX,
                Long.toString(propertyValue));
        this.doc.add(sdField);
    }
}
From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java
License:Open Source License
private void addNumericField(String propertyName, double propertyValue, boolean stored, boolean isCollectionItem, boolean sorted) {
    long longPropertyValue = NumericUtils.doubleToSortableLong(propertyValue);

    if (stored) {
        Field field = isCollectionItem
                ? new StoredField(propertyName, propertyValue)
                : getAndSetStoredField(propertyName, propertyValue);
        this.doc.add(field);
    }

    // DoublePoint adds an index field to the document that allows for efficient search
    // and range queries
    if (isCollectionItem) {
        this.doc.add(new DoublePoint(propertyName, propertyValue));
    } else {
        DoublePoint dpField = this.doublePointFields.computeIfAbsent(propertyName, (k) -> {
            return new DoublePoint(propertyName, propertyValue);
        });
        dpField.setDoubleValue(propertyValue);
        this.doc.add(dpField);
    }

    // NumericDocValues allow for efficient group operations for a property.
    NumericDocValuesField ndField = getAndSetNumericField(propertyName, longPropertyValue, isCollectionItem);
    this.doc.add(ndField);

    if (sorted) {
        // special handling for groupBy queries
        Field sdField = getAndSetSortedStoredField(propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX,
                Double.toString(propertyValue));
        this.doc.add(sdField);
    }
}
From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java
License:Open Source License
private Field getAndSetStoredField(String name, Long value) {
    Field f = this.storedFields.computeIfAbsent(name, (k) -> {
        return new StoredField(name, value);
    });
    f.setLongValue(value);
    return f;
}
From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java
License:Open Source License
private Field getAndSetStoredField(String name, Double value) {
    Field f = this.storedFields.computeIfAbsent(name, (k) -> {
        return new StoredField(name, value);
    });
    f.setDoubleValue(value);
    return f;
}
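Both overloads follow Lucene's field-reuse pattern: the StoredField instance is created once per property name, cached, and only its value is changed before the document is repopulated, avoiding a new Field allocation per document. A stand-alone sketch of the same idea; the cache map, field name and IndexWriter are illustrative assumptions:

    import java.util.HashMap;
    import java.util.Map;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StoredField;

    Map<String, Field> storedFields = new HashMap<>();   // hypothetical cache, like 'storedFields' above

    Field f = storedFields.computeIfAbsent("documentVersion", k -> new StoredField(k, 0L));
    f.setLongValue(42L);                                  // reuse the cached instance for the next document
    Document doc = new Document();
    doc.add(f);
    // indexWriter.addDocument(doc);                      // assumes an existing IndexWriter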
From source file:de.carne.filescanner.swt.main.SearchIndex.java
License:Open Source License
private Document buildDocument(BytesRef resultKey, long resultEnd, String resultContent) {
    Document document = new Document();
    document.add(new SortedDocValuesField(FIELD_ID, resultKey));
    document.add(new StoredField(FIELD_KEY_STORED, resultKey));
    document.add(new StoredField(FIELD_END_STORED, resultEnd));
    document.add(new TextField(FIELD_CONTENT, resultContent, Store.NO));
    return document;
}
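FIELD_KEY_STORED and FIELD_END_STORED are stored-only, so they are read back from a matched document rather than queried. A retrieval sketch, assuming an existing IndexSearcher named searcher and a hit's document id docId (both illustrative):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.util.BytesRef;

    Document hit = searcher.doc(docId);                        // 'searcher' and 'docId' are assumed
    BytesRef resultKey = hit.getBinaryValue(FIELD_KEY_STORED); // StoredField(String, BytesRef) round-trips here
    long resultEnd = hit.getField(FIELD_END_STORED).numericValue().longValue();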
From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java
License:Open Source License
/**
 * Records a new data entity, together with the current time as 'last crawled/checked time'.
 *
 * @param strDataEntityId an identifier for a data entity that is independent of the content of this entity. It is only for identifying the occurrence,
 *            not for checking whether it has changed (e.g. a file name)
 * @param strDataEntityContentFingerprint some fingerprint/identifier that hints whether the content of the data entity has changed, e.g. the modified
 *            date of a file
 * @param strMasterDataEntityId optional: an entity id of another data entity that is our 'master', which means that when the master is updated with
 *            {@link #updateDataEntityLastCrawledTime(String)}, all associated slaves will also be updated. This covers, for example, a second crawling
 *            run over an RSS file where leech recognizes that the file hasn't changed. We then don't want to descend into the file unnecessarily and mark
 *            each entry on its own; since no subentry has changed, we can immediately mark them all as processed with
 *            {@link #updateDataEntityLastCrawledTime(String)} on the master dataEntityId, which is the one from the RSS file. Leave it null or empty if
 *            you don't need it.
 *
 * @throws IOException
 * @throws CorruptIndexException
 */
public void addDataEntity(String strDataEntityId, String strDataEntityContentFingerprint, String strMasterDataEntityId)
        throws CorruptIndexException, IOException {
    Document doc = new Document();

    doc.add(new StringField(dataEntityId, strDataEntityId, Store.YES));
    doc.add(new StringField(dataEntityContentFingerprint, strDataEntityContentFingerprint, Store.YES));
    doc.add(new LongPoint(lastCrawledTime, System.currentTimeMillis()));
    doc.add(new StoredField(lastCrawledTime, System.currentTimeMillis()));

    if (!StringUtils.nullOrWhitespace(strMasterDataEntityId))
        doc.add(new StringField(masterDataEntityId, strMasterDataEntityId, Store.YES));

    m_indexWriter.addDocument(doc);
}
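The history pairs a LongPoint (queryable, not stored) with a StoredField (stored, not queryable) under the same lastCrawledTime field name, so entries can be both range-filtered and read back. A sketch of that, assuming an existing IndexSearcher named searcher and a long cutoff timestamp (both illustrative):

    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.LongPoint;
    import org.apache.lucene.search.ScoreDoc;
    import org.apache.lucene.search.TopDocs;

    // Entities whose last crawled time lies before 'cutoff'; 'searcher' and 'cutoff' are assumed.
    TopDocs stale = searcher.search(LongPoint.newRangeQuery(lastCrawledTime, 0L, cutoff), 100);
    for (ScoreDoc sd : stale.scoreDocs) {
        Document entry = searcher.doc(sd.doc);
        String id = entry.get(dataEntityId);
        // The StoredField companion makes the indexed timestamp retrievable again.
        long crawledAt = entry.getField(lastCrawledTime).numericValue().longValue();
    }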
From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java
License:Open Source License
/**
 * Updates a whole data entity: same as {@link #addDataEntity(String, String, String)}, but removes a former entry before storing the new one.
 *
 * @param strDataEntityId an identifier for a data entity that is independent of the content of this entity. It is only for identifying the occurrence,
 *            not for checking whether it has changed (e.g. a file name)
 * @param strDataEntityContentFingerprint some fingerprint/identifier that hints whether the content of the data entity has changed, e.g. the modified
 *            date of a file
 * @param strMasterDataEntityId optional: an entity id of another data entity that is our 'master', which means that when the master is updated with
 *            {@link #updateDataEntityLastCrawledTime(String)}, all associated slaves will also be updated. This covers, for example, a second crawling
 *            run over an RSS file where leech recognizes that the file hasn't changed. We then don't want to descend into the file unnecessarily and mark
 *            each entry on its own; since no subentry has changed, we can immediately mark them all as processed with
 *            {@link #updateDataEntityLastCrawledTime(String)} on the master dataEntityId, which is the one from the RSS file. Leave it null or empty if
 *            you don't need it.
 *
 * @throws IOException
 * @throws CorruptIndexException
 */
public void updateDataEntity(String strDataEntityId, String strDataEntityContentFingerprint, String strMasterDataEntityId)
        throws CorruptIndexException, IOException {
    Term termId = new Term(dataEntityId, strDataEntityId);

    Document doc = new Document();

    doc.add(new StringField(dataEntityId, strDataEntityId, Store.YES));
    doc.add(new StringField(dataEntityContentFingerprint, strDataEntityContentFingerprint, Store.YES));
    doc.add(new LongPoint(lastCrawledTime, System.currentTimeMillis()));
    doc.add(new StoredField(lastCrawledTime, System.currentTimeMillis()));

    if (!StringUtils.nullOrWhitespace(strMasterDataEntityId))
        doc.add(new StringField(masterDataEntityId, strMasterDataEntityId, Store.YES));

    m_indexWriter.updateDocument(termId, doc);
}
From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java
License:Open Source License
/**
 * Sets a data entity's 'last crawled/checked time' entry to the current time. If this data entity is a master entity, all slave documents will also be
 * updated. You can mark an entity as a master entity with {@link #addDataEntity(String, String, String)} or {@link #updateDataEntity(String, String, String)}.
 *
 * @param strDataEntityId the data entity which is finally checked/crawled
 *
 * @throws IOException
 * @throws CorruptIndexException
 */
public void updateDataEntityLastCrawledTime(String strDataEntityId) throws CorruptIndexException, IOException {
    Term termId = new Term(dataEntityId, strDataEntityId);

    refreshIndexReaderz();

    TopDocs topDocs = m_indexSearcher.search(new TermQuery(termId), 1);
    if (topDocs.totalHits == 0)
        throw new IllegalStateException(
                "there has to be a data entity entry with id " + strDataEntityId + " for updating. Nothing was found.");

    long lCurrentTime = System.currentTimeMillis();

    Document doc = m_indexReader.document(topDocs.scoreDocs[0].doc);
    doc.removeFields(lastCrawledTime);
    doc.add(new LongPoint(lastCrawledTime, lCurrentTime));
    doc.add(new StoredField(lastCrawledTime, lCurrentTime));

    m_indexWriter.updateDocument(termId, doc);

    // if this entity is a master data entity, all associated slaves must be updated as well
    termId = new Term(masterDataEntityId, strDataEntityId);
    topDocs = m_indexSearcher.search(new TermQuery(termId), Integer.MAX_VALUE);

    for (int i = 0; i < topDocs.scoreDocs.length; i++) {
        Document slaveDoc = m_indexReader.document(topDocs.scoreDocs[i].doc);

        slaveDoc.removeFields(lastCrawledTime);
        slaveDoc.add(new LongPoint(lastCrawledTime, lCurrentTime));
        slaveDoc.add(new StoredField(lastCrawledTime, lCurrentTime));

        // update each slave under its own id term so the other slaves stay intact
        m_indexWriter.updateDocument(new Term(dataEntityId, slaveDoc.get(dataEntityId)), slaveDoc);
    }
}
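Taken together, the methods above implement the master/slave bookkeeping described in the Javadoc: slaves register the master's id, and updating the master touches every slave. A hedged usage sketch, assuming an IncrementalCrawlingHistory instance named history; the ids and fingerprints are made up:

    history.addDataEntity("feed.rss", "2024-01-01T00:00:00Z", null);    // master entry for the RSS file
    history.addDataEntity("feed.rss#item-1", "hash-1", "feed.rss");     // slave entries referencing the master
    history.addDataEntity("feed.rss#item-2", "hash-2", "feed.rss");

    // On a later run where the RSS file is unchanged, one call refreshes the
    // 'last crawled/checked time' of the master and of every associated slave.
    history.updateDataEntityLastCrawledTime("feed.rss");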