Example usage for org.apache.lucene.document StoredField StoredField

List of usage examples for org.apache.lucene.document StoredField StoredField

Introduction

In this page you can find the example usage for org.apache.lucene.document StoredField StoredField.

Prototype

public StoredField(String name, double value) 

Source Link

Document

Create a stored-only field with the given double value.

Usage

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Adds a long-valued property to the given Lucene document under three field types:
 * an optional stored field (raw retrieval), a {@code LongPoint} (indexed search),
 * and a {@code NumericDocValuesField} (grouping/aggregation).
 *
 * @param doc           the document to augment; also returned for chaining
 * @param propertyName  name shared by all three fields
 * @param propertyValue the long value to store/index
 * @param stored        whether the raw value must be retrievable from the document
 * @return the same {@code doc} instance
 */
public static Document addNumericField(Document doc, String propertyName, long propertyValue, boolean stored) {
    // Persist the raw value only when the caller asks for retrieval.
    if (stored) {
        doc.add(new StoredField(propertyName, propertyValue));
    }

    // Point field: enables efficient equality and range queries on the value.
    doc.add(new LongPoint(propertyName, propertyValue));

    // Doc values: enable efficient grouping operations on the property.
    // TODO Investigate and revert code to use 'sort' to determine the type of DocValuesField
    doc.add(new NumericDocValuesField(propertyName, propertyValue));
    return doc;
}

From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Adds a double-valued property to the given Lucene document under three field types:
 * an optional stored field (raw retrieval), a {@code DoublePoint} (indexed search),
 * and a {@code NumericDocValuesField} holding the sortable-long encoding (grouping).
 *
 * @param doc           the document to augment; also returned for chaining
 * @param propertyName  name shared by all three fields
 * @param propertyValue the double value to store/index
 * @param stored        whether the raw value must be retrievable from the document
 * @return the same {@code doc} instance
 */
public static Document addNumericField(Document doc, String propertyName, double propertyValue,
        boolean stored) {
    // Doc values only accept longs, so encode the double in sort-preserving form.
    long sortableLongValue = NumericUtils.doubleToSortableLong(propertyValue);

    // Persist the raw value only when the caller asks for retrieval.
    if (stored) {
        doc.add(new StoredField(propertyName, propertyValue));
    }

    // Point field: enables efficient equality and range queries on the value.
    doc.add(new DoublePoint(propertyName, propertyValue));

    // Doc values: enable efficient grouping operations on the property.
    // TODO Investigate and revert code to use 'sort' to determine the type of DocValuesField
    doc.add(new NumericDocValuesField(propertyName, sortableLongValue));
    return doc;
}

From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java

License:Open Source License

/**
 * Adds a long-valued property to {@code this.doc}. Non-collection fields are cached
 * in per-name maps and reused across documents (their value is reset on each call);
 * collection items always get fresh field instances since a name may repeat.
 */
void addNumericField(String propertyName, long propertyValue, boolean isStored, boolean isCollectionItem,
        boolean sorted) {
    // Stored field keeps the raw value retrievable from the document.
    if (isStored) {
        Field storedField = isCollectionItem ? new StoredField(propertyName, propertyValue)
                : getAndSetStoredField(propertyName, propertyValue);
        this.doc.add(storedField);
    }

    // Point field: enables efficient equality and range queries.
    if (isCollectionItem) {
        this.doc.add(new LongPoint(propertyName, propertyValue));
    } else {
        // Reuse a cached LongPoint per property name; refresh its value each time.
        LongPoint pointField = this.longPointFields.computeIfAbsent(propertyName,
                k -> new LongPoint(k, propertyValue));
        pointField.setLongValue(propertyValue);
        this.doc.add(pointField);
    }

    // Numeric doc values enable efficient grouping operations on the property.
    NumericDocValuesField docValuesField = getAndSetNumericField(propertyName, propertyValue, isCollectionItem);
    this.doc.add(docValuesField);

    if (sorted) {
        // groupBy queries need a sorted doc-values field, but a doc-values field
        // cannot be added twice under one name, so suffix the name with "_group".
        Field groupField = getAndSetSortedStoredField(propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX,
                Long.toString(propertyValue));
        this.doc.add(groupField);
    }
}

From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java

License:Open Source License

/**
 * Adds a double-valued property to {@code this.doc}. Non-collection fields are cached
 * in per-name maps and reused across documents (their value is reset on each call);
 * collection items always get fresh field instances since a name may repeat.
 */
private void addNumericField(String propertyName, double propertyValue, boolean stored,
        boolean isCollectionItem, boolean sorted) {
    // Doc values only accept longs, so encode the double in sort-preserving form.
    long sortableLongValue = NumericUtils.doubleToSortableLong(propertyValue);

    // Stored field keeps the raw value retrievable from the document.
    if (stored) {
        Field storedField = isCollectionItem ? new StoredField(propertyName, propertyValue)
                : getAndSetStoredField(propertyName, propertyValue);
        this.doc.add(storedField);
    }

    // Point field: enables efficient equality and range queries.
    if (isCollectionItem) {
        this.doc.add(new DoublePoint(propertyName, propertyValue));
    } else {
        // Reuse a cached DoublePoint per property name; refresh its value each time.
        DoublePoint pointField = this.doublePointFields.computeIfAbsent(propertyName,
                k -> new DoublePoint(k, propertyValue));
        pointField.setDoubleValue(propertyValue);
        this.doc.add(pointField);
    }

    // Numeric doc values enable efficient grouping operations on the property.
    NumericDocValuesField docValuesField = getAndSetNumericField(propertyName, sortableLongValue,
            isCollectionItem);
    this.doc.add(docValuesField);

    if (sorted) {
        // Special handling for groupBy queries: add a sorted field under a
        // suffixed name so the doc-values field is not duplicated.
        Field groupField = getAndSetSortedStoredField(propertyName + GROUP_BY_PROPERTY_NAME_SUFFIX,
                Double.toString(propertyValue));
        this.doc.add(groupField);
    }
}

From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java

License:Open Source License

/**
 * Returns the cached stored field for {@code name}, creating and caching it on
 * first use, and resets it to hold the given long {@code value}.
 */
private Field getAndSetStoredField(String name, Long value) {
    Field field = this.storedFields.computeIfAbsent(name, k -> new StoredField(k, value));
    field.setLongValue(value);
    return field;
}

From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java

License:Open Source License

/**
 * Returns the cached stored field for {@code name}, creating and caching it on
 * first use, and resets it to hold the given double {@code value}.
 */
private Field getAndSetStoredField(String name, Double value) {
    Field field = this.storedFields.computeIfAbsent(name, k -> new StoredField(k, value));
    field.setDoubleValue(value);
    return field;
}

From source file:de.carne.filescanner.swt.main.SearchIndex.java

License:Open Source License

/**
 * Builds the index document for one search result: the key as sorted doc values
 * (for id-based lookup/sorting), key and end offset stored for retrieval, and the
 * content analyzed for full-text search but not stored.
 */
private Document buildDocument(BytesRef resultKey, long resultEnd, String resultContent) {
    Document doc = new Document();

    doc.add(new SortedDocValuesField(FIELD_ID, resultKey));
    doc.add(new StoredField(FIELD_KEY_STORED, resultKey));
    doc.add(new StoredField(FIELD_END_STORED, resultEnd));
    doc.add(new TextField(FIELD_CONTENT, resultContent, Store.NO));

    return doc;
}

From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java

License:Open Source License

/**
 * Records a new data entity, together with the current time as its 'last crawled/checked time'.
 *
 * @param strDataEntityId an identifier for a data entity that is independent from the content of this entity. It is
 *            only for identifying the occurrence, not for checking whether it has changed (e.g. a filename)
 * @param strDataEntityContentFingerprint some fingerprint/identifier that hints whether the content of the data
 *            entity has changed, e.g. the modified date of a file
 * @param strMasterDataEntityId optional: an entity id of another data entity that is our 'master', which means that
 *            when the master is updated with {@link #updateDataEntityLastCrawledTime(String)}, all associated slaves
 *            will also be updated. This is e.g. for the case when you are in a second run of RSS-file indexing, and
 *            leech recognizes that this file didn't change. We then don't want to go unnecessarily into the file and
 *            mark each entry on its own; we know no subentry has changed and can immediately mark them as processed
 *            with {@link #updateDataEntityLastCrawledTime(String)} on the master dataEntityId, which is the one from
 *            the RSS file. Leave it null or empty in the case you don't need to use it.
 *
 * @throws IOException
 * @throws CorruptIndexException
 */
public void addDataEntity(String strDataEntityId, String strDataEntityContentFingerprint,
        String strMasterDataEntityId) throws CorruptIndexException, IOException {

    // Take the timestamp once: the original called System.currentTimeMillis() twice,
    // so the indexed LongPoint and the StoredField could end up with different values.
    long lCurrentTime = System.currentTimeMillis();

    Document doc = new Document();

    doc.add(new StringField(dataEntityId, strDataEntityId, Store.YES));
    doc.add(new StringField(dataEntityContentFingerprint, strDataEntityContentFingerprint, Store.YES));
    // Index the time for range queries and store it for retrieval, with one consistent value.
    doc.add(new LongPoint(lastCrawledTime, lCurrentTime));
    doc.add(new StoredField(lastCrawledTime, lCurrentTime));
    if (!StringUtils.nullOrWhitespace(strMasterDataEntityId))
        doc.add(new StringField(masterDataEntityId, strMasterDataEntityId, Store.YES));

    m_indexWriter.addDocument(doc);

}

From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java

License:Open Source License

/**
 * Updates a whole data entity - same as addDataEntity, but removes a former entry before storing the new one
 * /*from  ww w  . ja  va  2s  .  c o m*/
 * @param strDataEntityId an identifier for a data entity that is independent from the content of this entity. It is only for identifying the occurence, not to
 *            check whether it has changed (e.g. a filename)
 * @param strDataEntityContentFingerprint some fingerprint/identifier that gives the hint whether the content of the data entity has changed, e.g. the modifed date of
 *            a file
 * @param strMasterDataEntityId optional: an EntityId of another data entity that is our 'master' which means that when the master is updated with
 *            {@link #updateDataEntityLastCrawledTime(String)}, all associated slaves will be also updated. This is e.g. for the case when you are in a second run for
 *            RSS-File indexing, and leech recognizes that this file didn't changed. Now we don't want to go unnecessarily into the fil and mark each entry on it's
 *            own. We know no subentry has changed, and can immediately mark them as processed with {@link #updateDataEntityLastCrawledTime(String)} on the master
 *            dataEntityId, which is the one from the RSS file. Leave it null or empty in the case you don't need to use it.
 * 
 * @throws IOException
 * @throws CorruptIndexException
 */
public void updateDataEntity(String strDataEntityId, String strDataEntityContentFingerprint,
        String strMasterDataEntityId) throws CorruptIndexException, IOException {

    Term termId = new Term(dataEntityId, strDataEntityId);

    Document doc = new Document();

    doc.add(new StringField(dataEntityId, strDataEntityId, Store.YES));
    doc.add(new StringField(dataEntityContentFingerprint, strDataEntityContentFingerprint, Store.YES));
    doc.add(new LongPoint(lastCrawledTime, System.currentTimeMillis()));
    doc.add(new StoredField(lastCrawledTime, System.currentTimeMillis()));
    if (!StringUtils.nullOrWhitespace(strMasterDataEntityId))
        doc.add(new StringField(masterDataEntityId, strMasterDataEntityId, Store.YES));

    m_indexWriter.updateDocument(termId, doc);

}

From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java

License:Open Source License

/**
 * Sets a data entities 'last crawled/checked time' entry to the current time. In the case this data entity is a master entity, all slave documents will be updated
 * also. You can set an entity as a master entity with {@link #addDataEntity(String, String, String)} or {@link #updateDataEntity(String, String, String)}
 * /*  w  w w .  j  av  a 2 s. c  om*/
 * @param strDataEntityId the data entity which is finally checked/crawled
 * 
 * @throws IOException
 * @throws CorruptIndexException
 */
public void updateDataEntityLastCrawledTime(String strDataEntityId) throws CorruptIndexException, IOException {

    Term termId = new Term(dataEntityId, strDataEntityId);

    refreshIndexReaderz();
    TopDocs topDocs = m_indexSearcher.search(new TermQuery(termId), 1);

    if (topDocs.totalHits == 0)
        throw new IllegalStateException("there has to be an data entry with Id " + strDataEntityId
                + " for updating. Nothing was found.");

    long lCurrentTime = System.currentTimeMillis();

    Document doc = m_indexReader.document(topDocs.scoreDocs[0].doc);

    doc.removeFields(lastCrawledTime);
    doc.add(new LongPoint(lastCrawledTime, lCurrentTime));
    doc.add(new StoredField(lastCrawledTime, lCurrentTime));

    m_indexWriter.updateDocument(termId, doc);

    // wenn das Teil eine MasterDataEntity ist, dann mssen alle assoziierten Sklaven auch noch aktualisiert werden

    termId = new Term(masterDataEntityId, strDataEntityId);

    topDocs = m_indexSearcher.search(new TermQuery(termId), Integer.MAX_VALUE);

    for (int i = 0; i < topDocs.scoreDocs.length; i++) {

        Document slaveDoc = m_indexReader.document(topDocs.scoreDocs[i].doc);

        slaveDoc.removeFields(lastCrawledTime);
        slaveDoc.add(new LongPoint(lastCrawledTime, lCurrentTime));
        slaveDoc.add(new StoredField(lastCrawledTime, lCurrentTime));

        m_indexWriter.updateDocument(termId, doc);
    }

}