List of usage examples for the org.apache.lucene.document.StoredField constructor
public StoredField(String name, double value)
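Before the project examples below, a minimal sketch of this constructor in isolation (the "price" field name and the in-scope indexWriter are illustrative assumptions): a StoredField holds a value that is stored with the document but not indexed, so it can be retrieved with search results but not queried directly.

Document doc = new Document();
// store a double value verbatim with the document; "price" is a hypothetical field name
doc.add(new StoredField("price", 19.95));
// a StoredField alone is neither searchable nor sortable; pair it with an indexed
// field (e.g. a DoubleField in Lucene 4.x/5.x) if the value must also be queried
indexWriter.addDocument(doc); // indexWriter is assumed to be an open IndexWriter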
From source file:BlockBuilding.MemoryBased.SchemaBased.AbstractSchemaBasedMethod.java
License:Open Source License
@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    System.out.println("Indexing " + entities.size() + " entities...");
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            AbstractProfile aProfile = getAbstractProfile(profile);
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter));
            for (int keyId : blockingKeys) {
                for (String key : getBlockingKeys(keyId, aProfile)) {
                    if (0 < key.trim().length()) {
                        doc.add(new StringField(VALUE_LABEL, key.trim(), Field.Store.YES));
                    }
                }
            }
            index.addDocument(doc);
            counter++;
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
From source file:br.bireme.ngrams.NGrams.java
private static Document createDocument(final Map<String, br.bireme.ngrams.Field> fields, final String[] flds)
        throws IOException {
    assert fields != null;
    assert flds != null;

    Document doc = new Document();
    String dbName = null;
    String id = null;
    final Set<String> names = new HashSet<>();

    for (br.bireme.ngrams.Field fld : fields.values()) {
        final String content = flds[fld.pos];
        final String fname = fld.name;
        if (fld instanceof IndexedNGramField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            final String ncontent = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new TextField(fname, ncontent, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else if (fld instanceof DatabaseField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            dbName = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StringField(fname, dbName, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else if (fld instanceof IdField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            id = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StringField(fname, id, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else {
            final String ncontent = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StoredField(fname, ncontent));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        }
        names.add(fname);
    }

    // Add field to avoid duplicated documents in the index
    if (dbName == null) {
        throw new IOException("dbName");
    }
    if (id == null) {
        throw new IOException("id");
    }
    if (doc != null) {
        doc.add(new StringField("db_id", Tools.normalize(dbName + "_" + id, OCC_SEPARATOR), Store.YES));
    }
    return doc;
}
From source file:cn.hbu.cs.esearch.index.LuceneIndexDataLoader.java
License:Apache License
/**
 * @param events incoming events sorted by version number
 *               <br>every event in the events collection must be non-null
 *
 * @see cn.hbu.cs.esearch.consumer.DataConsumer#consume(java.util.Collection)
 */
@Override
public void consume(Collection<DataEvent<EsearchIndexable>> events) throws EsearchException {
    if (events == null) {
        return;
    }
    int eventCount = events.size();
    if (eventCount == 0) {
        return;
    }
    BaseSearchIndex<R> idx = getSearchIndex();
    if (idx == null) {
        throw new EsearchException("trying to consume to null index");
    }
    Long2ObjectMap<List<EsearchIndexable.IndexingReq>> addList = new Long2ObjectOpenHashMap<List<EsearchIndexable.IndexingReq>>();
    String version = idx.getVersion(); // current version
    LongSet delSet = new LongOpenHashSet();
    try {
        for (DataEvent<EsearchIndexable> evt : events) {
            if (evt == null) {
                continue;
            }
            version = version == null ? evt.getVersion()
                    : (_versionComparator.compare(version, evt.getVersion()) < 0 ? evt.getVersion() : version);
            // interpret and get the indexable instance
            EsearchIndexable indexable = evt.getData();
            if (indexable == null || indexable.isSkip()) {
                continue;
            }
            long uid = indexable.getUID();
            delSet.add(uid);
            addList.remove(uid);
            if (!(indexable.isDeleted() || evt.isDelete())) { // update event
                try {
                    EsearchIndexable.IndexingReq[] reqs = indexable.buildIndexingReqs();
                    for (EsearchIndexable.IndexingReq req : reqs) {
                        if (req != null) { // a null req means "update with nothing", i.e. treat it as a delete
                            Document doc = req.getDocument();
                            if (doc != null) {
                                EsearchSegmentReader.fillDocumentID(doc, uid);
                                if (indexable.isStorable()) {
                                    byte[] bytes = indexable.getStoreValue();
                                    if (bytes != null) {
                                        doc.add(new StoredField(AbstractEsearchIndexable.DOCUMENT_STORE_FIELD, bytes));
                                    }
                                }
                            }
                            // add to the insert list
                            List<EsearchIndexable.IndexingReq> docList = addList.get(uid);
                            if (docList == null) {
                                docList = new LinkedList<EsearchIndexable.IndexingReq>();
                                addList.put(uid, docList);
                            }
                            docList.add(req);
                        }
                    }
                } catch (Exception ex) {
                    LOGGER.error("Couldn't index the event with uid - " + uid, ex);
                }
            }
        }
        List<EsearchIndexable.IndexingReq> docList = new ArrayList<EsearchIndexable.IndexingReq>(addList.size());
        for (List<EsearchIndexable.IndexingReq> tmpList : addList.values()) {
            docList.addAll(tmpList);
        }
        purgeDocuments();
        idx.updateIndex(delSet, docList, _analyzer, _similarity);
        propagateDeletes(delSet);
        synchronized (_idxMgr) {
            idx.refresh();
            commitPropagatedDeletes();
        }
    } catch (IOException ioe) {
        EsearchHealth.setFatal();
        LOGGER.error("Problem indexing batch: " + ioe.getMessage(), ioe);
    } finally {
        try {
            if (idx != null) {
                idx.setVersion(version);
                idx.incrementEventCount(eventCount);
            }
        } catch (Exception e) { // catch all exceptions, or it would screw up the jobs framework
            LOGGER.warn(e.getMessage());
        } finally {
            if (idx instanceof DiskSearchIndex<?>) {
                LOGGER.info("disk indexing requests flushed.");
            }
        }
    }
}
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
@Override
protected void persist(long uid, byte[] data) throws IOException {
    Document doc = new Document();
    doc.add(new StoredField(field, data));
    EsearchSegmentReader.fillDocumentID(doc, uid);
    indexWriter.addDocument(doc);
}
From source file:com.b2international.index.lucene.FloatIndexField.java
License:Apache License
@Override
public void addTo(Document doc, Float value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}
From source file:com.b2international.index.lucene.IntIndexField.java
License:Apache License
@Override
public void addTo(Document doc, Integer value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}
From source file:com.b2international.index.lucene.LongIndexField.java
License:Apache License
@Override
public void addTo(Document doc, Long value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}
From source file:com.b2international.index.lucene.StoredOnlyIndexField.java
License:Apache License
@Override
public void addTo(Document doc, T value) {
    if (value instanceof String) {
        doc.add(new StoredField(fieldName(), (String) value));
    } else if (value instanceof Long) {
        doc.add(new StoredField(fieldName(), (Long) value));
    } else if (value instanceof Integer) {
        doc.add(new StoredField(fieldName(), (Integer) value));
    } else if (value instanceof Float) {
        doc.add(new StoredField(fieldName(), (Float) value));
    }
}
From source file:com.berico.clavin.index.IndexDirectoryBuilder.java
License:Apache License
/**
 * Builds a Lucene document to be added to the index based on a
 * specified name for the location and the corresponding
 * {@link GeoName} object.
 *
 * @param name         name to serve as index key
 * @param geonameEntry string from GeoNames gazetteer
 * @param geonameID    unique identifier (for quick look-up)
 * @param population   number of inhabitants (used for scoring)
 * @return the Lucene document to be added to the index
 */
private static Document buildDoc(String name, String geonameEntry, int geonameID, Long population) {
    // in case you're wondering, yes, this is a non-standard use of
    // the Lucene Document construct
    Document doc = new Document();

    // this is essentially the key we'll try to match location
    // names against
    doc.add(new TextField("indexName", name, Field.Store.YES));

    // this is the payload we'll return when matching location
    // names to gazetteer records
    doc.add(new StoredField("geoname", geonameEntry));

    // TODO: use geonameID to link administrative subdivisions to
    // each other
    doc.add(new IntField("geonameID", geonameID, Field.Store.YES));

    // we'll initially sort match results based on population
    doc.add(new LongField("population", population, Field.Store.YES));

    logger.debug("Adding to index: " + name);

    return doc;
}
From source file:com.bericotech.clavin.index.IndexDirectoryBuilder.java
License:Apache License
/**
 * Builds a set of Lucene documents for the provided GeoName, indexing
 * each using all available names and storing the entire ancestry path
 * for each GeoName in the index. See {@link IndexField} for descriptions
 * of the fields indexed for each document.
 *
 * @param geoName the GeoName to index
 * @throws IOException if an error occurs while indexing
 */
private void indexGeoName(final GeoName geoName) throws IOException {
    indexCount++;
    // find all unique names for this GeoName
    String nm = geoName.getName();
    String asciiNm = geoName.getAsciiName();
    Set<String> names = new HashSet<String>();
    names.add(nm);
    names.add(asciiNm);
    names.addAll(geoName.getAlternateNames());
    // if this is a top-level administrative division, add its primary and alternate country codes
    // if they are not already found in the name or alternate names
    if (geoName.isTopLevelAdminDivision()) {
        if (geoName.getPrimaryCountryCode() != null) {
            names.add(geoName.getPrimaryCountryCode().name());
        }
        for (CountryCode cc : geoName.getAlternateCountryCodes()) {
            names.add(cc.name());
        }
    }
    AlternateName preferredName = alternateNameMap.get(geoName.getGeonameID());
    // ensure preferred name is found in alternate names
    if (preferredName != null) {
        names.add(preferredName.name);
    }
    names.remove(null);
    names.remove("");

    // reuse a single Document and field instances
    Document doc = new Document();
    doc.add(new StoredField(GEONAME.key(),
            fullAncestry ? geoName.getGazetteerRecordWithAncestry() : geoName.getGazetteerRecord()));
    doc.add(new IntField(GEONAME_ID.key(), geoName.getGeonameID(), Field.Store.YES));
    // if the alternate names file was loaded and we found a preferred name for this GeoName, store it
    if (preferredName != null) {
        doc.add(new StoredField(PREFERRED_NAME.key(), preferredName.name));
    }
    // index the direct parent ID in the PARENT_ID field
    GeoName parent = geoName.getParent();
    if (parent != null) {
        doc.add(new IntField(PARENT_ID.key(), parent.getGeonameID(), Field.Store.YES));
    }
    // index all ancestor IDs in the ANCESTOR_IDS field; this is a secondary field
    // so it can be used to restrict searches and PARENT_ID can be used for ancestor
    // resolution
    while (parent != null) {
        doc.add(new IntField(ANCESTOR_IDS.key(), parent.getGeonameID(), Field.Store.YES));
        parent = parent.getParent();
    }
    doc.add(new LongField(POPULATION.key(), geoName.getPopulation(), Field.Store.YES));
    // set up sort field based on population and geographic feature type
    if (geoName.getFeatureClass().equals(FeatureClass.P) || geoName.getFeatureCode().name().startsWith("PCL")) {
        if (geoName.getGeonameID() != 2643741) { // TODO: temporary hack until GeoNames.org fixes the population for City of London
            // boost cities and countries when sorting results by population
            doc.add(new LongField(SORT_POP.key(), geoName.getPopulation() * 11, Field.Store.YES));
        }
    } else {
        // don't boost anything else, because people rarely talk about other stuff
        // (e.g., Washington State's population is more than 10x that of Washington, DC
        // but Washington, DC is mentioned far more frequently than Washington State)
        doc.add(new LongField(SORT_POP.key(), geoName.getPopulation(), Field.Store.YES));
    }
    doc.add(new IntField(HISTORICAL.key(),
            IndexField.getBooleanIndexValue(geoName.getFeatureCode().isHistorical()), Field.Store.NO));
    doc.add(new StringField(FEATURE_CODE.key(), geoName.getFeatureCode().name(), Field.Store.NO));

    // create a unique Document for each name of this GeoName
    TextField nameField = new TextField(INDEX_NAME.key(), "", Field.Store.YES);
    doc.add(nameField);
    for (String name : names) {
        nameField.setStringValue(name);
        indexWriter.addDocument(doc);
    }
}
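The examples above all write stored values; a common companion step is reading them back at search time. A minimal retrieval sketch, assuming an in-scope IndexSearcher (searcher), a TopDocs result (topDocs), and the "geoname"/"population" field names used in the CLAVIN examples above:

for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    Document hit = searcher.doc(scoreDoc.doc);
    // stored string payloads come back via get(); stored numeric fields via numericValue()
    String geonameEntry = hit.get("geoname");
    Number population = hit.getField("population") == null
            ? null
            : hit.getField("population").numericValue();
    // ... use geonameEntry / population to build the search result ...
}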