Example usage for org.apache.lucene.document StoredField StoredField

List of usage examples for org.apache.lucene.document StoredField StoredField

Introduction

On this page you can find example usage for org.apache.lucene.document StoredField StoredField.

Prototype

public StoredField(String name, double value) 

Document

Create a stored-only field with the given double value.
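
A minimal, self-contained sketch of this constructor (assuming a Lucene 5.x-era API; the directory, analyzer, and field names are illustrative, not taken from the sources below). The double is stored verbatim with the document and read back via numericValue(); because the field is stored-only, it cannot be searched or sorted on:

import java.io.IOException;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.*;
import org.apache.lucene.index.*;
import org.apache.lucene.search.*;
import org.apache.lucene.store.RAMDirectory;

public class StoredDoubleExample {
    public static void main(String[] args) throws IOException {
        // Store a double alongside a searchable key.
        RAMDirectory dir = new RAMDirectory();
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()));
        Document doc = new Document();
        doc.add(new StringField("id", "42", Field.Store.YES)); // searchable key
        doc.add(new StoredField("price", 9.99));               // stored-only double
        writer.addDocument(doc);
        writer.close();

        // Read the stored double back from the matching hit.
        IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(dir));
        TopDocs hits = searcher.search(new TermQuery(new Term("id", "42")), 1);
        Document hit = searcher.doc(hits.scoreDocs[0].doc);
        double price = hit.getField("price").numericValue().doubleValue(); // 9.99
    }
}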

Usage

From source file:BlockBuilding.MemoryBased.SchemaBased.AbstractSchemaBasedMethod.java

License:Open Source License

@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    System.out.println("Indexing " + entities.size() + " entities...");
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            AbstractProfile aProfile = getAbstractProfile(profile);
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter));
            for (int keyId : blockingKeys) {
                for (String key : getBlockingKeys(keyId, aProfile)) {
                    if (0 < key.trim().length()) {
                        doc.add(new StringField(VALUE_LABEL, key.trim(), Field.Store.YES));
                    }
                }
            }
            index.addDocument(doc);
            counter++;
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
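
Since DOC_ID above is added with the int overload of the same stored-only constructor, it can be retrieved from hits but not searched. A hedged sketch of the read side (searcher and scoreDoc are assumed from surrounding search code that is not shown here):

// Read the stored-only DOC_ID back from a matching hit.
// `searcher` and `scoreDoc` are assumed to exist in the caller.
Document hit = searcher.doc(scoreDoc.doc);
int entityId = hit.getField(DOC_ID).numericValue().intValue();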

From source file:br.bireme.ngrams.NGrams.java

private static Document createDocument(final Map<String, br.bireme.ngrams.Field> fields, final String[] flds)
        throws IOException {
    assert fields != null;
    assert flds != null;

    Document doc = new Document();
    String dbName = null;
    String id = null;

    final Set<String> names = new HashSet<>();
    for (br.bireme.ngrams.Field fld : fields.values()) {
        final String content = flds[fld.pos];
        final String fname = fld.name;
        if (fld instanceof IndexedNGramField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            final String ncontent = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE)
                    .trim();
            doc.add(new TextField(fname, ncontent, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else if (fld instanceof DatabaseField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            dbName = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StringField(fname, dbName, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else if (fld instanceof IdField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            id = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StringField(fname, id, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else {
            final String ncontent = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE)
                    .trim();
            doc.add(new StoredField(fname, ncontent));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        }
        names.add(fname);
    }
    // Add field to avoid duplicated documents in the index
    if (dbName == null) {
        throw new IOException("dbName");
    }
    if (id == null) {
        throw new IOException("id");
    }
    if (doc != null) {
        doc.add(new StringField("db_id", Tools.normalize(dbName + "_" + id, OCC_SEPARATOR), Store.YES));
    }

    return doc;
}
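
Note the pattern above: each value is written twice, once in a normalized, searchable form and once raw under fname + NOT_NORMALIZED_FLD via the stored-only constructor. A hedged sketch of reading both back from a hit (fname and NOT_NORMALIZED_FLD as defined in NGrams; searcher and scoreDoc assumed):

// Each hit carries the normalized value plus the original input.
Document hit = searcher.doc(scoreDoc.doc);
String normalized = hit.get(fname);               // normalized, searchable copy
String raw = hit.get(fname + NOT_NORMALIZED_FLD); // raw, stored-only copy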

From source file:cn.hbu.cs.esearch.index.LuceneIndexDataLoader.java

License:Apache License

/**
 * @param events incoming events sorted by version number
 *               <br>every event in the events collection must be non-null
 *
 * @see cn.hbu.cs.esearch.consumer.DataConsumer#consume(java.util.Collection)
 */
@Override
public void consume(Collection<DataEvent<EsearchIndexable>> events) throws EsearchException {

    if (events == null) {
        return;
    }
    int eventCount = events.size();
    if (eventCount == 0) {
        return;
    }
    BaseSearchIndex<R> idx = getSearchIndex();

    if (idx == null) {
        throw new EsearchException("trying to consume to null index");
    }
    Long2ObjectMap<List<EsearchIndexable.IndexingReq>> addList = new Long2ObjectOpenHashMap<List<EsearchIndexable.IndexingReq>>();
    String version = idx.getVersion(); // current version

    LongSet delSet = new LongOpenHashSet();

    try {
        for (DataEvent<EsearchIndexable> evt : events) {
            if (evt == null) {
                continue;
            }
            version = version == null ? evt.getVersion()
                    : (_versionComparator.compare(version, evt.getVersion()) < 0 ? evt.getVersion() : version);

            // interpret and get the indexable instance
            EsearchIndexable indexable = evt.getData();
            if (indexable == null || indexable.isSkip()) {
                continue;
            }

            long uid = indexable.getUID();
            delSet.add(uid);
            addList.remove(uid);
            if (!(indexable.isDeleted() || evt.isDelete())) // update event
            {
                try {
                    EsearchIndexable.IndexingReq[] reqs = indexable.buildIndexingReqs();
                    for (EsearchIndexable.IndexingReq req : reqs) {
                        if (req != null) // a null request is interpreted
                        // as a delete, i.e. an update
                        // with nothing to index
                        {
                            Document doc = req.getDocument();
                            if (doc != null) {
                                EsearchSegmentReader.fillDocumentID(doc, uid);
                                if (indexable.isStorable()) {
                                    byte[] bytes = indexable.getStoreValue();
                                    if (bytes != null) {
                                        doc.add(new StoredField(AbstractEsearchIndexable.DOCUMENT_STORE_FIELD,
                                                bytes));
                                    }
                                }
                            }
                            // add to the insert list
                            List<EsearchIndexable.IndexingReq> docList = addList.get(uid);
                            if (docList == null) {
                                docList = new LinkedList<EsearchIndexable.IndexingReq>();
                                addList.put(uid, docList);
                            }
                            docList.add(req);
                        }
                    }
                } catch (Exception ex) {
                    LOGGER.error("Couldn't index the event with uid - " + uid, ex);
                }
            }
        }

        List<EsearchIndexable.IndexingReq> docList = new ArrayList<EsearchIndexable.IndexingReq>(
                addList.size());
        for (List<EsearchIndexable.IndexingReq> tmpList : addList.values()) {
            docList.addAll(tmpList);
        }

        purgeDocuments();
        idx.updateIndex(delSet, docList, _analyzer, _similarity);
        propagateDeletes(delSet);
        synchronized (_idxMgr) {
            idx.refresh();
            commitPropagatedDeletes();
        }
    } catch (IOException ioe) {
        EsearchHealth.setFatal();
        LOGGER.error("Problem indexing batch: " + ioe.getMessage(), ioe);
    } finally {
        try {
            if (idx != null) {
                idx.setVersion(version);
                idx.incrementEventCount(eventCount);
            }
        } catch (Exception e) // catch all exceptions, or they would break
        // the jobs framework
        {
            LOGGER.warn(e.getMessage());
        } finally {
            if (idx instanceof DiskSearchIndex<?>) {
                LOGGER.info("disk indexing requests flushed.");
            }
        }
    }
}

From source file:cn.hbu.cs.esearch.store.LuceneStore.java

License:Apache License

@Override
protected void persist(long uid, byte[] data) throws IOException {
    Document doc = new Document();
    doc.add(new StoredField(field, data));
    EsearchSegmentReader.fillDocumentID(doc, uid);
    indexWriter.addDocument(doc);
}
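
The counterpart read path recovers the persisted bytes from a hit document; a hedged sketch (field and searcher as in the surrounding class, docId hypothetical, java.util.Arrays assumed imported):

// Recover the raw bytes persisted above for a given Lucene doc id.
Document hit = searcher.doc(docId);
BytesRef ref = hit.getBinaryValue(field); // org.apache.lucene.util.BytesRef
byte[] data = (ref == null) ? null
        : Arrays.copyOfRange(ref.bytes, ref.offset, ref.offset + ref.length);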

From source file:com.b2international.index.lucene.FloatIndexField.java

License:Apache License

@Override
public void addTo(Document doc, Float value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}

From source file:com.b2international.index.lucene.IntIndexField.java

License:Apache License

@Override
public void addTo(Document doc, Integer value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}

From source file:com.b2international.index.lucene.LongIndexField.java

License:Apache License

@Override
public void addTo(Document doc, Long value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}

From source file:com.b2international.index.lucene.StoredOnlyIndexField.java

License:Apache License

@Override
public void addTo(Document doc, T value) {
    if (value instanceof String) {
        doc.add(new StoredField(fieldName(), (String) value));
    } else if (value instanceof Long) {
        doc.add(new StoredField(fieldName(), (Long) value));
    } else if (value instanceof Integer) {
        doc.add(new StoredField(fieldName(), (Integer) value));
    } else if (value instanceof Float) {
        doc.add(new StoredField(fieldName(), (Float) value));
    }
}
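
StoredField also offers a double overload (the constructor documented at the top of this page), so a Double branch could be added in the same style; a hypothetical extension, not part of the original source:

} else if (value instanceof Double) {
    // Hypothetical: route doubles through StoredField(String, double)
    doc.add(new StoredField(fieldName(), (Double) value));
}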

From source file:com.berico.clavin.index.IndexDirectoryBuilder.java

License:Apache License

/**
 * Builds a Lucene document to be added to the index based on a
 * specified name for the location and the corresponding
 * {@link GeoName} object.
 *
 * @param name           name to serve as index key
 * @param geonameEntry   gazetteer record string from GeoNames
 * @param geonameID      unique identifier (for quick look-up)
 * @param population     number of inhabitants (used for scoring)
 * @return               the Lucene document to be added to the index
 */
private static Document buildDoc(String name, String geonameEntry, int geonameID, Long population) {

    // in case you're wondering, yes, this is a non-standard use of
    // the Lucene Document construct
    Document doc = new Document();

    // this is essentially the key we'll try to match location
    // names against
    doc.add(new TextField("indexName", name, Field.Store.YES));

    // this is the payload we'll return when matching location
    // names to gazetteer records
    doc.add(new StoredField("geoname", geonameEntry));

    // TODO: use geonameID to link administrative subdivisions to
    //       each other
    doc.add(new IntField("geonameID", geonameID, Field.Store.YES));

    // we'll initially sort match results based on population
    doc.add(new LongField("population", population, Field.Store.YES));

    logger.debug("Adding to index: " + name);

    return doc;
}
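
At query time, the stored "geoname" payload comes back with each hit; a hedged sketch of that read side (searcher setup omitted; note that indexName is an analyzed TextField, so the query term should be in its analyzed, lowercased form):

// Match a location name, then pull back the stored gazetteer record.
TopDocs hits = searcher.search(new TermQuery(new Term("indexName", "boston")), 10);
for (ScoreDoc sd : hits.scoreDocs) {
    Document hit = searcher.doc(sd.doc);
    String gazetteerRecord = hit.get("geoname"); // stored payload
    int geonameID = hit.getField("geonameID").numericValue().intValue();
}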

From source file:com.bericotech.clavin.index.IndexDirectoryBuilder.java

License:Apache License

/**
 * Builds a set of Lucene documents for the provided GeoName, indexing
 * each using all available names and storing the entire ancestry path
 * for each GeoName in the index.  See {@link IndexField} for descriptions
 * of the fields indexed for each document.
 *
 * @param geoName       the GeoName to index
 * @throws IOException  if an error occurs while indexing
 */
private void indexGeoName(final GeoName geoName) throws IOException {
    indexCount++;
    // find all unique names for this GeoName
    String nm = geoName.getName();
    String asciiNm = geoName.getAsciiName();
    Set<String> names = new HashSet<String>();
    names.add(nm);
    names.add(asciiNm);
    names.addAll(geoName.getAlternateNames());
    // if this is a top-level administrative division, add its primary and alternate country codes
    // if they are not already found in the name or alternate names
    if (geoName.isTopLevelAdminDivision()) {
        if (geoName.getPrimaryCountryCode() != null) {
            names.add(geoName.getPrimaryCountryCode().name());
        }
        for (CountryCode cc : geoName.getAlternateCountryCodes()) {
            names.add(cc.name());
        }
    }
    AlternateName preferredName = alternateNameMap.get(geoName.getGeonameID());
    // ensure preferred name is found in alternate names
    if (preferredName != null) {
        names.add(preferredName.name);
    }
    names.remove(null);
    names.remove("");

    // reuse a single Document and field instances
    Document doc = new Document();
    doc.add(new StoredField(GEONAME.key(),
            fullAncestry ? geoName.getGazetteerRecordWithAncestry() : geoName.getGazetteerRecord()));
    doc.add(new IntField(GEONAME_ID.key(), geoName.getGeonameID(), Field.Store.YES));
    // if the alternate names file was loaded and we found a preferred name for this GeoName, store it
    if (preferredName != null) {
        doc.add(new StoredField(PREFERRED_NAME.key(), preferredName.name));
    }
    // index the direct parent ID in the PARENT_ID field
    GeoName parent = geoName.getParent();
    if (parent != null) {
        doc.add(new IntField(PARENT_ID.key(), parent.getGeonameID(), Field.Store.YES));
    }
    // index all ancestor IDs in the ANCESTOR_IDS field; this is a secondary field
    // so it can be used to restrict searches and PARENT_ID can be used for ancestor
    // resolution
    while (parent != null) {
        doc.add(new IntField(ANCESTOR_IDS.key(), parent.getGeonameID(), Field.Store.YES));
        parent = parent.getParent();
    }
    doc.add(new LongField(POPULATION.key(), geoName.getPopulation(), Field.Store.YES));
    // set up sort field based on population and geographic feature type
    if (geoName.getFeatureClass().equals(FeatureClass.P) || geoName.getFeatureCode().name().startsWith("PCL")) {
        if (geoName.getGeonameID() != 2643741) // todo: temporary hack until GeoNames.org fixes the population for City of London
            // boost cities and countries when sorting results by population
            doc.add(new LongField(SORT_POP.key(), geoName.getPopulation() * 11, Field.Store.YES));
    } else {
        // don't boost anything else, because people rarely talk about other stuff
        // (e.g., Washington State's population is more than 10x that of Washington, DC
        // but Washington, DC is mentioned far more frequently than Washington State)
        doc.add(new LongField(SORT_POP.key(), geoName.getPopulation(), Field.Store.YES));
    }
    doc.add(new IntField(HISTORICAL.key(),
            IndexField.getBooleanIndexValue(geoName.getFeatureCode().isHistorical()), Field.Store.NO));
    doc.add(new StringField(FEATURE_CODE.key(), geoName.getFeatureCode().name(), Field.Store.NO));

    // create a unique Document for each name of this GeoName
    TextField nameField = new TextField(INDEX_NAME.key(), "", Field.Store.YES);
    doc.add(nameField);
    for (String name : names) {
        nameField.setStringValue(name);
        indexWriter.addDocument(doc);
    }
}
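
The closing loop leans on a Lucene idiom worth calling out: field values are consumed when addDocument runs, so a single Document (including its StoredField payload) can be reused across all names by mutating one TextField in place. A minimal sketch of the same pattern (writer and field names hypothetical):

// Reuse one Document across addDocument calls, changing only the name.
Document doc = new Document();
doc.add(new StoredField("payload", "shared gazetteer record"));
TextField nameField = new TextField("name", "", Field.Store.YES);
doc.add(nameField);
for (String alias : new String[] { "NYC", "New York", "New York City" }) {
    nameField.setStringValue(alias); // mutate in place
    writer.addDocument(doc);         // values are copied here
}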