List of usage examples for the org.apache.lucene.document.StoredField constructor
public StoredField(String name, double value)
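Before the project examples below, a minimal sketch of this constructor in isolation (the "price" field name and the in-scope indexWriter are illustrative assumptions): a StoredField holds a value that is stored with the document but not indexed, so it can be retrieved with search results but not queried directly.

Document doc = new Document();
// store a double value verbatim with the document; "price" is a hypothetical field name
doc.add(new StoredField("price", 19.95));
// a StoredField alone is neither searchable nor sortable; pair it with an indexed
// field (e.g. a DoubleField in Lucene 4.x/5.x) if the value must also be queried
indexWriter.addDocument(doc); // indexWriter is assumed to be an open IndexWriter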
From source file:BlockBuilding.MemoryBased.SchemaBased.AbstractSchemaBasedMethod.java
License:Open Source License
@Override
protected void indexEntities(IndexWriter index, List<EntityProfile> entities) {
    System.out.println("Indexing " + entities.size() + " entities...");
    try {
        int counter = 0;
        for (EntityProfile profile : entities) {
            AbstractProfile aProfile = getAbstractProfile(profile);
            Document doc = new Document();
            doc.add(new StoredField(DOC_ID, counter));
            for (int keyId : blockingKeys) {
                for (String key : getBlockingKeys(keyId, aProfile)) {
                    if (0 < key.trim().length()) {
                        doc.add(new StringField(VALUE_LABEL, key.trim(), Field.Store.YES));
                    }
                }
            }
            index.addDocument(doc);
            counter++;
        }
    } catch (IOException ex) {
        ex.printStackTrace();
    }
}
From source file:br.bireme.ngrams.NGrams.java
private static Document createDocument(final Map<String, br.bireme.ngrams.Field> fields, final String[] flds)
        throws IOException {
    assert fields != null;
    assert flds != null;

    Document doc = new Document();
    String dbName = null;
    String id = null;
    final Set<String> names = new HashSet<>();

    for (br.bireme.ngrams.Field fld : fields.values()) {
        final String content = flds[fld.pos];
        final String fname = fld.name;
        if (fld instanceof IndexedNGramField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            final String ncontent = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new TextField(fname, ncontent, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else if (fld instanceof DatabaseField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            dbName = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StringField(fname, dbName, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else if (fld instanceof IdField) {
            if (names.contains(fname)) {
                doc = null;
                break;
            }
            id = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StringField(fname, id, Field.Store.YES));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        } else {
            final String ncontent = Tools.limitSize(Tools.normalize(content, OCC_SEPARATOR), MAX_NG_TEXT_SIZE).trim();
            doc.add(new StoredField(fname, ncontent));
            doc.add(new StoredField(fname + NOT_NORMALIZED_FLD, content.trim()));
        }
        names.add(fname);
    }

    // Add field to avoid duplicated documents in the index
    if (dbName == null) {
        throw new IOException("dbName");
    }
    if (id == null) {
        throw new IOException("id");
    }
    if (doc != null) {
        doc.add(new StringField("db_id", Tools.normalize(dbName + "_" + id, OCC_SEPARATOR), Store.YES));
    }
    return doc;
}
From source file:cn.hbu.cs.esearch.index.LuceneIndexDataLoader.java
License:Apache License
/**
 * @param events incoming events sorted by version number
 *               <br>every event in the events collection must be non-null
 *
 * @see cn.hbu.cs.esearch.consumer.DataConsumer#consume(java.util.Collection)
 */
@Override
public void consume(Collection<DataEvent<EsearchIndexable>> events) throws EsearchException {
    if (events == null) {
        return;
    }
    int eventCount = events.size();
    if (eventCount == 0) {
        return;
    }
    BaseSearchIndex<R> idx = getSearchIndex();
    if (idx == null) {
        throw new EsearchException("trying to consume to null index");
    }
    Long2ObjectMap<List<EsearchIndexable.IndexingReq>> addList = new Long2ObjectOpenHashMap<List<EsearchIndexable.IndexingReq>>();
    String version = idx.getVersion(); // current version
    LongSet delSet = new LongOpenHashSet();
    try {
        for (DataEvent<EsearchIndexable> evt : events) {
            if (evt == null) {
                continue;
            }
            version = version == null ? evt.getVersion()
                    : (_versionComparator.compare(version, evt.getVersion()) < 0 ? evt.getVersion() : version);
            // interpret and get the indexable instance
            EsearchIndexable indexable = evt.getData();
            if (indexable == null || indexable.isSkip()) {
                continue;
            }
            long uid = indexable.getUID();
            delSet.add(uid);
            addList.remove(uid);
            if (!(indexable.isDeleted() || evt.isDelete())) { // update event
                try {
                    EsearchIndexable.IndexingReq[] reqs = indexable.buildIndexingReqs();
                    for (EsearchIndexable.IndexingReq req : reqs) {
                        if (req != null) { // a null req means "update with nothing", i.e. treat it as a delete
                            Document doc = req.getDocument();
                            if (doc != null) {
                                EsearchSegmentReader.fillDocumentID(doc, uid);
                                if (indexable.isStorable()) {
                                    byte[] bytes = indexable.getStoreValue();
                                    if (bytes != null) {
                                        doc.add(new StoredField(AbstractEsearchIndexable.DOCUMENT_STORE_FIELD, bytes));
                                    }
                                }
                            }
                            // add to the insert list
                            List<EsearchIndexable.IndexingReq> docList = addList.get(uid);
                            if (docList == null) {
                                docList = new LinkedList<EsearchIndexable.IndexingReq>();
                                addList.put(uid, docList);
                            }
                            docList.add(req);
                        }
                    }
                } catch (Exception ex) {
                    LOGGER.error("Couldn't index the event with uid - " + uid, ex);
                }
            }
        }
        List<EsearchIndexable.IndexingReq> docList = new ArrayList<EsearchIndexable.IndexingReq>(addList.size());
        for (List<EsearchIndexable.IndexingReq> tmpList : addList.values()) {
            docList.addAll(tmpList);
        }
        purgeDocuments();
        idx.updateIndex(delSet, docList, _analyzer, _similarity);
        propagateDeletes(delSet);
        synchronized (_idxMgr) {
            idx.refresh();
            commitPropagatedDeletes();
        }
    } catch (IOException ioe) {
        EsearchHealth.setFatal();
        LOGGER.error("Problem indexing batch: " + ioe.getMessage(), ioe);
    } finally {
        try {
            if (idx != null) {
                idx.setVersion(version);
                idx.incrementEventCount(eventCount);
            }
        } catch (Exception e) { // catch all exceptions, or it would screw up the jobs framework
            LOGGER.warn(e.getMessage());
        } finally {
            if (idx instanceof DiskSearchIndex<?>) {
                LOGGER.info("disk indexing requests flushed.");
            }
        }
    }
}
From source file:cn.hbu.cs.esearch.store.LuceneStore.java
License:Apache License
@Override
protected void persist(long uid, byte[] data) throws IOException {
    Document doc = new Document();
    doc.add(new StoredField(field, data));
    EsearchSegmentReader.fillDocumentID(doc, uid);
    indexWriter.addDocument(doc);
}
From source file:com.b2international.index.lucene.FloatIndexField.java
License:Apache License
@Override
public void addTo(Document doc, Float value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}
From source file:com.b2international.index.lucene.IntIndexField.java
License:Apache License
@Override
public void addTo(Document doc, Integer value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}
From source file:com.b2international.index.lucene.LongIndexField.java
License:Apache License
@Override
public void addTo(Document doc, Long value) {
    super.addTo(doc, value);
    if (Store.YES == isStored()) {
        doc.add(new StoredField(fieldName(), value));
    }
}
From source file:com.b2international.index.lucene.StoredOnlyIndexField.java
License:Apache License
@Override
public void addTo(Document doc, T value) {
    if (value instanceof String) {
        doc.add(new StoredField(fieldName(), (String) value));
    } else if (value instanceof Long) {
        doc.add(new StoredField(fieldName(), (Long) value));
    } else if (value instanceof Integer) {
        doc.add(new StoredField(fieldName(), (Integer) value));
    } else if (value instanceof Float) {
        doc.add(new StoredField(fieldName(), (Float) value));
    }
}
From source file:com.berico.clavin.index.IndexDirectoryBuilder.java
License:Apache License
/**
 * Builds a Lucene document to be added to the index based on a
 * specified name for the location and the corresponding
 * {@link GeoName} object.
 *
 * @param name         name to serve as index key
 * @param geonameEntry string from GeoNames gazetteer
 * @param geonameID    unique identifier (for quick look-up)
 * @param population   number of inhabitants (used for scoring)
 * @return the Lucene document to be added to the index
 */
private static Document buildDoc(String name, String geonameEntry, int geonameID, Long population) {
    // in case you're wondering, yes, this is a non-standard use of
    // the Lucene Document construct
    Document doc = new Document();

    // this is essentially the key we'll try to match location
    // names against
    doc.add(new TextField("indexName", name, Field.Store.YES));

    // this is the payload we'll return when matching location
    // names to gazetteer records
    doc.add(new StoredField("geoname", geonameEntry));

    // TODO: use geonameID to link administrative subdivisions to
    // each other
    doc.add(new IntField("geonameID", geonameID, Field.Store.YES));

    // we'll initially sort match results based on population
    doc.add(new LongField("population", population, Field.Store.YES));

    logger.debug("Adding to index: " + name);

    return doc;
}
From source file:com.bericotech.clavin.index.IndexDirectoryBuilder.java
License:Apache License
/**
 * Builds a set of Lucene documents for the provided GeoName, indexing
 * each using all available names and storing the entire ancestry path
 * for each GeoName in the index. See {@link IndexField} for descriptions
 * of the fields indexed for each document.
 *
 * @param geoName the GeoName to index
 * @throws IOException if an error occurs while indexing
 */
private void indexGeoName(final GeoName geoName) throws IOException {
    indexCount++;
    // find all unique names for this GeoName
    String nm = geoName.getName();
    String asciiNm = geoName.getAsciiName();
    Set<String> names = new HashSet<String>();
    names.add(nm);
    names.add(asciiNm);
    names.addAll(geoName.getAlternateNames());
    // if this is a top-level administrative division, add its primary and alternate country codes
    // if they are not already found in the name or alternate names
    if (geoName.isTopLevelAdminDivision()) {
        if (geoName.getPrimaryCountryCode() != null) {
            names.add(geoName.getPrimaryCountryCode().name());
        }
        for (CountryCode cc : geoName.getAlternateCountryCodes()) {
            names.add(cc.name());
        }
    }
    AlternateName preferredName = alternateNameMap.get(geoName.getGeonameID());
    // ensure preferred name is found in alternate names
    if (preferredName != null) {
        names.add(preferredName.name);
    }
    names.remove(null);
    names.remove("");

    // reuse a single Document and field instances
    Document doc = new Document();
    doc.add(new StoredField(GEONAME.key(),
            fullAncestry ? geoName.getGazetteerRecordWithAncestry() : geoName.getGazetteerRecord()));
    doc.add(new IntField(GEONAME_ID.key(), geoName.getGeonameID(), Field.Store.YES));
    // if the alternate names file was loaded and we found a preferred name for this GeoName, store it
    if (preferredName != null) {
        doc.add(new StoredField(PREFERRED_NAME.key(), preferredName.name));
    }
    // index the direct parent ID in the PARENT_ID field
    GeoName parent = geoName.getParent();
    if (parent != null) {
        doc.add(new IntField(PARENT_ID.key(), parent.getGeonameID(), Field.Store.YES));
    }
    // index all ancestor IDs in the ANCESTOR_IDS field; this is a secondary field
    // so it can be used to restrict searches and PARENT_ID can be used for ancestor
    // resolution
    while (parent != null) {
        doc.add(new IntField(ANCESTOR_IDS.key(), parent.getGeonameID(), Field.Store.YES));
        parent = parent.getParent();
    }
    doc.add(new LongField(POPULATION.key(), geoName.getPopulation(), Field.Store.YES));
    // set up sort field based on population and geographic feature type
    if (geoName.getFeatureClass().equals(FeatureClass.P) || geoName.getFeatureCode().name().startsWith("PCL")) {
        if (geoName.getGeonameID() != 2643741) { // TODO: temporary hack until GeoNames.org fixes the population for City of London
            // boost cities and countries when sorting results by population
            doc.add(new LongField(SORT_POP.key(), geoName.getPopulation() * 11, Field.Store.YES));
        }
    } else {
        // don't boost anything else, because people rarely talk about other stuff
        // (e.g., Washington State's population is more than 10x that of Washington, DC
        // but Washington, DC is mentioned far more frequently than Washington State)
        doc.add(new LongField(SORT_POP.key(), geoName.getPopulation(), Field.Store.YES));
    }
    doc.add(new IntField(HISTORICAL.key(),
            IndexField.getBooleanIndexValue(geoName.getFeatureCode().isHistorical()), Field.Store.NO));
    doc.add(new StringField(FEATURE_CODE.key(), geoName.getFeatureCode().name(), Field.Store.NO));

    // create a unique Document for each name of this GeoName
    TextField nameField = new TextField(INDEX_NAME.key(), "", Field.Store.YES);
    doc.add(nameField);
    for (String name : names) {
        nameField.setStringValue(name);
        indexWriter.addDocument(doc);
    }
}
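The examples above all write stored values; a common companion step is reading them back at search time. A minimal retrieval sketch, assuming an in-scope IndexSearcher (searcher), a TopDocs result (topDocs), and the "geoname"/"population" field names used in the CLAVIN examples above:

for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
    Document hit = searcher.doc(scoreDoc.doc);
    // stored string payloads come back via get(); stored numeric fields via numericValue()
    String geonameEntry = hit.get("geoname");
    Number population = hit.getField("population") == null
            ? null
            : hit.getField("population").numericValue();
    // ... use geonameEntry / population to build the search result ...
}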