List of usage examples for the org.apache.lucene.util.BytesRef constructor BytesRef
public BytesRef(CharSequence text)
From source file:com.twentyn.patentSearch.DocumentIndexer.java
License:Open Source License
public Document patentDocToLuceneDoc(File path, PatentDocument patentDoc) { // With help from https://lucene.apache.org/core/5_2_0/demo/src-html/org/apache/lucene/demo/IndexFiles.html Document doc = new Document(); doc.add(new StringField("file_name", path.getName(), Field.Store.YES)); doc.add(new StringField("id", patentDoc.getFileId(), Field.Store.YES)); doc.add(new StringField("grant_date", patentDoc.getGrantDate(), Field.Store.YES)); doc.add(new StringField("main_classification", patentDoc.getMainClassification(), Field.Store.YES)); doc.add(new TextField("title", patentDoc.getTitle(), Field.Store.YES)); doc.add(new TextField("claims", StringUtils.join("\n", patentDoc.getClaimsText()), Field.Store.NO)); doc.add(new TextField("description", StringUtils.join("\n", patentDoc.getTextContent()), Field.Store.NO)); // TODO: verify that these are searchable as expected. for (String cls : patentDoc.getFurtherClassifications()) { doc.add(new SortedSetDocValuesField("further_classification", new BytesRef(cls))); }//from w w w . ja va2 s . c o m for (String cls : patentDoc.getSearchedClassifications()) { doc.add(new SortedSetDocValuesField("searched_classification", new BytesRef(cls))); } return doc; }
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
protected void handleDocumentPostOrDelete(Operation postOrDelete) throws Throwable { UpdateIndexRequest r = postOrDelete.getBody(UpdateIndexRequest.class); ServiceDocument s = r.document;//www .j av a2 s. c om ServiceDocumentDescription desc = r.description; if (postOrDelete.isRemote()) { postOrDelete.fail(new IllegalStateException("Remote requests not allowed")); return; } if (s == null) { postOrDelete.fail(new IllegalArgumentException("document is required")); return; } String link = s.documentSelfLink; if (link == null) { postOrDelete.fail(new IllegalArgumentException("documentSelfLink is required")); return; } if (s.documentUpdateAction == null) { postOrDelete.fail(new IllegalArgumentException("documentUpdateAction is required")); return; } if (desc == null) { postOrDelete.fail(new IllegalArgumentException("description is required")); return; } s.documentDescription = null; Document doc = new Document(); Field refererField = new StringField(LUCENE_FIELD_NAME_REFERER, postOrDelete.getReferer().toString(), Field.Store.NO); doc.add(refererField); Field updateActionField = new StoredField(ServiceDocument.FIELD_NAME_UPDATE_ACTION, s.documentUpdateAction); doc.add(updateActionField); addBinaryStateFieldToDocument(s, desc, doc); Field selfLinkField = new StringField(ServiceDocument.FIELD_NAME_SELF_LINK, link.intern(), Field.Store.YES); doc.add(selfLinkField); Field sortedSelfLinkField = new SortedDocValuesField(ServiceDocument.FIELD_NAME_SELF_LINK, new BytesRef(link.intern().toString())); doc.add(sortedSelfLinkField); if (s.documentKind != null) { Field kindField = new StringField(ServiceDocument.FIELD_NAME_KIND, s.documentKind, Field.Store.NO); doc.add(kindField); } if (s.documentAuthPrincipalLink != null) { Field principalField = new StringField(ServiceDocument.FIELD_NAME_AUTH_PRINCIPAL_LINK, s.documentAuthPrincipalLink, Field.Store.NO); doc.add(principalField); } if (s.documentTransactionId != null) { Field transactionField = new 
StringField(ServiceDocument.FIELD_NAME_TRANSACTION_ID, s.documentTransactionId, Field.Store.NO); doc.add(transactionField); } Field timestampField = new LongField(ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, s.documentUpdateTimeMicros, this.longStoredField); doc.add(timestampField); if (s.documentExpirationTimeMicros > 0) { Field expirationTimeMicrosField = new LongField(ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, s.documentExpirationTimeMicros, this.longStoredField); doc.add(expirationTimeMicrosField); } Field versionField = new LongField(ServiceDocument.FIELD_NAME_VERSION, s.documentVersion, this.longStoredField); doc.add(versionField); if (desc.propertyDescriptions == null || desc.propertyDescriptions.isEmpty()) { // no additional property type information, so we will add the // document with common fields indexed plus the full body addDocumentToIndex(postOrDelete, doc, s, desc); return; } addIndexableFieldsToDocument(doc, s, desc); addDocumentToIndex(postOrDelete, doc, s, desc); if (hasOption(ServiceOption.INSTRUMENTATION)) { int fieldCount = doc.getFields().size(); ServiceStat st = getStat(STAT_NAME_INDEXED_FIELD_COUNT); adjustStat(st, fieldCount); st = getHistogramStat(STAT_NAME_FIELD_COUNT_PER_DOCUMENT); setStat(st, fieldCount); } }
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
/** * Add single indexable field to the Lucene {@link Document}. * This function recurses if the field value is a PODO, map, array, or collection. *//*from w ww . j a v a 2 s. c o m*/ private void addIndexableFieldToDocument(Document doc, Object podo, PropertyDescription pd, String fieldName) { Field luceneField = null; Field luceneDocValuesField = null; Field.Store fsv = Field.Store.NO; boolean isSorted = false; Object v = podo; if (v == null) { return; } if (pd.indexingOptions.contains(PropertyIndexingOption.SORT)) { isSorted = true; } EnumSet<PropertyIndexingOption> opts = pd.indexingOptions; if (opts != null && opts.contains(PropertyIndexingOption.STORE_ONLY)) { return; } boolean expandField = opts != null && opts.contains(PropertyIndexingOption.EXPAND); if (v instanceof String) { if (opts != null && opts.contains(PropertyIndexingOption.TEXT)) { luceneField = new TextField(fieldName, v.toString(), fsv); } else { luceneField = new StringField(fieldName, v.toString(), fsv); } if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } else if (v instanceof URI) { String uriValue = QuerySpecification.toMatchValue((URI) v); luceneField = new StringField(fieldName, uriValue, fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } else if (pd.typeName.equals(TypeName.ENUM)) { String enumValue = QuerySpecification.toMatchValue((Enum<?>) v); luceneField = new StringField(fieldName, enumValue, fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } else if (pd.typeName.equals(TypeName.LONG)) { if (v instanceof Integer) { int i = (int) v; v = i * 1L; } luceneField = new LongField(fieldName, (long) v, fsv == Store.NO ? 
this.longUnStoredField : this.longStoredField); } else if (pd.typeName.equals(TypeName.DATE)) { // Index as microseconds since UNIX epoch Date dt = (Date) v; luceneField = new LongField(fieldName, dt.getTime() * 1000, fsv == Store.NO ? this.longUnStoredField : this.longStoredField); } else if (pd.typeName.equals(TypeName.DOUBLE)) { luceneField = new DoubleField(fieldName, (double) v, fsv == Store.NO ? this.doubleUnStoredField : this.doubleStoredField); } else if (pd.typeName.equals(TypeName.BOOLEAN)) { String booleanValue = QuerySpecification.toMatchValue((boolean) v); luceneField = new StringField(fieldName, booleanValue, fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef((booleanValue))); } } else if (pd.typeName.equals(TypeName.BYTES)) { // Don't store bytes in the index } else if (pd.typeName.equals(TypeName.PODO)) { // Ignore all complex fields if they are not explicitly marked with EXPAND. // We special case all fields of TaskState to ensure task based services have // a guaranteed minimum level indexing and query behavior if (!(v instanceof TaskState) && !expandField) { return; } addObjectIndexableFieldToDocument(doc, v, pd, fieldName); return; } else if (expandField && pd.typeName.equals(TypeName.MAP)) { addMapIndexableFieldToDocument(doc, v, pd, fieldName); return; } else if (expandField && (pd.typeName.equals(TypeName.COLLECTION) || pd.typeName.equals(TypeName.ARRAY))) { addCollectionIndexableFieldToDocument(doc, v, pd, fieldName); return; } else { luceneField = new StringField(fieldName, v.toString(), fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } if (luceneField != null) { doc.add(luceneField); } if (luceneDocValuesField != null) { doc.add(luceneDocValuesField); } }
From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java
License:Apache License
@Override public Object save(Object entity) { if (!(entity instanceof SearchableGemFireEntity)) throw new IllegalArgumentException( "Can only accept SearchableGemFireEntity data type for this repository."); SearchableGemFireEntity gfEntity = (SearchableGemFireEntity) entity; if (searchableFields == null) setupAttributeReflection(gfEntity.getValue()); try {// w w w.ja va 2 s .c o m Document doc = new Document(); // Add reflected fields to document iff search value is not null for (java.lang.reflect.Field f : searchableFields) { String fieldName = f.getName().intern(); String searchText = (String) f.get(gfEntity.getValue()); if (searchText != null) doc.add(new Field(fieldName, searchText, TextField.TYPE_STORED)); } doc.add(new Field(savedField, savedFieldValue, TextField.TYPE_STORED)); doc.add(new StoredField(GEMFIRE_KEY, new BytesRef(ObjectSerializer.serialize(gfEntity.getKey())))); indexWriter.addDocument(doc); searchManager.maybeRefresh(); indexWriter.commit(); } catch (IOException e) { gfEntity = null; e.printStackTrace(); } catch (Exception e) { gfEntity = null; e.printStackTrace(); } return gfEntity; }
From source file:com.vmware.demo.sgf.lucene.impl.LuceneGemFireRepositoryImpl.java
License:Apache License
@Override public Iterable save(Iterable entities) { boolean failed = false; try {/*w w w. j a v a2 s. co m*/ for (Object entity : entities) { if (!(entity instanceof SearchableGemFireEntity)) throw new IllegalArgumentException( "Can only accept SearchableGemFireEntity data type for this repository."); SearchableGemFireEntity gfEntity = (SearchableGemFireEntity) entity; if (searchableFields == null) setupAttributeReflection(gfEntity.getValue()); Document doc = new Document(); // Add reflected fields to document iff search value is not null for (java.lang.reflect.Field f : searchableFields) { String fieldName = f.getName().intern(); String searchText = (String) f.get(gfEntity.getValue()); if (searchText != null) doc.add(new Field(fieldName, searchText, TextField.TYPE_STORED)); } doc.add(new Field(savedField, savedFieldValue, TextField.TYPE_STORED)); doc.add(new StoredField(GEMFIRE_KEY, new BytesRef(ObjectSerializer.serialize(gfEntity.getKey())))); // StraightBytesDocValuesField indexWriter.addDocument(doc); } searchManager.maybeRefresh(); indexWriter.commit(); } catch (IOException e) { e.printStackTrace(); try { indexWriter.rollback(); } catch (IOException e1) { e1.printStackTrace(); } failed = true; } catch (Exception e) { e.printStackTrace(); try { indexWriter.rollback(); } catch (IOException e1) { e1.printStackTrace(); } failed = true; } return (failed) ? null : entities; }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
protected void updateIndex(Operation updateOp) throws Throwable { UpdateIndexRequest r = updateOp.getBody(UpdateIndexRequest.class); ServiceDocument s = r.document;//from w w w .j av a 2s.c o m ServiceDocumentDescription desc = r.description; if (updateOp.isRemote()) { updateOp.fail(new IllegalStateException("Remote requests not allowed")); return; } if (s == null) { updateOp.fail(new IllegalArgumentException("document is required")); return; } String link = s.documentSelfLink; if (link == null) { updateOp.fail(new IllegalArgumentException("documentSelfLink is required")); return; } if (s.documentUpdateAction == null) { updateOp.fail(new IllegalArgumentException("documentUpdateAction is required")); return; } if (desc == null) { updateOp.fail(new IllegalArgumentException("description is required")); return; } s.documentDescription = null; Document doc = new Document(); Field updateActionField = new StoredField(ServiceDocument.FIELD_NAME_UPDATE_ACTION, s.documentUpdateAction); doc.add(updateActionField); addBinaryStateFieldToDocument(s, r.serializedDocument, desc, doc); Field selfLinkField = new StringField(ServiceDocument.FIELD_NAME_SELF_LINK, link, Field.Store.YES); doc.add(selfLinkField); Field sortedSelfLinkField = new SortedDocValuesField(ServiceDocument.FIELD_NAME_SELF_LINK, new BytesRef(link)); doc.add(sortedSelfLinkField); String kind = s.documentKind; if (kind != null) { Field kindField = new StringField(ServiceDocument.FIELD_NAME_KIND, kind, Field.Store.NO); doc.add(kindField); } if (s.documentAuthPrincipalLink != null) { Field principalField = new StringField(ServiceDocument.FIELD_NAME_AUTH_PRINCIPAL_LINK, s.documentAuthPrincipalLink, Field.Store.NO); doc.add(principalField); } if (s.documentTransactionId != null) { Field transactionField = new StringField(ServiceDocument.FIELD_NAME_TRANSACTION_ID, s.documentTransactionId, Field.Store.NO); doc.add(transactionField); } addNumericField(doc, ServiceDocument.FIELD_NAME_UPDATE_TIME_MICROS, 
s.documentUpdateTimeMicros, true); if (s.documentExpirationTimeMicros > 0) { addNumericField(doc, ServiceDocument.FIELD_NAME_EXPIRATION_TIME_MICROS, s.documentExpirationTimeMicros, true); } addNumericField(doc, ServiceDocument.FIELD_NAME_VERSION, s.documentVersion, true); if (desc.propertyDescriptions == null || desc.propertyDescriptions.isEmpty()) { // no additional property type information, so we will add the // document with common fields indexed plus the full body addDocumentToIndex(updateOp, doc, s, desc); return; } addIndexableFieldsToDocument(doc, s, desc); addDocumentToIndex(updateOp, doc, s, desc); if (hasOption(ServiceOption.INSTRUMENTATION)) { int fieldCount = doc.getFields().size(); ServiceStat st = getStat(STAT_NAME_INDEXED_FIELD_COUNT); adjustStat(st, fieldCount); st = getHistogramStat(STAT_NAME_FIELD_COUNT_PER_DOCUMENT); setStat(st, fieldCount); } }
From source file:com.vmware.xenon.services.common.LuceneDocumentIndexService.java
License:Open Source License
/** * Add single indexable field to the Lucene {@link Document}. * This function recurses if the field value is a PODO, map, array, or collection. */// ww w . jav a 2 s .c om private void addIndexableFieldToDocument(Document doc, Object podo, PropertyDescription pd, String fieldName) { Field luceneField = null; Field luceneDocValuesField = null; Field.Store fsv = Field.Store.NO; boolean isSorted = false; boolean expandField = false; Object v = podo; if (v == null) { return; } EnumSet<PropertyIndexingOption> opts = pd.indexingOptions; if (opts != null) { if (opts.contains(PropertyIndexingOption.STORE_ONLY)) { return; } if (opts.contains(PropertyIndexingOption.SORT)) { isSorted = true; } if (opts.contains(PropertyIndexingOption.EXPAND)) { expandField = true; } } if (pd.usageOptions != null) { if (pd.usageOptions.contains(PropertyUsageOption.LINK)) { fsv = Field.Store.YES; } if (pd.usageOptions.contains(PropertyUsageOption.LINKS)) { expandField = true; } } boolean isStored = fsv == Field.Store.YES; if (v instanceof String) { String stringValue = v.toString(); if (opts == null) { luceneField = new StringField(fieldName, stringValue, fsv); } else { if (opts.contains(PropertyIndexingOption.CASE_INSENSITIVE)) { stringValue = stringValue.toLowerCase(); } if (opts.contains(PropertyIndexingOption.TEXT)) { luceneField = new TextField(fieldName, stringValue, fsv); } else { luceneField = new StringField(fieldName, stringValue, fsv); } } if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(stringValue)); } } else if (v instanceof URI) { String uriValue = QuerySpecification.toMatchValue((URI) v); luceneField = new StringField(fieldName, uriValue, fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } else if (pd.typeName.equals(TypeName.ENUM)) { String enumValue = QuerySpecification.toMatchValue((Enum<?>) v); luceneField = new StringField(fieldName, enumValue, fsv); if (isSorted) { 
luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } else if (pd.typeName.equals(TypeName.LONG)) { long value = ((Number) v).longValue(); addNumericField(doc, fieldName, value, isStored); } else if (pd.typeName.equals(TypeName.DATE)) { // Index as microseconds since UNIX epoch long value = ((Date) v).getTime() * 1000; addNumericField(doc, fieldName, value, isStored); } else if (pd.typeName.equals(TypeName.DOUBLE)) { double value = ((Number) v).doubleValue(); addNumericField(doc, fieldName, value, isStored); } else if (pd.typeName.equals(TypeName.BOOLEAN)) { String booleanValue = QuerySpecification.toMatchValue((boolean) v); luceneField = new StringField(fieldName, booleanValue, fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef((booleanValue))); } } else if (pd.typeName.equals(TypeName.BYTES)) { // Don't store bytes in the index } else if (pd.typeName.equals(TypeName.PODO)) { // Ignore all complex fields if they are not explicitly marked with EXPAND. // We special case all fields of TaskState to ensure task based services have // a guaranteed minimum level indexing and query behavior if (!(v instanceof TaskState) && !expandField) { return; } addObjectIndexableFieldToDocument(doc, v, pd, fieldName); return; } else if (expandField && pd.typeName.equals(TypeName.MAP)) { addMapIndexableFieldToDocument(doc, v, pd, fieldName); return; } else if (expandField && (pd.typeName.equals(TypeName.COLLECTION))) { addCollectionIndexableFieldToDocument(doc, v, pd, fieldName); return; } else { luceneField = new StringField(fieldName, v.toString(), fsv); if (isSorted) { luceneDocValuesField = new SortedDocValuesField(fieldName, new BytesRef(v.toString())); } } if (luceneField != null) { doc.add(luceneField); } if (luceneDocValuesField != null) { doc.add(luceneDocValuesField); } }
From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java
License:Open Source License
/**
 * Sets the given self link on the {@code selfLinkField} context's string field and sorted
 * field, and adds both fields to the current document.
 *
 * @param selfLink self link value to index
 */
void addSelfLinkField(String selfLink) {
    StringFieldContext selfLinkCtx = this.selfLinkField;

    // Plain string field.
    selfLinkCtx.stringField.setStringValue(selfLink);
    this.doc.add(selfLinkCtx.stringField);

    // Sorted counterpart carrying the same value as bytes.
    BytesRef selfLinkBytes = new BytesRef(selfLink);
    selfLinkCtx.sortedField.setBytesValue(selfLinkBytes);
    this.doc.add(selfLinkCtx.sortedField);
}
From source file:com.vmware.xenon.services.common.LuceneIndexDocumentHelper.java
License:Open Source License
/**
 * Returns the {@link SortedDocValuesField} kept in {@code sortedStringFields} for the given
 * name, creating it on first use, with its bytes value set to {@code value}.
 *
 * @param name  field name, also the lookup key in {@code sortedStringFields}
 * @param value value to set on the field
 * @return the field for {@code name}, now holding {@code value}
 */
private Field getAndSetSortedStoredField(String name, String value) {
    Field sortedField = this.sortedStringFields.computeIfAbsent(name,
            key -> new SortedDocValuesField(name, new BytesRef(value)));
    // An already-existing field still carries its previous value, so always overwrite it.
    sortedField.setBytesValue(new BytesRef(value));
    return sortedField;
}
From source file:com.xiaomi.linden.core.search.LindenDocParser.java
License:Apache License
public static Document parse(LindenDocument lindenDoc, LindenConfig config) { if (!lindenDoc.isSetFields()) { return null; }/*from w ww . ja va 2 s. c o m*/ Document doc = new Document(); doc.add(new StringField(config.getSchema().getId(), lindenDoc.getId(), Field.Store.YES)); for (LindenField field : lindenDoc.getFields()) { LindenFieldSchema schema = field.getSchema(); Field.Store isStored = schema.isStored() ? Field.Store.YES : Field.Store.NO; String name = field.getSchema().getName(); Object value; if (!schema.isIndexed() && schema.isStored()) { doc.add(new Field(name, field.getValue(), STORED_ONLY)); } switch (schema.getType()) { case INTEGER: value = Integer.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new IntField(name, (Integer) value, isStored)); } if (schema.isDocValues()) { long docValuesBits = ((Integer) value).longValue(); doc.add(new NumericDocValuesField(name, docValuesBits)); } break; case LONG: value = Long.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new LongField(name, (Long) value, isStored)); } if (schema.isDocValues()) { doc.add(new NumericDocValuesField(name, (long) value)); } break; case DOUBLE: value = Double.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new DoubleField(name, (Double) value, isStored)); } if (schema.isDocValues()) { long docValuesBits = Double.doubleToLongBits((Double) value); doc.add(new NumericDocValuesField(name, docValuesBits)); } break; case FLOAT: value = Float.valueOf(field.getValue()); if (schema.isIndexed()) { doc.add(new FloatField(name, (Float) value, isStored)); } if (schema.isDocValues()) { long docValuesBits = Float.floatToIntBits((Float) value); doc.add(new NumericDocValuesField(name, docValuesBits)); } break; case STRING: if (Strings.isNullOrEmpty(field.getValue())) { break; } if (schema.isIndexed()) { FieldType type = new FieldType(); type.setTokenized(schema.isTokenized()); type.setIndexed(schema.isIndexed()); type.setStored(schema.isStored()); 
type.setOmitNorms(schema.isOmitNorms()); if (schema.isSnippet()) { type.setIndexOptions(FieldInfo.IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); // snippet will use the stored info. type.setStored(true); } if (schema.isOmitFreqs()) { type.setIndexOptions(FieldInfo.IndexOptions.DOCS_ONLY); } doc.add(new Field(name, field.getValue(), type)); } if (schema.isDocValues()) { BytesRef bytes = new BytesRef(field.getValue()); doc.add(new BinaryDocValuesField(name, bytes)); } break; case FACET: String[] facetPath = field.getValue().split("/"); doc.add(new FacetField(name, facetPath)); if (schema.isIndexed()) { doc.add(new StringField(name, field.getValue(), isStored)); } if (schema.isDocValues()) { doc.add(new BinaryDocValuesField(name, new BytesRef(field.getValue()))); } break; default: } } if (lindenDoc.isSetCoordinate()) { Coordinate coord = lindenDoc.getCoordinate(); Shape shape = SpatialContext.GEO.makePoint(coord.getLongitude(), coord.getLatitude()); for (IndexableField field : config.getSpatialStrategy().createIndexableFields(shape)) { doc.add(field); } } return doc; }