List of usage examples for org.apache.lucene.index IndexOptions DOCS
IndexOptions DOCS
To view the full source code for org.apache.lucene.index IndexOptions.DOCS, click the Source Link next to each example.
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
/**
 * Adds the fields shared by all feature index entries (feature id, chromosome id/name,
 * start/end indexes, feature type, feature name, file id and UID) to the given document.
 *
 * @param document      the Lucene document being built
 * @param entry         the feature index entry supplying the field values
 * @param featureFileId id of the feature file the entry belongs to
 */
private void addCommonDocumentFields(Document document, FeatureIndexEntry entry, final Long featureFileId) {
    document.add(new SortedStringField(FeatureIndexFields.FEATURE_ID.getFieldName(), entry.getFeatureId()));

    // Chromosome id: indexed (DOCS only), stored, untokenized, with SORTED doc values.
    FieldType fieldType = new FieldType();
    fieldType.setOmitNorms(true);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(false);
    fieldType.setDocValuesType(DocValuesType.SORTED);
    fieldType.freeze();
    Field field = new Field(FeatureIndexFields.CHROMOSOME_ID.getFieldName(),
            entry.getChromosome() != null ? new BytesRef(entry.getChromosome().getId().toString())
                    : new BytesRef(""),
            fieldType);
    document.add(field);
    document.add(new SortedStringField(FeatureIndexFields.CHROMOSOME_NAME.getFieldName(),
            entry.getChromosome().getName(), true));

    document.add(new SortedIntPoint(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new StoredField(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.START_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new SortedIntPoint(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new StoredField(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    // BUG FIX: the END_INDEX group doc values were previously built from getStartIndex()
    // (copy-paste from the START_INDEX block), which broke sorting/grouping by end index.
    document.add(new SortedDocValuesField(FeatureIndexFields.END_INDEX.getGroupName(),
            new BytesRef(entry.getEndIndex().toString())));

    document.add(new StringField(FeatureIndexFields.FEATURE_TYPE.getFieldName(),
            entry.getFeatureType() != null ? entry.getFeatureType().getFileValue() : "", Field.Store.YES));
    document.add(new StringField(FeatureIndexFields.FILE_ID.getFieldName(), featureFileId.toString(),
            Field.Store.YES));

    // Feature name is indexed lowercased for case-insensitive search but kept in
    // original case in the doc values used for sorting.
    document.add(new StringField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            entry.getFeatureName() != null ? entry.getFeatureName().toLowerCase() : "", Field.Store.YES));
    document.add(new SortedDocValuesField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            new BytesRef(entry.getFeatureName() != null ? entry.getFeatureName() : "")));

    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.CHR_ID.getFieldName(),
            entry.getChromosome().getId().toString()));
    document.add(new SortedStringField(FeatureIndexFields.UID.getFieldName(), entry.getUuid().toString()));
    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.F_UID.getFieldName(),
            entry.getUuid().toString()));
}
From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java
License:Apache License
/**
 * Builds a Lucene document for a spatial shape: the record identity (RID field),
 * the indexable fields produced by the spatial strategy, and a stored string form
 * of the shape for retrieval.
 *
 * @param oIdentifiable record whose identity is stored in the RID field
 * @param shape         the spatial shape to index
 * @return the populated document
 */
protected Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {
    // FIX: removed a FieldType local (IndexOptions.DOCS, stored) that was configured
    // but never attached to any field -- dead code in the original.
    Document doc = new Document();
    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }
    // Store the shape's textual representation alongside the indexed fields.
    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
    return doc;
}
From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java
License:Apache License
/**
 * Indexes the fixed set of tenor codes (Y1..Y8). Each document carries a stored,
 * untokenized, unindexed "id" field, a stored untokenized single-term "fieldname"
 * field, plus analyzed "desc" and "type" text fields.
 *
 * @param writer the index writer the tenor documents are added to
 * @throws IOException if the writer fails to add a document
 */
private void addTenor(final IndexWriter writer) throws IOException {
    final String[] tenorCodes = { "Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8" };
    for (final String code : tenorCodes) {
        final Document document = new Document();

        // "id": stored only -- never indexed, never tokenized.
        final FieldType idType = new FieldType();
        idType.setIndexOptions(IndexOptions.NONE);
        idType.setStored(true);
        idType.setTokenized(false);
        document.add(new Field("id", code, idType));

        // "fieldname": stored and indexed as a single exact term (DOCS only).
        final FieldType nameType = new FieldType();
        nameType.setIndexOptions(IndexOptions.DOCS);
        nameType.setStored(true);
        nameType.setTokenized(false);
        document.add(new Field("fieldname", code, nameType));

        document.add(new Field("desc", "tenor", TextField.TYPE_STORED));
        document.add(new Field("type", Type.TENOR.name(), TextField.TYPE_STORED));
        writer.addDocument(document);
    }
}
From source file:com.rocana.lucene.codec.v1.RocanaBlockTreeTermsReader.java
License:Apache License
/**
 * Sole constructor. Opens the terms dictionary and terms index for the segment,
 * validates their codec headers and checksum footers, then reads per-field
 * metadata and registers a RocanaFieldReader for every field found.
 *
 * NOTE(review): this fork deliberately skips checksumming the entire terms-index
 * file on open -- see the commented-out checksumEntireFile call below.
 */
public RocanaBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
        throws IOException {
    boolean success = false;
    IndexInput indexIn = null;

    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;
    String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
        termsIn = state.directory.openInput(termsName, state.context);
        version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT,
                state.segmentInfo.getId(), state.segmentSuffix);

        // Decide whether the index may contain auto-prefix terms, based on the format version.
        if (version < VERSION_AUTO_PREFIX_TERMS) {
            // Old (pre-5.2.0) index, no auto-prefix terms:
            this.anyAutoPrefixTerms = false;
        } else if (version == VERSION_AUTO_PREFIX_TERMS) {
            // 5.2.x index, might have auto-prefix terms:
            this.anyAutoPrefixTerms = true;
        } else {
            // 5.3.x index, we record up front if we may have written any auto-prefix terms:
            assert version >= VERSION_AUTO_PREFIX_TERMS_COND;
            byte b = termsIn.readByte();
            if (b == 0) {
                this.anyAutoPrefixTerms = false;
            } else if (b == 1) {
                this.anyAutoPrefixTerms = true;
            } else {
                throw new CorruptIndexException("invalid anyAutoPrefixTerms: expected 0 or 1 but got " + b,
                        termsIn);
            }
        }

        String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix,
                TERMS_INDEX_EXTENSION);
        indexIn = state.directory.openInput(indexName, state.context);
        CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version,
                state.segmentInfo.getId(), state.segmentSuffix);

        // IMPORTANT: comment out this one line to prevent checksumming the entire file.
        // This is the reason we have a custom Lucene codec and forked Lucene classes.
        //CodecUtil.checksumEntireFile(indexIn);

        // Have PostingsReader init itself
        postingsReader.init(termsIn, state);

        // NOTE: data file is too costly to verify checksum against all the bytes on open,
        // but for now we at least verify proper structure of the checksum footer: which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(termsIn);

        // Read per-field details
        seekDir(termsIn, dirOffset);
        seekDir(indexIn, indexDirOffset);

        final int numFields = termsIn.readVInt();
        if (numFields < 0) {
            throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
        }

        for (int i = 0; i < numFields; ++i) {
            final int field = termsIn.readVInt();
            final long numTerms = termsIn.readVLong();
            if (numTerms <= 0) {
                throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
            }
            // rootCode: serialized pointer to the root block of this field's term tree.
            final int numBytes = termsIn.readVInt();
            if (numBytes < 0) {
                throw new CorruptIndexException(
                        "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
            }
            final BytesRef rootCode = new BytesRef(new byte[numBytes]);
            termsIn.readBytes(rootCode.bytes, 0, numBytes);
            rootCode.length = numBytes;
            final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
            if (fieldInfo == null) {
                throw new CorruptIndexException("invalid field number: " + field, termsIn);
            }
            // sumTotalTermFreq is only written when the field records freqs; for
            // DOCS-only fields it is represented as -1.
            final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1
                    : termsIn.readVLong();
            final long sumDocFreq = termsIn.readVLong();
            final int docCount = termsIn.readVInt();
            final int longsSize = termsIn.readVInt();
            if (longsSize < 0) {
                throw new CorruptIndexException(
                        "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize,
                        termsIn);
            }
            BytesRef minTerm = readBytesRef(termsIn);
            BytesRef maxTerm = readBytesRef(termsIn);
            if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
                throw new CorruptIndexException(
                        "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), termsIn);
            }
            if (sumDocFreq < docCount) { // #postings must be >= #docs with field
                throw new CorruptIndexException(
                        "invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount, termsIn);
            }
            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
                throw new CorruptIndexException(
                        "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
                        termsIn);
            }
            final long indexStartFP = indexIn.readVLong();
            RocanaFieldReader previous = fields.put(fieldInfo.name,
                    new RocanaFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq,
                            docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm));
            if (previous != null) {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
            }
        }
        indexIn.close();
        success = true;
    } finally {
        if (!success) {
            // this.close() will close in:
            IOUtils.closeWhileHandlingException(indexIn, this);
        }
    }
}
From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnumFrame.java
License:Apache License
/**
 * Lazily decodes term metadata up to the current term block ordinal.
 * Reads docFreq (and totalTermFreq, unless the field is DOCS-only) from the
 * stats stream, then per-term metadata longs from the bytes stream, delegating
 * postings-level decoding to the parent postings reader.
 *
 * The reads are strictly sequential: each loop iteration consumes the next
 * term's stats/metadata in file order, so metaDataUpto tracks how far decoding
 * has progressed.
 */
public void decodeMetaData() throws IOException {

    // lazily catch up on metadata decode:
    final int limit = getTermBlockOrd();
    boolean absolute = metaDataUpto == 0;
    assert limit > 0;

    // TODO: better API would be "jump straight to term=N"???
    while (metaDataUpto < limit) {

        // TODO: we could make "tiers" of metadata, ie,
        // decode docFreq/totalTF but don't decode postings
        // metadata; this way caller could get
        // docFreq/totalTF w/o paying decode cost for
        // postings

        // TODO: if docFreq were bulk decoded we could
        // just skipN here:

        // stats
        termState.docFreq = statsReader.readVInt();
        // DOCS-only fields don't record freqs, so totalTermFreq is not present.
        if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
            termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
        }
        // metadata
        for (int i = 0; i < ite.fr.longsSize; i++) {
            longs[i] = bytesReader.readVLong();
        }
        ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute);
        metaDataUpto++;
        absolute = false;
    }
    termState.termBlockOrd = metaDataUpto;
}
From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnumFrame.java
License:Apache License
/**
 * Lazily decodes term metadata up to the current term block ordinal for the
 * segment terms enum. Mirrors the intersect-enum variant: docFreq (and
 * totalTermFreq for fields that record freqs) comes from the stats stream,
 * followed by the per-term metadata longs, with postings-level decoding
 * delegated to the parent postings reader.
 */
public void decodeMetaData() throws IOException {

    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);

    // lazily catch up on metadata decode:
    final int limit = getTermBlockOrd();
    boolean absolute = metaDataUpto == 0;
    assert limit > 0;

    // TODO: better API would be "jump straight to term=N"???
    while (metaDataUpto < limit) {

        // TODO: we could make "tiers" of metadata, ie,
        // decode docFreq/totalTF but don't decode postings
        // metadata; this way caller could get
        // docFreq/totalTF w/o paying decode cost for
        // postings

        // TODO: if docFreq were bulk decoded we could
        // just skipN here:

        // stats
        state.docFreq = statsReader.readVInt();
        //if (DEBUG) System.out.println("    dF=" + state.docFreq);
        // DOCS-only fields don't record freqs, so totalTermFreq is not present.
        if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
            state.totalTermFreq = state.docFreq + statsReader.readVLong();
            //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
        }
        // metadata
        for (int i = 0; i < ste.fr.longsSize; i++) {
            longs[i] = bytesReader.readVLong();
        }
        ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);
        metaDataUpto++;
        absolute = false;
    }
    state.termBlockOrd = metaDataUpto;
}
From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java
License:Apache License
public void test() throws Exception { Directory dir = newDirectory();//from ww w . jav a 2 s .c o m Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) { @Override protected TokenStreamComponents createComponents(String fieldName) { Tokenizer tokenizer = new MockTokenizer(); if (fieldName.contains("payloadsFixed")) { TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1); return new TokenStreamComponents(tokenizer, filter); } else if (fieldName.contains("payloadsVariable")) { TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer); return new TokenStreamComponents(tokenizer, filter); } else { return new TokenStreamComponents(tokenizer); } } }; IndexWriterConfig iwc = newIndexWriterConfig(analyzer); iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat())); // TODO we could actually add more fields implemented with different PFs // or, just put this test into the usual rotation? RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc); Document doc = new Document(); FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsOnlyType.setStoreTermVectors(true); docsOnlyType.setIndexOptions(IndexOptions.DOCS); FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED); // turn this on for a cross-check docsAndFreqsType.setStoreTermVectors(true); docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS); FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED); // turn these on for a cross-check positionsType.setStoreTermVectors(true); positionsType.setStoreTermVectorPositions(true); positionsType.setStoreTermVectorOffsets(true); positionsType.setStoreTermVectorPayloads(true); FieldType offsetsType = new FieldType(positionsType); offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); Field field1 = new Field("field1docs", "", docsOnlyType); Field field2 = new 
Field("field2freqs", "", docsAndFreqsType); Field field3 = new Field("field3positions", "", positionsType); Field field4 = new Field("field4offsets", "", offsetsType); Field field5 = new Field("field5payloadsFixed", "", positionsType); Field field6 = new Field("field6payloadsVariable", "", positionsType); Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType); Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType); doc.add(field1); doc.add(field2); doc.add(field3); doc.add(field4); doc.add(field5); doc.add(field6); doc.add(field7); doc.add(field8); for (int i = 0; i < MAXDOC; i++) { String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ') + " " + TestUtil.randomSimpleString(random()); field1.setStringValue(stringValue); field2.setStringValue(stringValue); field3.setStringValue(stringValue); field4.setStringValue(stringValue); field5.setStringValue(stringValue); field6.setStringValue(stringValue); field7.setStringValue(stringValue); field8.setStringValue(stringValue); iw.addDocument(doc); } iw.close(); verify(dir); TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge iwc = newIndexWriterConfig(analyzer); iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat())); iwc.setOpenMode(OpenMode.APPEND); IndexWriter iw2 = new IndexWriter(dir, iwc); iw2.forceMerge(1); iw2.close(); verify(dir); dir.close(); }
From source file:com.spike.text.lucene.util.LuceneTestBookIndexingUtil.java
License:Apache License
/**
 * Builds a Lucene document for one book described by a .properties file.
 * The category is derived from the file's path relative to {@code rootDir};
 * isbn, title, author(s), url, subject and pubmonth come from the properties.
 *
 * @param rootDir base directory of the book tree (used to derive the category)
 * @param file    the .properties file describing one book
 * @return the populated Lucene document
 * @throws IOException if the properties file cannot be read
 */
public static Document getDocument(String rootDir, File file) throws IOException {
    Properties props = new Properties();
    // FIX: the original called props.load(new FileInputStream(file)) and leaked
    // the stream; try-with-resources guarantees it is closed.
    try (FileInputStream in = new FileInputStream(file)) {
        props.load(in);
    }

    Document doc = new Document();

    // category comes from relative path below the base directory
    String category = file.getParent().substring(rootDir.length());
    category = category.replace(File.separatorChar, '/');

    String isbn = props.getProperty("isbn");
    String title = props.getProperty("title");
    String author = props.getProperty("author");
    String url = props.getProperty("url");
    String subject = props.getProperty("subject");
    String pubmonth = props.getProperty("pubmonth");

    System.out.println(
            title + "\n" + author + "\n" + subject + "\n" + pubmonth + "\n" + category + "\n---------");

    // Exact-match identifiers: untokenized, indexed with DOCS only.
    doc.add(LuceneAppUtil.createStringField("isbn", isbn, Store.YES, false, IndexOptions.DOCS, false));
    doc.add(LuceneAppUtil.createStringField("category", category, Store.YES, false, IndexOptions.DOCS,
            false));

    // Title: analyzed with full positions/offsets; title2: lowercased exact form, no norms.
    doc.add(LuceneAppUtil.createStringField("title", title, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    doc.add(LuceneAppUtil.createStringField("title2", title.toLowerCase(), Store.YES, false,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    // split multiple authors into unique field instances
    String[] authors = author.split(",");
    for (String a : authors) {
        doc.add(LuceneAppUtil.createStringField("author", a, Store.YES, false,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }

    doc.add(LuceneAppUtil.createStringField("url", url, Store.YES, false, IndexOptions.DOCS, true));
    doc.add(LuceneAppUtil.createStringField("subject", subject, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    Field pubmonthField = new IntField("pubmonth", Integer.parseInt(pubmonth), Store.YES);
    doc.add(pubmonthField);

    Date d;
    try {
        d = DateTools.stringToDate(pubmonth);
    } catch (ParseException pe) {
        // Malformed pubmonth is a data error; propagate with the cause preserved.
        throw new RuntimeException(pe);
    }
    // pubmonth rendered as days-since-epoch for range queries at day granularity.
    Field pubmonthAsDayField = new IntField("pubmonthAsDay", (int) (d.getTime() / (1000 * 3600 * 24)),
            Store.YES);
    doc.add(pubmonthAsDayField);

    // Catch-all "contents" field aggregating the searchable text (not stored).
    for (String text : new String[] { title, subject, author, category }) {
        doc.add(LuceneAppUtil.createStringField("contents", text, Store.NO, true,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }
    return doc;
}
From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java
License:Open Source License
public static FieldType numericDocType(FieldType.NumericType type, boolean store) { FieldType t = new FieldType(); t.setStored(store);//from w w w. j ava2 s .c om t.setDocValuesType(DocValuesType.NUMERIC); t.setIndexOptions(IndexOptions.DOCS); t.setNumericType(type); return t; }
From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java
License:Open Source License
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException { switch (b) {/*from w ww . ja va2 s . c o m*/ case 0: return IndexOptions.NONE; case 1: return IndexOptions.DOCS; case 2: return IndexOptions.DOCS_AND_FREQS; case 3: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS; case 4: return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS; default: // BUG throw new CorruptIndexException("invalid IndexOptions byte: " + b, input); } }