Example usage for org.apache.lucene.index IndexOptions DOCS

List of usage examples for org.apache.lucene.index IndexOptions DOCS

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexOptions DOCS.

Prototype

IndexOptions DOCS

Click the source link below to view the source code for org.apache.lucene.index IndexOptions DOCS.

Click Source Link

Document

Only documents are indexed: term frequencies and positions are omitted.

Usage

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Adds the fields common to every feature index entry (feature ID, chromosome,
 * start/end positions, feature type, feature name, file ID and UID) to the
 * given Lucene document.
 *
 * @param document      the Lucene document being populated
 * @param entry         the feature index entry supplying the field values
 * @param featureFileId ID of the feature file this entry belongs to; must not be null
 */
private void addCommonDocumentFields(Document document, FeatureIndexEntry entry, final Long featureFileId) {
    document.add(new SortedStringField(FeatureIndexFields.FEATURE_ID.getFieldName(), entry.getFeatureId()));

    // Docs-only (no freqs/positions), stored, untokenized field with SORTED doc
    // values for the chromosome ID; frozen so it cannot be mutated afterwards.
    FieldType fieldType = new FieldType();
    fieldType.setOmitNorms(true);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setStored(true);
    fieldType.setTokenized(false);
    fieldType.setDocValuesType(DocValuesType.SORTED);
    fieldType.freeze();
    Field field = new Field(FeatureIndexFields.CHROMOSOME_ID.getFieldName(),
            entry.getChromosome() != null ? new BytesRef(entry.getChromosome().getId().toString())
                    : new BytesRef(""),
            fieldType);
    document.add(field);
    // NOTE(review): unlike CHROMOSOME_ID above, this line (and CHR_ID below)
    // dereferences getChromosome() without a null check — confirm entries
    // always carry a chromosome when this method is reached.
    document.add(new SortedStringField(FeatureIndexFields.CHROMOSOME_NAME.getFieldName(),
            entry.getChromosome().getName(), true));

    // Start index: point field for range queries, stored copy for retrieval,
    // and sorted doc values under the group name for grouping/sorting.
    document.add(new SortedIntPoint(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new StoredField(FeatureIndexFields.START_INDEX.getFieldName(), entry.getStartIndex()));
    document.add(new SortedDocValuesField(FeatureIndexFields.START_INDEX.getGroupName(),
            new BytesRef(entry.getStartIndex().toString())));

    document.add(new SortedIntPoint(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    document.add(new StoredField(FeatureIndexFields.END_INDEX.getFieldName(), entry.getEndIndex()));
    // FIX: the END_INDEX group doc values previously used getStartIndex()
    // (copy-paste bug), which broke grouping/sorting on the end position.
    document.add(new SortedDocValuesField(FeatureIndexFields.END_INDEX.getGroupName(),
            new BytesRef(entry.getEndIndex().toString())));

    document.add(new StringField(FeatureIndexFields.FEATURE_TYPE.getFieldName(),
            entry.getFeatureType() != null ? entry.getFeatureType().getFileValue() : "", Field.Store.YES));
    document.add(new StringField(FeatureIndexFields.FILE_ID.getFieldName(), featureFileId.toString(),
            Field.Store.YES));

    // Feature name is lower-cased for case-insensitive search; the doc values
    // variant keeps the original casing for sorting/display.
    document.add(new StringField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            entry.getFeatureName() != null ? entry.getFeatureName().toLowerCase() : "", Field.Store.YES));
    document.add(new SortedDocValuesField(FeatureIndexFields.FEATURE_NAME.getFieldName(),
            new BytesRef(entry.getFeatureName() != null ? entry.getFeatureName() : "")));

    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.CHR_ID.getFieldName(),
            entry.getChromosome().getId().toString()));

    document.add(new SortedStringField(FeatureIndexFields.UID.getFieldName(), entry.getUuid().toString()));
    document.add(new SortedSetDocValuesFacetField(FeatureIndexFields.F_UID.getFieldName(),
            entry.getUuid().toString()));
}

From source file:com.orientechnologies.spatial.engine.OLuceneSpatialIndexEngineAbstract.java

License:Apache License

/**
 * Builds the Lucene document for a spatial index entry: the record identity,
 * the indexable fields produced by the spatial strategy for the shape, and a
 * stored copy of the shape's textual representation.
 *
 * @param oIdentifiable the record whose identity is indexed under {@code RID}
 * @param shape         the spatial shape to index
 * @return the populated Lucene document
 */
protected Document newGeoDocument(OIdentifiable oIdentifiable, Shape shape) {
    // FIX: removed an unused local FieldType (it was created and configured
    // but never attached to any field).
    Document doc = new Document();

    // Record identity, indexed untokenized with norms omitted so it can be
    // matched exactly.
    doc.add(OLuceneIndexType.createField(RID, oIdentifiable.getIdentity().toString(), Field.Store.YES,
            Field.Index.NOT_ANALYZED_NO_NORMS));
    // Fields produced by the spatial strategy for the given shape.
    for (IndexableField f : strategy.createIndexableFields(shape)) {
        doc.add(f);
    }

    // Stored-only textual representation of the shape, for retrieval.
    doc.add(new StoredField(strategy.getFieldName(), ctx.toString(shape)));
    return doc;
}

From source file:com.ponysdk.sample.client.page.addon.SelectizeAddon.java

License:Apache License

/**
 * Indexes one document per tenor identifier (Y1..Y8) into the given writer.
 *
 * @param writer the index writer documents are added to
 * @throws IOException if indexing fails
 */
private void addTenor(final IndexWriter writer) throws IOException {
    final String[] tenors = { "Y1", "Y2", "Y3", "Y4", "Y5", "Y6", "Y7", "Y8" };

    // Stored-only (not indexed), untokenized type for the raw identifier.
    final FieldType idType = new FieldType();
    idType.setIndexOptions(IndexOptions.NONE);
    idType.setStored(true);
    idType.setTokenized(false);

    // Docs-only indexed, stored, untokenized type for the searchable field.
    final FieldType nameType = new FieldType();
    nameType.setIndexOptions(IndexOptions.DOCS);
    nameType.setStored(true);
    nameType.setTokenized(false);

    for (final String tenor : tenors) {
        final Document doc = new Document();
        doc.add(new Field("id", tenor, idType));
        doc.add(new Field("fieldname", tenor, nameType));
        doc.add(new Field("desc", "tenor", TextField.TYPE_STORED));
        doc.add(new Field("type", Type.TENOR.name(), TextField.TYPE_STORED));
        writer.addDocument(doc);
    }

}

From source file:com.rocana.lucene.codec.v1.RocanaBlockTreeTermsReader.java

License:Apache License

/** Sole constructor. */
/**
 * Sole constructor. Opens the terms dictionary and terms index files for the
 * segment, validates their headers, reads per-field terms metadata and builds
 * one {@code RocanaFieldReader} per field. On any failure, everything opened
 * so far is closed before the exception propagates.
 */
public RocanaBlockTreeTermsReader(PostingsReaderBase postingsReader, SegmentReadState state)
        throws IOException {
    // success flips to true only after all reads complete; the finally block
    // uses it to clean up on partial failure.
    boolean success = false;
    IndexInput indexIn = null;

    this.postingsReader = postingsReader;
    this.segment = state.segmentInfo.name;

    String termsName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_EXTENSION);
    try {
        termsIn = state.directory.openInput(termsName, state.context);
        version = CodecUtil.checkIndexHeader(termsIn, TERMS_CODEC_NAME, VERSION_START, VERSION_CURRENT,
                state.segmentInfo.getId(), state.segmentSuffix);

        // Whether this segment may contain auto-prefix terms depends on the
        // on-disk format version.
        if (version < VERSION_AUTO_PREFIX_TERMS) {
            // Old (pre-5.2.0) index, no auto-prefix terms:
            this.anyAutoPrefixTerms = false;
        } else if (version == VERSION_AUTO_PREFIX_TERMS) {
            // 5.2.x index, might have auto-prefix terms:
            this.anyAutoPrefixTerms = true;
        } else {
            // 5.3.x index, we record up front if we may have written any auto-prefix terms:
            assert version >= VERSION_AUTO_PREFIX_TERMS_COND;
            byte b = termsIn.readByte();
            if (b == 0) {
                this.anyAutoPrefixTerms = false;
            } else if (b == 1) {
                this.anyAutoPrefixTerms = true;
            } else {
                throw new CorruptIndexException("invalid anyAutoPrefixTerms: expected 0 or 1 but got " + b,
                        termsIn);
            }
        }

        String indexName = IndexFileNames.segmentFileName(segment, state.segmentSuffix, TERMS_INDEX_EXTENSION);
        indexIn = state.directory.openInput(indexName, state.context);
        CodecUtil.checkIndexHeader(indexIn, TERMS_INDEX_CODEC_NAME, version, version, state.segmentInfo.getId(),
                state.segmentSuffix);

        // IMPORTANT: comment out this one line to prevent checksumming the entire file.
        //            This is the reason we have a custom Lucene codec and forked Lucene classes.
        //CodecUtil.checksumEntireFile(indexIn);

        // Have PostingsReader init itself
        postingsReader.init(termsIn, state);

        // NOTE: data file is too costly to verify checksum against all the bytes on open,
        // but for now we at least verify proper structure of the checksum footer: which looks
        // for FOOTER_MAGIC + algorithmID. This is cheap and can detect some forms of corruption
        // such as file truncation.
        CodecUtil.retrieveChecksum(termsIn);

        // Read per-field details
        seekDir(termsIn, dirOffset);
        seekDir(indexIn, indexDirOffset);

        final int numFields = termsIn.readVInt();
        if (numFields < 0) {
            throw new CorruptIndexException("invalid numFields: " + numFields, termsIn);
        }

        // For each field: read its stats and root block pointer, validate them
        // against each other, then register a FieldReader for it.
        for (int i = 0; i < numFields; ++i) {
            final int field = termsIn.readVInt();
            final long numTerms = termsIn.readVLong();
            if (numTerms <= 0) {
                throw new CorruptIndexException("Illegal numTerms for field number: " + field, termsIn);
            }
            final int numBytes = termsIn.readVInt();
            if (numBytes < 0) {
                throw new CorruptIndexException(
                        "invalid rootCode for field number: " + field + ", numBytes=" + numBytes, termsIn);
            }
            // rootCode: serialized pointer to the root block of this field's term tree.
            final BytesRef rootCode = new BytesRef(new byte[numBytes]);
            termsIn.readBytes(rootCode.bytes, 0, numBytes);
            rootCode.length = numBytes;
            final FieldInfo fieldInfo = state.fieldInfos.fieldInfo(field);
            if (fieldInfo == null) {
                throw new CorruptIndexException("invalid field number: " + field, termsIn);
            }
            // DOCS-only fields record no term frequencies, so no sum is stored
            // on disk; -1 is the sentinel for "not available".
            final long sumTotalTermFreq = fieldInfo.getIndexOptions() == IndexOptions.DOCS ? -1
                    : termsIn.readVLong();
            final long sumDocFreq = termsIn.readVLong();
            final int docCount = termsIn.readVInt();
            final int longsSize = termsIn.readVInt();
            if (longsSize < 0) {
                throw new CorruptIndexException(
                        "invalid longsSize for field: " + fieldInfo.name + ", longsSize=" + longsSize, termsIn);
            }
            BytesRef minTerm = readBytesRef(termsIn);
            BytesRef maxTerm = readBytesRef(termsIn);
            if (docCount < 0 || docCount > state.segmentInfo.maxDoc()) { // #docs with field must be <= #docs
                throw new CorruptIndexException(
                        "invalid docCount: " + docCount + " maxDoc: " + state.segmentInfo.maxDoc(), termsIn);
            }
            if (sumDocFreq < docCount) { // #postings must be >= #docs with field
                throw new CorruptIndexException("invalid sumDocFreq: " + sumDocFreq + " docCount: " + docCount,
                        termsIn);
            }
            if (sumTotalTermFreq != -1 && sumTotalTermFreq < sumDocFreq) { // #positions must be >= #postings
                throw new CorruptIndexException(
                        "invalid sumTotalTermFreq: " + sumTotalTermFreq + " sumDocFreq: " + sumDocFreq,
                        termsIn);
            }
            final long indexStartFP = indexIn.readVLong();
            RocanaFieldReader previous = fields.put(fieldInfo.name,
                    new RocanaFieldReader(this, fieldInfo, numTerms, rootCode, sumTotalTermFreq, sumDocFreq,
                            docCount, indexStartFP, longsSize, indexIn, minTerm, maxTerm));
            if (previous != null) {
                throw new CorruptIndexException("duplicate field: " + fieldInfo.name, termsIn);
            }
        }

        indexIn.close();
        success = true;
    } finally {
        if (!success) {
            // this.close() will close in:
            IOUtils.closeWhileHandlingException(indexIn, this);
        }
    }
}

From source file:com.rocana.lucene.codec.v1.RocanaIntersectTermsEnumFrame.java

License:Apache License

/**
 * Lazily decodes term metadata (docFreq, totalTermFreq and postings-reader
 * metadata longs) for every term in this block up to the current term ordinal.
 *
 * @throws IOException if reading the stats or metadata bytes fails
 */
public void decodeMetaData() throws IOException {

    // lazily catch up on metadata decode:
    final int limit = getTermBlockOrd();
    // Metadata is delta-encoded; the very first decode must be absolute.
    boolean absolute = metaDataUpto == 0;
    assert limit > 0;

    // TODO: better API would be "jump straight to term=N"???
    while (metaDataUpto < limit) {

        // TODO: we could make "tiers" of metadata, ie,
        // decode docFreq/totalTF but don't decode postings
        // metadata; this way caller could get
        // docFreq/totalTF w/o paying decode cost for
        // postings

        // TODO: if docFreq were bulk decoded we could
        // just skipN here:

        // stats
        termState.docFreq = statsReader.readVInt();
        // DOCS-only fields store no term frequencies; for all other fields
        // totalTermFreq is written as a delta on top of docFreq.
        if (ite.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
            termState.totalTermFreq = termState.docFreq + statsReader.readVLong();
        }
        // metadata
        for (int i = 0; i < ite.fr.longsSize; i++) {
            longs[i] = bytesReader.readVLong();
        }
        ite.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ite.fr.fieldInfo, termState, absolute);

        metaDataUpto++;
        absolute = false;
    }
    termState.termBlockOrd = metaDataUpto;
}

From source file:com.rocana.lucene.codec.v1.RocanaSegmentTermsEnumFrame.java

License:Apache License

/**
 * Lazily decodes term metadata (docFreq, totalTermFreq and postings-reader
 * metadata longs) for every term in this block up to the current term ordinal.
 *
 * @throws IOException if reading the stats or metadata bytes fails
 */
public void decodeMetaData() throws IOException {

    //if (DEBUG) System.out.println("\nBTTR.decodeMetadata seg=" + segment + " mdUpto=" + metaDataUpto + " vs termBlockOrd=" + state.termBlockOrd);

    // lazily catch up on metadata decode:
    final int limit = getTermBlockOrd();
    // Metadata is delta-encoded; the very first decode must be absolute.
    boolean absolute = metaDataUpto == 0;
    assert limit > 0;

    // TODO: better API would be "jump straight to term=N"???
    while (metaDataUpto < limit) {

        // TODO: we could make "tiers" of metadata, ie,
        // decode docFreq/totalTF but don't decode postings
        // metadata; this way caller could get
        // docFreq/totalTF w/o paying decode cost for
        // postings

        // TODO: if docFreq were bulk decoded we could
        // just skipN here:

        // stats
        state.docFreq = statsReader.readVInt();
        //if (DEBUG) System.out.println("    dF=" + state.docFreq);
        // DOCS-only fields store no term frequencies; for all other fields
        // totalTermFreq is written as a delta on top of docFreq.
        if (ste.fr.fieldInfo.getIndexOptions() != IndexOptions.DOCS) {
            state.totalTermFreq = state.docFreq + statsReader.readVLong();
            //if (DEBUG) System.out.println("    totTF=" + state.totalTermFreq);
        }
        // metadata
        for (int i = 0; i < ste.fr.longsSize; i++) {
            longs[i] = bytesReader.readVLong();
        }
        ste.fr.parent.postingsReader.decodeTerm(longs, bytesReader, ste.fr.fieldInfo, state, absolute);

        metaDataUpto++;
        absolute = false;
    }
    state.termBlockOrd = metaDataUpto;
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * Indexes the same random content into eight fields covering every
 * IndexOptions / term-vector / payload combination, verifies the index,
 * force-merges to a single segment and verifies again.
 */
public void test() throws Exception {
    Directory dir = newDirectory();
    // Analyzer that injects payloads (fixed- or variable-length) only for
    // fields whose names request them.
    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            if (fieldName.contains("payloadsFixed")) {
                TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
                return new TokenStreamComponents(tokenizer, filter);
            } else if (fieldName.contains("payloadsVariable")) {
                TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
                return new TokenStreamComponents(tokenizer, filter);
            } else {
                return new TokenStreamComponents(tokenizer);
            }
        }
    };
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsOnlyType.setStoreTermVectors(true);
    docsOnlyType.setIndexOptions(IndexOptions.DOCS);

    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsAndFreqsType.setStoreTermVectors(true);
    docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn these on for a cross-check
    positionsType.setStoreTermVectors(true);
    positionsType.setStoreTermVectorPositions(true);
    positionsType.setStoreTermVectorOffsets(true);
    positionsType.setStoreTermVectorPayloads(true);
    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // One field per indexing-option combination; all share the same content.
    Field field1 = new Field("field1docs", "", docsOnlyType);
    Field field2 = new Field("field2freqs", "", docsAndFreqsType);
    Field field3 = new Field("field3positions", "", positionsType);
    Field field4 = new Field("field4offsets", "", offsetsType);
    Field field5 = new Field("field5payloadsFixed", "", positionsType);
    Field field6 = new Field("field6payloadsVariable", "", positionsType);
    Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
    Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
    doc.add(field1);
    doc.add(field2);
    doc.add(field3);
    doc.add(field4);
    doc.add(field5);
    doc.add(field6);
    doc.add(field7);
    doc.add(field8);
    // Reuse the same Document/Field instances, only swapping the string value
    // per iteration (standard Lucene indexing-speed idiom).
    for (int i = 0; i < MAXDOC; i++) {
        String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ')
                + " " + TestUtil.randomSimpleString(random());
        field1.setStringValue(stringValue);
        field2.setStringValue(stringValue);
        field3.setStringValue(stringValue);
        field4.setStringValue(stringValue);
        field5.setStringValue(stringValue);
        field6.setStringValue(stringValue);
        field7.setStringValue(stringValue);
        field8.setStringValue(stringValue);
        iw.addDocument(doc);
    }
    iw.close();
    verify(dir);
    TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge
    // Reopen in APPEND mode and force-merge down to one segment, then verify
    // the merged index as well.
    iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    iwc.setOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, iwc);
    iw2.forceMerge(1);
    iw2.close();
    verify(dir);
    dir.close();
}

From source file:com.spike.text.lucene.util.LuceneTestBookIndexingUtil.java

License:Apache License

/**
 * Builds a Lucene document for a book described by a properties file.
 *
 * The category is derived from the file's path relative to {@code rootDir};
 * the remaining fields (isbn, title, author, url, subject, pubmonth) come from
 * the properties. Comma-separated authors become repeated "author" fields, and
 * title/subject/author/category are additionally folded into an unstored
 * "contents" field for full-text search.
 *
 * @param rootDir base directory the category path is computed against
 * @param file    properties file describing one book
 * @return the populated Lucene document
 * @throws IOException      if the properties file cannot be read
 * @throws RuntimeException if the pubmonth property cannot be parsed as a date
 */
public static Document getDocument(String rootDir, File file) throws IOException {
    Properties props = new Properties();
    // FIX: close the input stream after loading (the original leaked the
    // FileInputStream).
    try (FileInputStream in = new FileInputStream(file)) {
        props.load(in);
    }

    Document doc = new Document();

    // category comes from relative path below the base directory
    String category = file.getParent().substring(rootDir.length());
    category = category.replace(File.separatorChar, '/');

    String isbn = props.getProperty("isbn");
    String title = props.getProperty("title");
    String author = props.getProperty("author");
    String url = props.getProperty("url");
    String subject = props.getProperty("subject");

    String pubmonth = props.getProperty("pubmonth");

    System.out.println(
            title + "\n" + author + "\n" + subject + "\n" + pubmonth + "\n" + category + "\n---------");

    // Exact-match keys: stored, unanalyzed, docs-only.
    doc.add(LuceneAppUtil.createStringField("isbn", isbn, Store.YES, false, IndexOptions.DOCS, false));
    doc.add(LuceneAppUtil.createStringField("category", category, Store.YES, false, IndexOptions.DOCS, false));

    // Title: analyzed with full positions/offsets for phrase and highlight support.
    doc.add(LuceneAppUtil.createStringField("title", title, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));

    // Lower-cased unanalyzed copy of the title for exact, case-insensitive sorting/lookup.
    doc.add(LuceneAppUtil.createStringField("title2", title.toLowerCase(), Store.YES, false,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    // split multiple authors into unique field instances
    String[] authors = author.split(",");
    for (String a : authors) {
        doc.add(LuceneAppUtil.createStringField("author", a, Store.YES, false,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }

    doc.add(LuceneAppUtil.createStringField("url", url, Store.YES, false, IndexOptions.DOCS, true));
    doc.add(LuceneAppUtil.createStringField("subject", subject, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    Field pubmonthField = new IntField("pubmonth", Integer.parseInt(pubmonth), Store.YES);
    doc.add(pubmonthField);

    Date d;
    try {
        d = DateTools.stringToDate(pubmonth);
    } catch (ParseException pe) {
        throw new RuntimeException(pe);
    }

    // Publication month expressed as days since the epoch, for range queries.
    Field pubmonthAsDayField = new IntField("pubmonthAsDay", (int) (d.getTime() / (1000 * 3600 * 24)),
            Store.YES);
    doc.add(pubmonthAsDayField);

    // Catch-all search field: unstored, analyzed copy of the main text fields.
    for (String text : new String[] { title, subject, author, category }) {
        doc.add(LuceneAppUtil.createStringField("contents", text, Store.NO, true,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));

    }

    return doc;
}

From source file:com.vmware.dcp.services.common.LuceneDocumentIndexService.java

License:Open Source License

/**
 * Builds a field type for a numeric value: docs-only indexed, with NUMERIC
 * doc values, optionally stored.
 *
 * @param type  the numeric type (int/long/float/double)
 * @param store whether the value should also be stored for retrieval
 * @return the configured (unfrozen) field type
 */
public static FieldType numericDocType(FieldType.NumericType type, boolean store) {
    final FieldType fieldType = new FieldType();
    fieldType.setNumericType(type);
    fieldType.setIndexOptions(IndexOptions.DOCS);
    fieldType.setDocValuesType(DocValuesType.NUMERIC);
    fieldType.setStored(store);
    return fieldType;
}

From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java

License:Open Source License

/**
 * Decodes the on-disk byte written by the field-infos format into the
 * corresponding {@link IndexOptions} constant.
 *
 * @param input the input the byte came from (used for exception context only)
 * @param b     the encoded index-options byte (0..4)
 * @return the decoded index options
 * @throws CorruptIndexException if the byte is outside the valid range
 */
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
    if (b == 0) {
        return IndexOptions.NONE;
    }
    if (b == 1) {
        return IndexOptions.DOCS;
    }
    if (b == 2) {
        return IndexOptions.DOCS_AND_FREQS;
    }
    if (b == 3) {
        return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS;
    }
    if (b == 4) {
        return IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS;
    }
    // Any other value indicates on-disk corruption.
    throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
}