Example usage for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS

List of usage examples for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS.

Prototype

IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS

To view the source code for org.apache.lucene.index IndexOptions DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, click the Source Link below.

Click Source Link

Document

Indexes documents, frequencies, positions and offsets.

Usage

From source file:alix.lucene.Alix.java

License:Open Source License

/**
 * Parse field type String/* w  ww  .  j a va 2s.c o m*/
 * 
 * @param name Name of the field
 * @param value Value of the field
 * @param options a string composed of letters in any order following Luke convention to describe fields
 * IdfpoPSV
 * I: Indexed
 * d: docs
 * f: freqs
 * p: pos
 * o: offset
 * P: payloads
 * S: Stored
 * V: TermVector
 */
public static FieldType fieldType(String options) {
    FieldType type;
    if (options == null)
        return new FieldType();
    if ("S".equals(options)) {
        type = new FieldType();
        type.setStored(true);
        return type;
    }
    if (options.contains("S")) {
        type = new FieldType(TextField.TYPE_STORED);
    } else {
        type = new FieldType(TextField.TYPE_NOT_STORED);
    }
    // optimize ?
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    if (options.contains("p")) {
        type.setStoreTermVectorPositions(true);
    }

    if (options.contains("o")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorOffsets(true);
    }
    if (options.contains("P")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectorPayloads(true);
    }
    if (options.contains("V")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
    }
    return type;
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Indexes a single PDF document and writes it to the given index writer.
 *
 * <p>Read/extraction failures are logged and swallowed so one bad file does
 * not abort a larger indexing run; the declared {@code IOException} is kept
 * for API compatibility but is not thrown for read failures.
 *
 * @param writer the index writer to write to
 * @param metadata the metadata of the document to index
 * @throws IOException declared for compatibility; read errors are logged
 */
static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException {
    Path file = Paths.get(metadata.getFilename());
    try {
        Document doc = new Document();

        // Path doubles as the unique key used by updateDocument below.
        Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add Document metadata //
        doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES));
        // End of Document Metadata //

        Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(),
                Field.Store.YES);
        doc.add(modified);

        PDFTextExtractor extractor = new PDFTextExtractor();
        // Get the string contents
        String textContents = extractor.extractText(file.toString());

        // Contents are stored and indexed with full postings plus complete
        // term vectors (positions, offsets, payloads).
        FieldType contentsType = new FieldType();
        contentsType.setStored(true);
        contentsType.setTokenized(true);
        contentsType.setStoreTermVectors(true);
        contentsType.setStoreTermVectorPositions(true);
        contentsType.setStoreTermVectorPayloads(true);
        contentsType.setStoreTermVectorOffsets(true);
        contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType);
        doc.add(contents);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            log.info("adding " + file + " to index");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            log.info("updating " + file + " in index");
            writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc);
        }
    } catch (IOException e) {
        // Pass the exception to the logger so the stack trace is not lost.
        log.error("Failed to read file " + metadata.getFilename(), e);
    }

}

From source file:com.lucure.core.codec.LucurePostingsWriter.java

License:Apache License

/**
 * Configures this writer for the given field and reports how many file
 * pointers its per-term state carries (doc, pos and pay files).
 *
 * @param fieldInfo the field about to be written
 * @return 3 when positions plus payloads or offsets are indexed,
 *         2 for positions only, 1 for docs only
 */
@Override
public int setField(FieldInfo fieldInfo) {
    final IndexOptions opts = fieldInfo.getIndexOptions();
    fieldHasFreqs = opts.compareTo(IndexOptions.DOCS_AND_FREQS) >= 0;
    fieldHasPositions = opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS) >= 0;
    fieldHasOffsets = opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    fieldHasPayloads = fieldInfo.hasPayloads();
    skipWriter.setField(fieldHasPositions, fieldHasOffsets, fieldHasPayloads);
    lastState = emptyState;
    if (!fieldHasPositions) {
        return 1; // doc FP only
    }
    // doc + pos FP, plus a pay FP when payloads or offsets are present.
    return (fieldHasPayloads || fieldHasOffsets) ? 3 : 2;
}

From source file:com.o19s.es.ltr.query.LtrQueryTests.java

License:Apache License

/**
 * Creates a text field indexed with docs, freqs, positions and offsets.
 *
 * @param name field name
 * @param value field value
 * @param stored whether the value should also be stored
 * @return the new field
 */
private Field newField(String name, String value, Store stored) {
    FieldType ft = new FieldType();
    ft.setStored(Store.YES == stored);
    ft.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    return new Field(name, value, ft);
}

From source file:com.rapidminer.search.GlobalSearchUtilities.java

License:Open Source License

GlobalSearchUtilities() {
    titleFieldType = new FieldType();
    titleFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    titleFieldType.setStored(true);//from  w ww . jav a2 s  . c  o  m
    titleFieldType.setTokenized(true);

    textFieldType = new FieldType();
    textFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    textFieldType.setStored(true);
    textFieldType.setTokenized(true);
    textFieldType.setStoreTermVectors(true);
    textFieldType.setStoreTermVectorPositions(true);
    textFieldType.setStoreTermVectorOffsets(true);
    textFieldType.setStoreTermVectorPayloads(true);
}

From source file:com.rocana.lucene.codec.v1.RocanaFieldReader.java

License:Apache License

/**
 * Returns {@code true} when this field's index options include offsets,
 * i.e. they are at least DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS.
 */
@Override
public boolean hasOffsets() {
    final IndexOptions opts = fieldInfo.getIndexOptions();
    return opts.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * Indexes the same text into eight fields covering every postings level
 * (docs / freqs / positions / offsets, with and without payloads), then
 * verifies the index both before and after a forceMerge.
 */
public void test() throws Exception {
    Directory dir = newDirectory();
    // Per-field analyzer: payload fields get a mock payload filter appended.
    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            if (fieldName.contains("payloadsFixed")) {
                return new TokenStreamComponents(tokenizer,
                        new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1));
            }
            if (fieldName.contains("payloadsVariable")) {
                return new TokenStreamComponents(tokenizer,
                        new MockVariableLengthPayloadFilter(new Random(0), tokenizer));
            }
            return new TokenStreamComponents(tokenizer);
        }
    };
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();

    // One field type per postings level; term vectors on for a cross-check.
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    docsOnlyType.setStoreTermVectors(true);
    docsOnlyType.setIndexOptions(IndexOptions.DOCS);

    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    docsAndFreqsType.setStoreTermVectors(true);
    docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    positionsType.setStoreTermVectors(true);
    positionsType.setStoreTermVectorPositions(true);
    positionsType.setStoreTermVectorOffsets(true);
    positionsType.setStoreTermVectorPayloads(true);

    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);

    Field[] fields = {
            new Field("field1docs", "", docsOnlyType),
            new Field("field2freqs", "", docsAndFreqsType),
            new Field("field3positions", "", positionsType),
            new Field("field4offsets", "", offsetsType),
            new Field("field5payloadsFixed", "", positionsType),
            new Field("field6payloadsVariable", "", positionsType),
            new Field("field7payloadsFixedOffsets", "", offsetsType),
            new Field("field8payloadsVariableOffsets", "", offsetsType),
    };
    for (Field field : fields) {
        doc.add(field);
    }
    for (int i = 0; i < MAXDOC; i++) {
        // Every field of a document shares the same generated value.
        String stringValue = Integer.toString(i) + " verycommon "
                + English.intToEnglish(i).replace('-', ' ') + " "
                + TestUtil.randomSimpleString(random());
        for (Field field : fields) {
            field.setStringValue(stringValue);
        }
        iw.addDocument(doc);
    }
    iw.close();
    verify(dir);
    TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge
    iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    iwc.setOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, iwc);
    iw2.forceMerge(1);
    iw2.close();
    verify(dir);
    dir.close();
}

From source file:com.sindicetech.siren.index.codecs.siren10.Siren10PostingsWriter.java

License:Open Source License

/**
 * Sets the field to write. This codec cannot index offsets, so a field
 * whose index options include offsets is rejected.
 *
 * @param fieldInfo the field being written
 * @return 0 (no per-term file pointers are kept in field state)
 * @throws UnsupportedOperationException if the field indexes offsets
 */
@Override
public int setField(final FieldInfo fieldInfo) {
    this.fieldInfo = fieldInfo;
    this.indexOptions = fieldInfo.getIndexOptions();
    final boolean wantsOffsets =
            indexOptions.compareTo(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS) >= 0;
    if (wantsOffsets) {
        throw new UnsupportedOperationException("this codec cannot index offsets");
    }
    skipWriter.setIndexOptions(indexOptions);
    lastSkipFP = 0;
    lastState = setEmptyState();
    return 0;
}

From source file:com.spike.text.lucene.util.LuceneTestBookIndexingUtil.java

License:Apache License

/**
 * Builds a Lucene document for one book ".properties" file.
 *
 * <p>The category is derived from the file's directory path relative to
 * {@code rootDir}; all other fields come from the properties file.
 *
 * @param rootDir base directory the category path is computed against
 * @param file the properties file describing one book
 * @return the populated document
 * @throws IOException if the properties file cannot be read
 * @throws RuntimeException if the "pubmonth" property cannot be parsed
 */
public static Document getDocument(String rootDir, File file) throws IOException {
    Properties props = new Properties();
    // try-with-resources: the original leaked this FileInputStream.
    try (FileInputStream in = new FileInputStream(file)) {
        props.load(in);
    }

    Document doc = new Document();

    // category comes from relative path below the base directory
    String category = file.getParent().substring(rootDir.length());
    category = category.replace(File.separatorChar, '/');

    String isbn = props.getProperty("isbn");
    String title = props.getProperty("title");
    String author = props.getProperty("author");
    String url = props.getProperty("url");
    String subject = props.getProperty("subject");
    String pubmonth = props.getProperty("pubmonth");

    System.out.println(
            title + "\n" + author + "\n" + subject + "\n" + pubmonth + "\n" + category + "\n---------");

    // Identifier-like fields: stored, not analyzed, docs-only postings.
    doc.add(LuceneAppUtil.createStringField("isbn", isbn, Store.YES, false, IndexOptions.DOCS, false));
    doc.add(LuceneAppUtil.createStringField("category", category, Store.YES, false, IndexOptions.DOCS, false));

    // "title" is analyzed with full postings; "title2" is a lowercase,
    // unanalyzed copy (norms omitted).
    doc.add(LuceneAppUtil.createStringField("title", title, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    doc.add(LuceneAppUtil.createStringField("title2", title.toLowerCase(), Store.YES, false,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    // split multiple authors into unique field instances
    for (String a : author.split(",")) {
        doc.add(LuceneAppUtil.createStringField("author", a, Store.YES, false,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }

    doc.add(LuceneAppUtil.createStringField("url", url, Store.YES, false, IndexOptions.DOCS, true));
    doc.add(LuceneAppUtil.createStringField("subject", subject, Store.YES, true,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, true));

    doc.add(new IntField("pubmonth", Integer.parseInt(pubmonth), Store.YES));

    Date d;
    try {
        d = DateTools.stringToDate(pubmonth);
    } catch (ParseException pe) {
        // A malformed pubmonth is a data error; fail rather than index bad data.
        throw new RuntimeException(pe);
    }
    // Publication date, in whole days since epoch.
    doc.add(new IntField("pubmonthAsDay", (int) (d.getTime() / (1000 * 3600 * 24)), Store.YES));

    // Catch-all search field built from the main text fields (not stored).
    for (String text : new String[] { title, subject, author, category }) {
        doc.add(LuceneAppUtil.createStringField("contents", text, Store.NO, true,
                IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS, false));
    }

    return doc;
}

From source file:com.vmware.xenon.services.common.Lucene60FieldInfosFormatWithCache.java

License:Open Source License

/**
 * Decodes the on-disk byte representing a field's {@link IndexOptions}.
 *
 * @param input the input being read, used only for error reporting
 * @param b the encoded index-options value (expected 0..4)
 * @return the decoded index options
 * @throws CorruptIndexException if {@code b} is outside 0..4
 */
private static IndexOptions getIndexOptions(IndexInput input, byte b) throws IOException {
    final IndexOptions[] decoded = {
            IndexOptions.NONE,
            IndexOptions.DOCS,
            IndexOptions.DOCS_AND_FREQS,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS,
            IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS,
    };
    if (b < 0 || b >= decoded.length) {
        // BUG: an unknown value means the segment metadata is damaged.
        throw new CorruptIndexException("invalid IndexOptions byte: " + b, input);
    }
    return decoded[b];
}