Example usage for org.apache.lucene.document FieldType setStoreTermVectorPayloads

List of usage examples for org.apache.lucene.document FieldType setStoreTermVectorPayloads

Introduction

On this page you can find example usages of org.apache.lucene.document.FieldType.setStoreTermVectorPayloads.

Prototype

public void setStoreTermVectorPayloads(boolean value) 

Source Link

Document

Set to true to also store token payloads into the term vector for this field.

Usage

From source file:alix.lucene.Alix.java

License:Open Source License

/**
 * Builds a Lucene {@link FieldType} from a compact option string.
 *
 * <p>A {@code null} string yields a plain {@code new FieldType()}, and the exact string
 * {@code "S"} yields a type that is stored and nothing else. Any other string starts from a
 * copy of {@code TextField.TYPE_STORED} (when it contains {@code S}) or
 * {@code TextField.TYPE_NOT_STORED}, always sets the index options to
 * {@code DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS}, and then enables term vector features
 * according to the letters found.
 *
 * @param options a string composed of letters in any order following Luke convention to
 *                describe fields ({@code IdfpoPSV}), or {@code null}:
 *                <ul>
 *                <li>{@code I}: Indexed (not inspected by this method)</li>
 *                <li>{@code d}: docs (not inspected by this method)</li>
 *                <li>{@code f}: freqs (not inspected by this method)</li>
 *                <li>{@code p}: term vector with positions</li>
 *                <li>{@code o}: term vector with offsets</li>
 *                <li>{@code P}: term vector with positions and payloads</li>
 *                <li>{@code S}: Stored</li>
 *                <li>{@code V}: TermVector</li>
 *                </ul>
 * @return a new field type; {@code freeze()} is not called on it here
 */
public static FieldType fieldType(String options) {
    if (options == null) {
        return new FieldType();
    }
    if ("S".equals(options)) {
        FieldType storedOnly = new FieldType();
        storedOnly.setStored(true);
        return storedOnly;
    }
    FieldType type;
    if (options.contains("S")) {
        type = new FieldType(TextField.TYPE_STORED);
    } else {
        type = new FieldType(TextField.TYPE_NOT_STORED);
    }
    // optimize ?
    type.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    if (options.contains("p")) {
        // Positions are a sub-feature of the term vector: asking for them without the
        // vector itself is rejected by Lucene when the field is indexed, so enable both,
        // exactly as the "o" and "P" branches already do.
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorPositions(true);
    }
    if (options.contains("o")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorOffsets(true);
    }
    if (options.contains("P")) {
        // Payloads are attached to positions, hence positions are switched on as well.
        type.setTokenized(true);
        type.setStoreTermVectors(true);
        type.setStoreTermVectorPositions(true);
        type.setStoreTermVectorPayloads(true);
    }
    if (options.contains("V")) {
        type.setTokenized(true);
        type.setStoreTermVectors(true);
    }
    return type;
}

From source file:api.startup.PDFIndexer.java

License:Open Source License

/**
 * Indexes a single document and writes it to the given index writer
 * @param writer - the index writer to writer
 * @param metadata - the document/* w  w w. j  a v a  2  s  . c  o  m*/
 * @throws IOException
 */
static void indexDoc(IndexWriter writer, DocumentMetadata metadata) throws IOException {
    Path file = Paths.get(metadata.getFilename());
    try {
        Document doc = new Document();

        Field pathField = new StringField(Constants.FIELD_PATH, file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add Document metadata //
        doc.add(new StringField(Constants.FIELD_AUTHOR, metadata.getAuthor(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_TITLE, metadata.getTitle(), Field.Store.YES));
        doc.add(new StringField(Constants.FIELD_CONFERENCE, metadata.getConference(), Field.Store.YES));
        // End of Document Metadata //

        Field modified = new LongField(Constants.FIELD_MODIFIED, Files.getLastModifiedTime(file).toMillis(),
                Field.Store.YES);
        doc.add(modified);

        PDFTextExtractor extractor = new PDFTextExtractor();
        // Get the string contents
        String textContents = extractor.extractText(file.toString());

        // Store the string contents
        FieldType contentsType = new FieldType();
        contentsType.setStored(true);
        contentsType.setTokenized(true);
        contentsType.setStoreTermVectors(true);
        contentsType.setStoreTermVectorPositions(true);
        contentsType.setStoreTermVectorPayloads(true);
        contentsType.setStoreTermVectorOffsets(true);
        contentsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
        Field contents = new Field(Constants.FIELD_CONTENTS, textContents, contentsType);
        doc.add(contents);

        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            log.info("adding " + file + " to index");
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            log.info("updating " + file + " in index");
            writer.updateDocument(new Term(Constants.FIELD_PATH, file.toString()), doc);
        }
    } catch (IOException e) {
        log.error("Failed to read file " + metadata.getFilename());
    }

}

From source file:com.github.hotware.lucene.extension.bean.field.BeanInformationCacheImpl.java

License:BEER-WARE LICENSE

/**
 * Assembles the {@link FieldInformation} for one annotated bean field.
 *
 * <p>A fresh instance of the type class named by the annotation is created, a Lucene
 * {@link FieldType} is populated from the annotation's attributes, the type instance gets a
 * chance to adjust it, and the result is frozen before being returned.
 *
 * @param bf the annotation describing how the field is indexed
 * @param field the reflected bean field
 * @param fieldClass the class associated with the field
 * @return the field information wrapping the frozen field, its class, its Lucene type and
 *         the annotation
 * @throws RuntimeException if the type class cannot be instantiated
 */
private FieldInformation buildFieldInformation(BeanField bf, Field field, Class<?> fieldClass) {
    final Object instance;
    try {
        // TODO: maybe cache these?
        instance = bf.type().newInstance();
    } catch (InstantiationException | IllegalAccessException e) {
        throw new RuntimeException(e);
    }
    final com.github.hotware.lucene.extension.bean.type.Type wrapper =
            (com.github.hotware.lucene.extension.bean.type.Type) instance;

    // Mirror every annotation attribute onto the Lucene field type.
    final FieldType luceneType = new FieldType();
    luceneType.setIndexed(bf.index());
    luceneType.setStored(bf.store());
    luceneType.setTokenized(bf.tokenized());
    luceneType.setStoreTermVectors(bf.storeTermVectors());
    luceneType.setStoreTermVectorPositions(bf.storeTermVectorPositions());
    luceneType.setStoreTermVectorOffsets(bf.storeTermVectorOffsets());
    luceneType.setStoreTermVectorPayloads(bf.storeTermVectorPayloads());
    luceneType.setOmitNorms(bf.omitNorms());
    luceneType.setIndexOptions(bf.indexOptions());

    // The type wrapper has the last word, then the configuration is locked.
    wrapper.configureFieldType(luceneType);
    luceneType.freeze();

    final FrozenField frozen = new FrozenField(field);
    return new FieldInformation(frozen, fieldClass, luceneType, bf);
}

From source file:com.qwazr.search.field.CustomFieldType.java

License:Apache License

/**
 * Wraps {@code value} in a {@code CustomField} whose Lucene type is built from the field
 * definition, and passes it to the consumer.
 *
 * <p>Only attributes of the field definition that are non-null are applied; everything else
 * keeps the default of a freshly constructed {@link FieldType}.
 *
 * @param value the value to index
 * @param consumer receiver of the resulting field
 */
@Override
final public void fillValue(final Object value, final FieldConsumer consumer) {
    final FieldType luceneType = new FieldType();

    // Storage and tokenization
    if (fieldDef.stored != null) {
        luceneType.setStored(fieldDef.stored);
    }
    if (fieldDef.tokenized != null) {
        luceneType.setTokenized(fieldDef.tokenized);
    }

    // Term vectors
    if (fieldDef.store_termvectors != null) {
        luceneType.setStoreTermVectors(fieldDef.store_termvectors);
    }
    if (fieldDef.store_termvector_offsets != null) {
        luceneType.setStoreTermVectorOffsets(fieldDef.store_termvector_offsets);
    }
    if (fieldDef.store_termvector_positions != null) {
        luceneType.setStoreTermVectorPositions(fieldDef.store_termvector_positions);
    }
    if (fieldDef.store_termvector_payloads != null) {
        luceneType.setStoreTermVectorPayloads(fieldDef.store_termvector_payloads);
    }

    // Norms, numeric type, postings and doc values
    if (fieldDef.omit_norms != null) {
        luceneType.setOmitNorms(fieldDef.omit_norms);
    }
    if (fieldDef.numeric_type != null) {
        luceneType.setNumericType(fieldDef.numeric_type);
    }
    if (fieldDef.index_options != null) {
        luceneType.setIndexOptions(fieldDef.index_options);
    }
    if (fieldDef.docvalues_type != null) {
        luceneType.setDocValuesType(fieldDef.docvalues_type);
    }

    final CustomField customField = new CustomField(fieldName, luceneType, value);
    consumer.accept(customField);
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat2.java

License:Apache License

/**
 * Creates a document holding one empty-valued field per {@link IndexOptions} value other
 * than {@code NONE}; each field is named after its option.
 *
 * @return the new document
 */
private Document newDocument() {
    final Document result = new Document();
    for (final IndexOptions indexOptions : IndexOptions.values()) {
        if (indexOptions != IndexOptions.NONE) {
            final FieldType fieldType = new FieldType(TextField.TYPE_NOT_STORED);
            // turn on tvs for a cross-check, since we rely upon checkindex in this test (for now)
            fieldType.setStoreTermVectors(true);
            fieldType.setStoreTermVectorOffsets(true);
            fieldType.setStoreTermVectorPositions(true);
            fieldType.setStoreTermVectorPayloads(true);
            fieldType.setIndexOptions(indexOptions);
            final String fieldName = indexOptions.toString();
            result.add(new Field(fieldName, "", fieldType));
        }
    }
    return result;
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat3.java

License:Apache License

/**
 * Indexes {@code MAXDOC} documents with eight fields covering different index options and
 * payload styles using {@code RocanaLucene50PostingsFormat}, then verifies the index both
 * before and after a force-merge down to one segment.
 *
 * @throws Exception if indexing, verification or index checking fails
 */
public void test() throws Exception {
    Directory dir = newDirectory();
    // The analyzer picks a payload filter from the field name: fixed-length payloads,
    // variable-length payloads, or none.
    Analyzer analyzer = new Analyzer(Analyzer.PER_FIELD_REUSE_STRATEGY) {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer();
            if (fieldName.contains("payloadsFixed")) {
                TokenFilter filter = new MockFixedLengthPayloadFilter(new Random(0), tokenizer, 1);
                return new TokenStreamComponents(tokenizer, filter);
            } else if (fieldName.contains("payloadsVariable")) {
                TokenFilter filter = new MockVariableLengthPayloadFilter(new Random(0), tokenizer);
                return new TokenStreamComponents(tokenizer, filter);
            } else {
                return new TokenStreamComponents(tokenizer);
            }
        }
    };
    IndexWriterConfig iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    // TODO we could actually add more fields implemented with different PFs
    // or, just put this test into the usual rotation?
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc);
    Document doc = new Document();
    FieldType docsOnlyType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsOnlyType.setStoreTermVectors(true);
    docsOnlyType.setIndexOptions(IndexOptions.DOCS);

    FieldType docsAndFreqsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn this on for a cross-check
    docsAndFreqsType.setStoreTermVectors(true);
    docsAndFreqsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS);

    // positionsType keeps whatever index options TextField.TYPE_NOT_STORED provides;
    // only the term vector flags are changed here.
    FieldType positionsType = new FieldType(TextField.TYPE_NOT_STORED);
    // turn these on for a cross-check
    positionsType.setStoreTermVectors(true);
    positionsType.setStoreTermVectorPositions(true);
    positionsType.setStoreTermVectorOffsets(true);
    positionsType.setStoreTermVectorPayloads(true);
    // offsetsType is a copy of positionsType that additionally indexes offsets.
    FieldType offsetsType = new FieldType(positionsType);
    offsetsType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    // Field names matter: the analyzer above keys its payload filter on them.
    Field field1 = new Field("field1docs", "", docsOnlyType);
    Field field2 = new Field("field2freqs", "", docsAndFreqsType);
    Field field3 = new Field("field3positions", "", positionsType);
    Field field4 = new Field("field4offsets", "", offsetsType);
    Field field5 = new Field("field5payloadsFixed", "", positionsType);
    Field field6 = new Field("field6payloadsVariable", "", positionsType);
    Field field7 = new Field("field7payloadsFixedOffsets", "", offsetsType);
    Field field8 = new Field("field8payloadsVariableOffsets", "", offsetsType);
    doc.add(field1);
    doc.add(field2);
    doc.add(field3);
    doc.add(field4);
    doc.add(field5);
    doc.add(field6);
    doc.add(field7);
    doc.add(field8);
    // The same Document and Field instances are reused for every iteration; only the
    // string value changes, and all eight fields receive the same text.
    for (int i = 0; i < MAXDOC; i++) {
        String stringValue = Integer.toString(i) + " verycommon " + English.intToEnglish(i).replace('-', ' ')
                + " " + TestUtil.randomSimpleString(random());
        field1.setStringValue(stringValue);
        field2.setStringValue(stringValue);
        field3.setStringValue(stringValue);
        field4.setStringValue(stringValue);
        field5.setStringValue(stringValue);
        field6.setStringValue(stringValue);
        field7.setStringValue(stringValue);
        field8.setStringValue(stringValue);
        iw.addDocument(doc);
    }
    iw.close();
    verify(dir);
    TestUtil.checkIndex(dir); // for some extra coverage, checkIndex before we forceMerge
    // Reopen the same directory in APPEND mode with a plain IndexWriter, merge everything
    // into a single segment, and verify again.
    iwc = newIndexWriterConfig(analyzer);
    iwc.setCodec(TestUtil.alwaysPostingsFormat(new RocanaLucene50PostingsFormat()));
    iwc.setOpenMode(OpenMode.APPEND);
    IndexWriter iw2 = new IndexWriter(dir, iwc);
    iw2.forceMerge(1);
    iw2.close();
    verify(dir);
    dir.close();
}

From source file:com.tuplejump.stargate.cassandra.CassandraUtils.java

License:Apache License

/**
 * Translates index {@code Properties} into a Lucene {@link FieldType}, letting
 * {@code Fields.setNumericType} decide from the validator whether the field is numeric.
 *
 * <p>The numeric precision step is applied only when a numeric type ended up being set.
 *
 * @param properties the source of all field type flags and options
 * @param validator the column validator handed to {@code Fields.setNumericType}
 * @return the configured field type
 */
public static FieldType fieldType(Properties properties, AbstractType validator) {
    final FieldType luceneType = new FieldType();

    // Basic flags
    luceneType.setIndexed(properties.isIndexed());
    luceneType.setTokenized(properties.isTokenized());
    luceneType.setStored(properties.isStored());

    // Term vectors
    luceneType.setStoreTermVectors(properties.isStoreTermVectors());
    luceneType.setStoreTermVectorOffsets(properties.isStoreTermVectorOffsets());
    luceneType.setStoreTermVectorPayloads(properties.isStoreTermVectorPayloads());
    luceneType.setStoreTermVectorPositions(properties.isStoreTermVectorPositions());

    // Norms and postings
    luceneType.setOmitNorms(properties.isOmitNorms());
    luceneType.setIndexOptions(properties.getIndexOptions());

    // Numeric handling
    Fields.setNumericType(validator, luceneType);
    final boolean numeric = luceneType.numericType() != null;
    if (numeric) {
        luceneType.setNumericPrecisionStep(properties.getNumericPrecisionStep());
    }
    return luceneType;
}

From source file:com.tuplejump.stargate.lucene.LuceneUtils.java

License:Apache License

/**
 * Translates index {@code Properties} into a Lucene {@link FieldType} for a dynamic field.
 *
 * <p>For numeric property types the Lucene numeric type is chosen as follows:
 * {@code integer} maps to {@code INT}, {@code bigint} to {@code LONG}, {@code decimal} to
 * {@code FLOAT}, and every other numeric type to {@code DOUBLE}. The numeric precision step
 * is applied in the numeric case only.
 *
 * @param properties the source of all field type flags and options
 * @return the configured field type
 */
public static FieldType dynamicFieldType(Properties properties) {
    final FieldType result = new FieldType();
    result.setIndexed(properties.isIndexed());
    result.setTokenized(properties.isTokenized());
    result.setStored(properties.isStored());
    result.setStoreTermVectors(properties.isStoreTermVectors());
    result.setStoreTermVectorOffsets(properties.isStoreTermVectorOffsets());
    result.setStoreTermVectorPayloads(properties.isStoreTermVectorPayloads());
    result.setStoreTermVectorPositions(properties.isStoreTermVectorPositions());
    result.setOmitNorms(properties.isOmitNorms());
    result.setIndexOptions(properties.getIndexOptions());

    // Nothing more to do for non-numeric fields.
    if (!properties.getType().isNumeric()) {
        return result;
    }

    final FieldType.NumericType numericType;
    switch (properties.getType()) {
    case integer:
        numericType = FieldType.NumericType.INT;
        break;
    case bigint:
        numericType = FieldType.NumericType.LONG;
        break;
    case decimal:
        numericType = FieldType.NumericType.FLOAT;
        break;
    default:
        numericType = FieldType.NumericType.DOUBLE;
        break;
    }
    result.setNumericType(numericType);
    result.setNumericPrecisionStep(properties.getNumericPrecisionStep());
    return result;
}

From source file:edu.co.usbcali.ir.processes.Indexer.java

/**
 * Builds the Lucene document for one file: its contents, its name and its canonical path.
 *
 * <p>All three fields share a single field type: {@code DOCS} index options, stored,
 * tokenized, with term vectors including positions, offsets and payloads.
 *
 * @param file the file to index
 * @return the document holding the three fields
 * @throws IOException if reading the contents or resolving the canonical path fails
 */
private Document getDocument(File file) throws IOException {
    final FieldType sharedType = new FieldType();
    sharedType.setIndexOptions(IndexOptions.DOCS);
    sharedType.setStored(true);
    sharedType.setTokenized(true);
    sharedType.setStoreTermVectors(true);
    sharedType.setStoreTermVectorPositions(true);
    sharedType.setStoreTermVectorOffsets(true);
    sharedType.setStoreTermVectorPayloads(true);

    final Document result = new Document();
    result.add(new Field(LuceneConstants.CONTENTS, getContent(file), sharedType));
    result.add(new Field(LuceneConstants.FILE_NAME, file.getName(), sharedType));
    result.add(new Field(LuceneConstants.FILE_PATH, file.getCanonicalPath(), sharedType));
    return result;
}

From source file:lab_mri.CranIndexer.java

/**
 * Indexes the Cranfield collection file into a Lucene index.
 *
 * <p>Both the collection file and the index directory are hard-coded; {@code args} is not
 * read. The input is parsed line by line: a line starting with {@code .I} begins a new
 * record (the rest of the line is its id), and lines starting with {@code .T}, {@code .A},
 * {@code .B} and {@code .W} select whether the following lines are appended to the title,
 * authors, affiliation or abstract. A record is written to the index when the next
 * {@code .I} line is seen, and the last record is written once the input is exhausted.
 *
 * <p>An {@link IOException} is logged at {@code SEVERE} level and ends the run.
 *
 * @param args ignored
 */
public static void main(String args[]) {
    File index_dir = new File("/home/luigi/NetBeansProjects/LAB_mri/inv_index");
    String doc_file = "/home/luigi/NetBeansProjects/LAB_mri/CRAN/cran.all.1400";
    try {
        SearchEngine se = new SearchEngine(index_dir);
        se.open();
        File inputFile = new File(doc_file);
        BufferedReader reader = new BufferedReader(new FileReader(inputFile));

        // Field type of the abstract: stored, indexed, with full term vectors.
        FieldType ft = new FieldType();
        // The original called the getter stored(), which changes nothing.
        ft.setStored(true);
        ft.setIndexed(true); //done as default
        ft.setStoreTermVectors(true);
        ft.setStoreTermVectorPositions(true);
        ft.setStoreTermVectorOffsets(true);
        ft.setStoreTermVectorPayloads(true);

        String id = null;
        StringBuilder title = new StringBuilder();
        StringBuilder authors = new StringBuilder();
        StringBuilder affiliation = new StringBuilder();
        StringBuilder abst = new StringBuilder();
        char code = ' ';
        int c = 0;
        try {
            String line;
            // readLine() returning null is the reliable end-of-input signal.
            while ((line = reader.readLine()) != null) {
                if (line.startsWith(".I")) {
                    if (id != null) {
                        System.out.println(id + "\t" + abst);
                        addCranDocument(se, ft, id, title.toString(), authors.toString(),
                                affiliation.toString(), abst.toString());

                        c++;
                        title = new StringBuilder();
                        authors = new StringBuilder();
                        affiliation = new StringBuilder();
                        abst = new StringBuilder();
                    }
                    id = line.substring(2).trim();

                } else if (line.startsWith(".T")) {
                    code = 'T';
                } else if (line.startsWith(".A")) {
                    code = 'A';
                } else if (line.startsWith(".B")) {
                    code = 'B';
                } else if (line.startsWith(".W")) {
                    code = 'W';
                } else {
                    switch (code) {
                    case 'T':
                        title.append(line).append(" ");
                        break;
                    case 'A':
                        authors.append(line).append(" ");
                        break;
                    case 'B':
                        affiliation.append(line).append(" ");
                        break;
                    case 'W':
                        abst.append(line).append(" ");
                        break;
                    default:
                        break;
                    }
                }
            }
        } finally {
            // Release the file handle even when parsing or indexing fails.
            reader.close();
        }
        //store last documents
        if (id != null) {
            System.out.println(id + "\t" + title);
            // Same field layout as every other record, so the last abstract also gets
            // its term vectors.
            addCranDocument(se, ft, id, title.toString(), authors.toString(),
                    affiliation.toString(), abst.toString());
            c++;
        }
        System.out.println("Total docs: " + c);

        se.close();
    } catch (IOException ioex) {
        Logger.getLogger(CranIndexer.class.getName()).log(Level.SEVERE, null, ioex);
    }

}

/**
 * Builds one Cranfield record as a Lucene document and hands it to the search engine.
 *
 * <p>The id is a stored string field; title, authors and affiliation are unstored text
 * fields; the abstract uses the supplied field type.
 */
private static void addCranDocument(SearchEngine se, FieldType abstType, String id, String title,
        String authors, String affiliation, String abst) throws IOException {
    Document doc = new Document();
    doc.add(new StringField("id", id, Field.Store.YES));
    doc.add(new TextField("title", title, Field.Store.NO));
    doc.add(new TextField("authors", authors, Field.Store.NO));
    doc.add(new TextField("affiliation", affiliation, Field.Store.NO));
    doc.add(new Field("abst", abst, abstType));
    se.addDocument(doc);
}