Example usage for org.apache.lucene.analysis.payloads TypeAsPayloadTokenFilter TypeAsPayloadTokenFilter

List of usage examples for org.apache.lucene.analysis.payloads TypeAsPayloadTokenFilter TypeAsPayloadTokenFilter

Introduction

On this page you can find example usages of org.apache.lucene.analysis.payloads TypeAsPayloadTokenFilter TypeAsPayloadTokenFilter.

Prototype

public TypeAsPayloadTokenFilter(TokenStream input) 

Source Link

Usage

From source file:com.github.le11.nls.lucene.payloads.UIMAPayloadsAnalyzer.java

License:Apache License

@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
    // Tokenize via the UIMA type-aware tokenizer (configured with the
    // "org.apache.uima.TokenAnnotation" type and its "posTag" feature), then
    // wrap it so each token's type attribute is exposed as its payload.
    TokenStream uimaTokens = new UIMATypeAwareTokenizer(descriptorPath,
            "org.apache.uima.TokenAnnotation", "posTag", reader);
    return new TypeAsPayloadTokenFilter(uimaTokens);
}

From source file:org.apache.solr.analysis.TypeAsPayloadTokenFilterFactory.java

License:Apache License

public TypeAsPayloadTokenFilter create(TokenStream input) {
    // Factory hook: wrap the incoming stream so that each token's type
    // attribute is stored as its payload.
    final TypeAsPayloadTokenFilter payloadFilter = new TypeAsPayloadTokenFilter(input);
    return payloadFilter;
}

From source file:org.elasticsearch.action.termvector.AbstractTermVectorTests.java

License:Apache License

protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    // Per-field analyzer map: every field flagged storedPayloads gets a chain
    // ending in TypeAsPayloadTokenFilter, which stores each token's type
    // attribute as its payload.
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                    TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources: the original leaked the IndexWriter if indexing
    // threw before writer.close().
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
        for (TestDoc doc : testDocs) {
            Document d = new Document();
            d.add(new Field("id", doc.id, StringField.TYPE_STORED));
            for (int i = 0; i < doc.fieldContent.length; i++) {
                FieldType type = new FieldType(TextField.TYPE_STORED);
                TestFieldSetting fieldSetting = doc.fieldSettings[i];

                type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
                type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
                // positions are required whenever payloads or offsets are stored
                type.setStoreTermVectorPositions(
                        fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
                type.setStoreTermVectors(true);
                type.freeze();
                d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
            }
            writer.updateDocument(new Term("id", doc.id), d);
        }
        // single commit after all documents; the original committed per doc,
        // which is pure overhead for a freshly CREATE-mode index
        writer.commit();
    }

    return DirectoryReader.open(dir);
}

From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTestCase.java

License:Apache License

protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    // Per-field analyzer map: every field flagged storedPayloads gets a chain
    // ending in TypeAsPayloadTokenFilter, which stores each token's type
    // attribute as its payload.
    Map<String, Analyzer> mapping = new HashMap<>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer tokenizer = new StandardTokenizer();
                    TokenFilter filter = new LowerCaseFilter(tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(CharArraySet.EMPTY_SET),
            mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources: the original leaked the IndexWriter if indexing
    // threw before writer.close().
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
        for (TestDoc doc : testDocs) {
            Document d = new Document();
            d.add(new Field("id", doc.id, StringField.TYPE_STORED));
            for (int i = 0; i < doc.fieldContent.length; i++) {
                FieldType type = new FieldType(TextField.TYPE_STORED);
                TestFieldSetting fieldSetting = doc.fieldSettings[i];

                type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
                type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
                // positions are required whenever payloads or offsets are stored
                type.setStoreTermVectorPositions(
                        fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
                type.setStoreTermVectors(true);
                type.freeze();
                d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
            }
            writer.updateDocument(new Term("id", doc.id), d);
        }
        // single commit after all documents; the original committed per doc,
        // which is pure overhead for a freshly CREATE-mode index
        writer.commit();
    }

    return DirectoryReader.open(dir);
}

From source file:org.elasticsearch.termvectors.AbstractTermVectorTests.java

License:Apache License

protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {

    // Per-field analyzer map: every field flagged storedPayloads gets a chain
    // ending in TypeAsPayloadTokenFilter, which stores each token's type
    // attribute as its payload.
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (TestFieldSetting field : testDocs[0].fieldSettings) {
        if (field.storedPayloads) {
            mapping.put(field.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                    TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion), mapping);

    Directory dir = new RAMDirectory();
    IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper);
    conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources: the original leaked the IndexWriter if indexing
    // threw before writer.close().
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
        for (TestDoc doc : testDocs) {
            Document d = new Document();
            d.add(new Field("id", doc.id, StringField.TYPE_STORED));
            for (int i = 0; i < doc.fieldContent.length; i++) {
                FieldType type = new FieldType(TextField.TYPE_STORED);
                TestFieldSetting fieldSetting = doc.fieldSettings[i];

                type.setStoreTermVectorOffsets(fieldSetting.storedOffset);
                type.setStoreTermVectorPayloads(fieldSetting.storedPayloads);
                // positions are required whenever payloads or offsets are stored
                type.setStoreTermVectorPositions(
                        fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset);
                type.setStoreTermVectors(true);
                type.freeze();
                d.add(new Field(fieldSetting.name, doc.fieldContent[i], type));
            }
            writer.updateDocument(new Term("id", doc.id), d);
        }
        // single commit after all documents; the original committed per doc,
        // which is pure overhead for a freshly CREATE-mode index
        writer.commit();
    }

    return DirectoryReader.open(dir);
}

From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java

License:Apache License

private Fields buildWithLuceneAndReturnFields(String docId, String[] fields, String[] content,
        boolean[] withPositions, boolean[] withOffsets, boolean[] withPayloads) throws IOException {
    assert (fields.length == withPayloads.length);
    assert (content.length == withPayloads.length);
    assert (withPositions.length == withPayloads.length);
    assert (withOffsets.length == withPayloads.length);

    // Per-field analyzer map: fields with payloads get a chain ending in
    // TypeAsPayloadTokenFilter, which stores each token's type attribute as
    // its payload.
    Map<String, Analyzer> mapping = new HashMap<String, Analyzer>();
    for (int i = 0; i < withPayloads.length; i++) {
        if (withPayloads[i]) {
            mapping.put(fields[i], new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader);
                    TokenFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer);
                    filter = new TypeAsPayloadTokenFilter(filter);
                    return new TokenStreamComponents(tokenizer, filter);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(TEST_VERSION_CURRENT),
            mapping);

    // NOTE(review): hard-coded shared path — concurrent test runs will clobber
    // each other; prefer a per-test temp dir or an in-memory directory.
    Directory dir = FSDirectory.open(new File("/tmp/foo"));
    IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, wrapper);
    conf.setOpenMode(OpenMode.CREATE);

    // try-with-resources: the original leaked the IndexWriter if indexing threw.
    try (IndexWriter writer = new IndexWriter(dir, conf)) {
        Document d = new Document();
        // Fix: the original added the "id" field (and called updateDocument +
        // commit) once per loop iteration, duplicating the id field in the
        // document and re-indexing a partially built doc each time. Build the
        // document fully, then index it once.
        d.add(new Field("id", docId, StringField.TYPE_STORED));
        for (int i = 0; i < fields.length; i++) {
            FieldType type = new FieldType(TextField.TYPE_STORED);
            type.setStoreTermVectorOffsets(withOffsets[i]);
            type.setStoreTermVectorPayloads(withPayloads[i]);
            // positions are required whenever payloads or offsets are stored
            type.setStoreTermVectorPositions(withPositions[i] || withOffsets[i] || withPayloads[i]);
            type.setStoreTermVectors(true);
            type.freeze();
            d.add(new Field(fields[i], content[i], type));
        }
        writer.updateDocument(new Term("id", docId), d);
        writer.commit();
    }

    // Look the document back up by id and return its term vectors. The reader
    // is intentionally left open: the returned Fields are backed by it.
    DirectoryReader dr = DirectoryReader.open(dir);
    IndexSearcher s = new IndexSearcher(dr);
    TopDocs search = s.search(new TermQuery(new Term("id", docId)), 1);

    ScoreDoc[] scoreDocs = search.scoreDocs;
    assert (scoreDocs.length == 1);
    int doc = scoreDocs[0].doc;
    return dr.getTermVectors(doc);
}