Usage examples for the org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilter constructor
public TypeAsPayloadTokenFilter(TokenStream input)
From source file:com.github.le11.nls.lucene.payloads.UIMAPayloadsAnalyzer.java
License:Apache License
@Override
public final TokenStream tokenStream(String fieldName, Reader reader) {
    // Tokenize with the UIMA type-aware tokenizer (driven by the analysis-engine
    // descriptor at descriptorPath), then copy each token's type into its payload.
    TokenStream uimaTokens = new UIMATypeAwareTokenizer(descriptorPath,
            "org.apache.uima.TokenAnnotation", "posTag", reader);
    return new TypeAsPayloadTokenFilter(uimaTokens);
}
From source file:org.apache.solr.analysis.TypeAsPayloadTokenFilterFactory.java
License:Apache License
/**
 * Wraps {@code input} so that each token's type attribute is stored as its payload.
 *
 * @param input the upstream token stream to decorate
 * @return the payload-annotating filter
 */
public TypeAsPayloadTokenFilter create(TokenStream input) {
    return new TypeAsPayloadTokenFilter(input);
}
From source file:org.elasticsearch.action.termvector.AbstractTermVectorTests.java
License:Apache License
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException { Map<String, Analyzer> mapping = new HashMap<String, Analyzer>(); for (TestFieldSetting field : testDocs[0].fieldSettings) { if (field.storedPayloads) { mapping.put(field.name, new Analyzer() { @Override//from w w w . j a va2 s . co m protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new StandardTokenizer(Version.CURRENT.luceneVersion, reader); TokenFilter filter = new LowerCaseFilter(Version.CURRENT.luceneVersion, tokenizer); filter = new TypeAsPayloadTokenFilter(filter); return new TokenStreamComponents(tokenizer, filter); } }); } } PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper( new StandardAnalyzer(Version.CURRENT.luceneVersion, CharArraySet.EMPTY_SET), mapping); Directory dir = new RAMDirectory(); IndexWriterConfig conf = new IndexWriterConfig(Version.CURRENT.luceneVersion, wrapper); conf.setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, conf); for (TestDoc doc : testDocs) { Document d = new Document(); d.add(new Field("id", doc.id, StringField.TYPE_STORED)); for (int i = 0; i < doc.fieldContent.length; i++) { FieldType type = new FieldType(TextField.TYPE_STORED); TestFieldSetting fieldSetting = doc.fieldSettings[i]; type.setStoreTermVectorOffsets(fieldSetting.storedOffset); type.setStoreTermVectorPayloads(fieldSetting.storedPayloads); type.setStoreTermVectorPositions( fieldSetting.storedPositions || fieldSetting.storedPayloads || fieldSetting.storedOffset); type.setStoreTermVectors(true); type.freeze(); d.add(new Field(fieldSetting.name, doc.fieldContent[i], type)); } writer.updateDocument(new Term("id", doc.id), d); writer.commit(); } writer.close(); return DirectoryReader.open(dir); }
From source file:org.elasticsearch.action.termvectors.AbstractTermVectorsTestCase.java
License:Apache License
/**
 * Indexes the given test documents directly with Lucene and returns a reader over
 * the resulting index, so term vectors produced by Lucene can be compared against
 * Elasticsearch's. Fields configured to store payloads are analyzed with a chain
 * that records each token's type as its payload.
 */
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    // Per-field analyzers: only payload-storing fields need the custom chain.
    Map<String, Analyzer> payloadAnalyzers = new HashMap<>();
    for (TestFieldSetting fieldSetting : testDocs[0].fieldSettings) {
        if (fieldSetting.storedPayloads) {
            payloadAnalyzers.put(fieldSetting.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName) {
                    Tokenizer source = new StandardTokenizer();
                    TokenFilter chain = new LowerCaseFilter(source);
                    chain = new TypeAsPayloadTokenFilter(chain);
                    return new TokenStreamComponents(source, chain);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(CharArraySet.EMPTY_SET), payloadAnalyzers);

    Directory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    for (TestDoc testDoc : testDocs) {
        Document document = new Document();
        document.add(new Field("id", testDoc.id, StringField.TYPE_STORED));
        for (int i = 0; i < testDoc.fieldContent.length; i++) {
            TestFieldSetting setting = testDoc.fieldSettings[i];
            FieldType fieldType = new FieldType(TextField.TYPE_STORED);
            fieldType.setStoreTermVectorOffsets(setting.storedOffset);
            fieldType.setStoreTermVectorPayloads(setting.storedPayloads);
            // Positions are required whenever payloads or offsets are stored.
            fieldType.setStoreTermVectorPositions(
                    setting.storedPositions || setting.storedPayloads || setting.storedOffset);
            fieldType.setStoreTermVectors(true);
            fieldType.freeze();
            document.add(new Field(setting.name, testDoc.fieldContent[i], fieldType));
        }
        writer.updateDocument(new Term("id", testDoc.id), document);
        writer.commit();
    }
    writer.close();
    return DirectoryReader.open(directory);
}
From source file:org.elasticsearch.termvectors.AbstractTermVectorTests.java
License:Apache License
/**
 * Indexes the given test documents directly with Lucene and returns a reader over
 * the resulting index, so term vectors produced by Lucene can be compared against
 * Elasticsearch's. Fields configured to store payloads are analyzed with a chain
 * that records each token's type as its payload.
 */
protected DirectoryReader indexDocsWithLucene(TestDoc[] testDocs) throws IOException {
    // Per-field analyzers: only payload-storing fields need the custom chain.
    Map<String, Analyzer> payloadAnalyzers = new HashMap<String, Analyzer>();
    for (TestFieldSetting fieldSetting : testDocs[0].fieldSettings) {
        if (fieldSetting.storedPayloads) {
            payloadAnalyzers.put(fieldSetting.name, new Analyzer() {
                @Override
                protected TokenStreamComponents createComponents(String fieldName, Reader reader) {
                    Tokenizer source = new StandardTokenizer(Version.CURRENT.luceneVersion, reader);
                    TokenFilter chain = new LowerCaseFilter(Version.CURRENT.luceneVersion, source);
                    chain = new TypeAsPayloadTokenFilter(chain);
                    return new TokenStreamComponents(source, chain);
                }
            });
        }
    }
    PerFieldAnalyzerWrapper analyzer = new PerFieldAnalyzerWrapper(
            new StandardAnalyzer(Version.CURRENT.luceneVersion), payloadAnalyzers);

    Directory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.CURRENT.luceneVersion, analyzer);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(directory, config);

    for (TestDoc testDoc : testDocs) {
        Document document = new Document();
        document.add(new Field("id", testDoc.id, StringField.TYPE_STORED));
        for (int i = 0; i < testDoc.fieldContent.length; i++) {
            TestFieldSetting setting = testDoc.fieldSettings[i];
            FieldType fieldType = new FieldType(TextField.TYPE_STORED);
            fieldType.setStoreTermVectorOffsets(setting.storedOffset);
            fieldType.setStoreTermVectorPayloads(setting.storedPayloads);
            // Positions are required whenever payloads or offsets are stored.
            fieldType.setStoreTermVectorPositions(
                    setting.storedPositions || setting.storedPayloads || setting.storedOffset);
            fieldType.setStoreTermVectors(true);
            fieldType.freeze();
            document.add(new Field(setting.name, testDoc.fieldContent[i], fieldType));
        }
        writer.updateDocument(new Term("id", testDoc.id), document);
        writer.commit();
    }
    writer.close();
    return DirectoryReader.open(directory);
}
From source file:org.elasticsearch.test.integration.termvectors.GetTermVectorTests.java
License:Apache License
private Fields buildWithLuceneAndReturnFields(String docId, String[] fields, String[] content, boolean[] withPositions, boolean[] withOffsets, boolean[] withPayloads) throws IOException { assert (fields.length == withPayloads.length); assert (content.length == withPayloads.length); assert (withPositions.length == withPayloads.length); assert (withOffsets.length == withPayloads.length); Map<String, Analyzer> mapping = new HashMap<String, Analyzer>(); for (int i = 0; i < withPayloads.length; i++) { if (withPayloads[i]) { mapping.put(fields[i], new Analyzer() { @Override//w w w . j a va 2s.c om protected TokenStreamComponents createComponents(String fieldName, Reader reader) { Tokenizer tokenizer = new StandardTokenizer(TEST_VERSION_CURRENT, reader); TokenFilter filter = new LowerCaseFilter(TEST_VERSION_CURRENT, tokenizer); filter = new TypeAsPayloadTokenFilter(filter); return new TokenStreamComponents(tokenizer, filter); } }); } } PerFieldAnalyzerWrapper wrapper = new PerFieldAnalyzerWrapper(new StandardAnalyzer(TEST_VERSION_CURRENT), mapping); Directory dir = FSDirectory.open(new File("/tmp/foo")); IndexWriterConfig conf = new IndexWriterConfig(TEST_VERSION_CURRENT, wrapper); conf.setOpenMode(OpenMode.CREATE); IndexWriter writer = new IndexWriter(dir, conf); Document d = new Document(); for (int i = 0; i < fields.length; i++) { d.add(new Field("id", docId, StringField.TYPE_STORED)); FieldType type = new FieldType(TextField.TYPE_STORED); type.setStoreTermVectorOffsets(withOffsets[i]); type.setStoreTermVectorPayloads(withPayloads[i]); type.setStoreTermVectorPositions(withPositions[i] || withOffsets[i] || withPayloads[i]); type.setStoreTermVectors(true); type.freeze(); d.add(new Field(fields[i], content[i], type)); writer.updateDocument(new Term("id", docId), d); writer.commit(); } writer.close(); DirectoryReader dr = DirectoryReader.open(dir); IndexSearcher s = new IndexSearcher(dr); TopDocs search = s.search(new TermQuery(new Term("id", docId)), 1); ScoreDoc[] 
scoreDocs = search.scoreDocs; assert (scoreDocs.length == 1); int doc = scoreDocs[0].doc; Fields returnFields = dr.getTermVectors(doc); return returnFields; }