List of usage examples for org.apache.lucene.index IndexableField tokenStream
public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse);
From source file:SimpleNaiveBayesDocumentClassifier.java
License:Apache License
/**
 * Performs the analysis for the seed (unseen) document, extracting the per-field
 * boosts when present. Field names may carry a boost using the {@code "name^boost"}
 * syntax; the boost suffix is stripped and the bare name is written back into
 * {@code textFieldNames}. This runs only once for the seed document.
 *
 * @param inputDocument        the seed unseen document
 * @param fieldName2tokensArray map populated with, per field name, the list of token
 *                              arrays for all of that field's values
 * @param fieldName2boost       map populated with the boost associated to each field
 * @throws IOException if there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray,
        Map<String, Float> fieldName2boost) throws IOException {
    for (int idx = 0; idx < textFieldNames.length; idx++) {
        String name = textFieldNames[idx];
        float fieldBoost = 1;
        if (name.contains("^")) {
            // "field^boost" syntax: split once, keep the bare name, parse the boost
            String[] nameAndBoost = name.split("\\^");
            name = nameAndBoost[0];
            fieldBoost = Float.parseFloat(nameAndBoost[1]);
        }
        List<String[]> tokenizedValues = new LinkedList<>();
        for (IndexableField fieldValue : inputDocument.getFields(name)) {
            TokenStream fieldTokens = fieldValue.tokenStream(field2analyzer.get(name), null);
            tokenizedValues.add(getTokenArray(fieldTokens));
        }
        fieldName2tokensArray.put(name, tokenizedValues);
        fieldName2boost.put(name, fieldBoost);
        // persist the stripped field name so later passes see it without the boost suffix
        textFieldNames[idx] = name;
    }
}
From source file:org.alfresco.solr.query.Solr4QueryParser.java
License:Open Source License
/**
 * Tokenizes the given field's value using the schema's index analyzer and
 * collects the produced terms in order.
 *
 * @param indexableField the field whose value is analyzed
 * @return the analyzed tokens, in stream order
 * @throws IOException if the analyzer fails while producing tokens
 */
private ArrayList<String> getTokens(IndexableField indexableField) throws IOException {
    ArrayList<String> tokens = new ArrayList<String>();
    // try-with-resources fixes a resource leak in the original: the TokenStream
    // was never closed when reset()/incrementToken() threw before close().
    try (TokenStream ts = indexableField.tokenStream(schema.getIndexAnalyzer(), null)) {
        // addAttribute (not getAttribute) is the canonical consumer idiom:
        // getAttribute throws if the attribute was never added to the stream.
        CharTermAttribute termAttribute = ts.addAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(new String(termAttribute.buffer(), 0, termAttribute.length()));
        }
        ts.end();
    }
    return tokens;
}
From source file:org.elasticsearch.index.mapper.AllFieldMapperTests.java
License:Apache License
public void testBoostWithOmitPositions() throws Exception { String mapping = copyToStringFromClasspath( "/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json"); DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("person", new CompressedXContent(mapping)); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); Document doc = docMapper.parse("test", "person", "1", new BytesArray(json)).rootDoc(); IndexableField[] fields = doc.getFields("_all"); assertThat(fields.length, equalTo(3)); for (IndexableField field : fields) { // _all field omits positions, so we should not get AllTokenStream even though fields are boosted assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer(), null), Matchers.not(Matchers.instanceOf(AllTokenStream.class))); }// w ww .ja v a 2 s. c o m }
From source file:org.elasticsearch.index.mapper.AllFieldMapperTests.java
License:Apache License
public void testNoBoost() throws Exception { String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/noboost-mapping.json"); DocumentMapper docMapper = createIndex("test").mapperService().documentMapperParser().parse("person", new CompressedXContent(mapping)); byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json"); Document doc = docMapper.parse("test", "person", "1", new BytesArray(json)).rootDoc(); IndexableField[] fields = doc.getFields("_all"); assertThat(fields.length, equalTo(3)); for (IndexableField field : fields) { // no fields have boost, so we should not see AllTokenStream: assertThat(field.tokenStream(docMapper.mappers().indexAnalyzer(), null), Matchers.not(Matchers.instanceOf(AllTokenStream.class))); }/*from w ww .ja va2 s . com*/ }
From source file:org.elasticsearch.index.mapper.LegacyNumberFieldMapperTests.java
License:Apache License
/** checks precisionstep on both the fieldtype and the tokenstream */ private static void assertPrecisionStepEquals(int expected, IndexableField field) throws IOException { assertNotNull(field);/*from w w w .j a v a 2s . co m*/ assertThat(field, instanceOf(Field.class)); // check fieldtype's precisionstep assertEquals(expected, ((Field) field).fieldType().numericPrecisionStep()); // check the tokenstream actually used by the indexer TokenStream ts = field.tokenStream(null, null); assertThat(ts, instanceOf(LegacyNumericTokenStream.class)); assertEquals(expected, ((LegacyNumericTokenStream) ts).getPrecisionStep()); }
From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java
License:Apache License
private void indexDoc(DocumentMapper documentMapper, Analyzer defaultAnalyzer, ParseContext.Document document, MemoryIndex memoryIndex) {//from w ww .j a va 2 s . c o m for (IndexableField field : document.getFields()) { if (field.fieldType().indexOptions() == IndexOptions.NONE && field.name().equals(UidFieldMapper.NAME)) { continue; } Analyzer analyzer = defaultAnalyzer; if (documentMapper != null && documentMapper.mappers().getMapper(field.name()) != null) { analyzer = documentMapper.mappers().indexAnalyzer(); } try { try (TokenStream tokenStream = field.tokenStream(analyzer, null)) { if (tokenStream != null) { memoryIndex.addField(field.name(), tokenStream, field.boost()); } } } catch (IOException e) { throw new ElasticsearchException("Failed to create token stream", e); } } }