Example usage for org.apache.lucene.index IndexableField tokenStream

List of usage examples for org.apache.lucene.index IndexableField tokenStream

Introduction

On this page you can find an example usage for org.apache.lucene.index IndexableField tokenStream.

Prototype

public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse);

Source Link

Document

Creates the TokenStream used for indexing this field.

Usage

From source file:SimpleNaiveBayesDocumentClassifier.java

License:Apache License

/**
 * Performs the analysis of the seed document and extracts the per-field boosts if present.
 * This is done only once for the seed document.
 *
 * @param inputDocument         the seed unseen document
 * @param fieldName2tokensArray a map that associates to a field name the list of token arrays for all its values
 * @param fieldName2boost       a map that associates the boost to the field
 * @throws IOException If there is a low-level I/O error
 */
private void analyzeSeedDocument(Document inputDocument, Map<String, List<String[]>> fieldName2tokensArray,
        Map<String, Float> fieldName2boost) throws IOException {
    for (int idx = 0; idx < textFieldNames.length; idx++) {
        String currentField = textFieldNames[idx];
        float currentBoost = 1;
        List<String[]> valueTokens = new LinkedList<>();
        // A configured field may carry an inline boost using the "name^boost" syntax.
        if (currentField.contains("^")) {
            String[] nameAndBoost = currentField.split("\\^");
            currentField = nameAndBoost[0];
            currentBoost = Float.parseFloat(nameAndBoost[1]);
        }
        // Analyze every value of this field and collect one token array per value.
        for (IndexableField value : inputDocument.getFields(currentField)) {
            TokenStream stream = value.tokenStream(field2analyzer.get(currentField), null);
            valueTokens.add(getTokenArray(stream));
        }
        fieldName2tokensArray.put(currentField, valueTokens);
        fieldName2boost.put(currentField, currentBoost);
        // Store the bare field name back so later lookups no longer see the boost suffix.
        textFieldNames[idx] = currentField;
    }
}

From source file:org.alfresco.solr.query.Solr4QueryParser.java

License:Open Source License

/**
 * Analyzes the given field's value with the schema's index analyzer and returns
 * the produced tokens in order.
 *
 * @param indexableField the field whose value is tokenized
 * @return the tokens emitted by the analysis chain
 * @throws IOException if the analyzer fails with a low-level I/O error
 */
private ArrayList<String> getTokens(IndexableField indexableField) throws IOException {
    ArrayList<String> tokens = new ArrayList<String>();

    // try-with-resources fixes a leak in the original: close() was not in a
    // finally block, so an exception from reset()/incrementToken() leaked the stream.
    try (TokenStream ts = indexableField.tokenStream(schema.getIndexAnalyzer(), null)) {
        CharTermAttribute termAttribute = ts.getAttribute(CharTermAttribute.class);
        ts.reset();
        while (ts.incrementToken()) {
            tokens.add(new String(termAttribute.buffer(), 0, termAttribute.length()));
        }
        ts.end();
    }

    return tokens;
}

From source file:org.elasticsearch.index.mapper.AllFieldMapperTests.java

License:Apache License

/** Boosted fields must not yield an AllTokenStream when _all omits positions. */
public void testBoostWithOmitPositions() throws Exception {
    String mappingSource = copyToStringFromClasspath(
            "/org/elasticsearch/index/mapper/all/mapping_boost_omit_positions_on_all.json");
    DocumentMapper mapper = createIndex("test").mapperService().documentMapperParser().parse("person",
            new CompressedXContent(mappingSource));
    byte[] source = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
    Document parsed = mapper.parse("test", "person", "1", new BytesArray(source)).rootDoc();
    IndexableField[] allFields = parsed.getFields("_all");
    assertThat(allFields.length, equalTo(3));
    // _all field omits positions, so we should not get AllTokenStream even though fields are boosted
    for (IndexableField allField : allFields) {
        assertThat(allField.tokenStream(mapper.mappers().indexAnalyzer(), null),
                Matchers.not(Matchers.instanceOf(AllTokenStream.class)));
    }
}

From source file:org.elasticsearch.index.mapper.AllFieldMapperTests.java

License:Apache License

/** Without any field boosts the _all field must not produce an AllTokenStream. */
public void testNoBoost() throws Exception {
    String mappingSource = copyToStringFromClasspath("/org/elasticsearch/index/mapper/all/noboost-mapping.json");
    DocumentMapper mapper = createIndex("test").mapperService().documentMapperParser().parse("person",
            new CompressedXContent(mappingSource));
    byte[] source = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/all/test1.json");
    Document parsed = mapper.parse("test", "person", "1", new BytesArray(source)).rootDoc();
    IndexableField[] allFields = parsed.getFields("_all");
    assertThat(allFields.length, equalTo(3));
    // no fields have boost, so we should not see AllTokenStream:
    for (IndexableField allField : allFields) {
        assertThat(allField.tokenStream(mapper.mappers().indexAnalyzer(), null),
                Matchers.not(Matchers.instanceOf(AllTokenStream.class)));
    }
}

From source file:org.elasticsearch.index.mapper.LegacyNumberFieldMapperTests.java

License:Apache License

/** checks precisionstep on both the fieldtype and the tokenstream */
private static void assertPrecisionStepEquals(int expected, IndexableField field) throws IOException {
    assertNotNull(field);/*from w w  w .j  a v a 2s .  co m*/
    assertThat(field, instanceOf(Field.class));

    // check fieldtype's precisionstep
    assertEquals(expected, ((Field) field).fieldType().numericPrecisionStep());

    // check the tokenstream actually used by the indexer
    TokenStream ts = field.tokenStream(null, null);
    assertThat(ts, instanceOf(LegacyNumericTokenStream.class));
    assertEquals(expected, ((LegacyNumericTokenStream) ts).getPrecisionStep());
}

From source file:org.elasticsearch.index.query.PercolatorQueryBuilder.java

License:Apache License

/**
 * Adds every field of the parsed document to the in-memory index, analyzing each
 * value with the mapped index analyzer when one exists, otherwise with the default.
 *
 * @param documentMapper  mapper used to look up per-field analyzers; may be null
 * @param defaultAnalyzer analyzer used when no field-specific mapper is found
 * @param document        the parsed document whose fields are indexed
 * @param memoryIndex     target in-memory index
 */
private void indexDoc(DocumentMapper documentMapper, Analyzer defaultAnalyzer, ParseContext.Document document,
        MemoryIndex memoryIndex) {
    for (IndexableField field : document.getFields()) {
        // skip the uid field when it carries no index options
        boolean notIndexed = field.fieldType().indexOptions() == IndexOptions.NONE;
        if (notIndexed && field.name().equals(UidFieldMapper.NAME)) {
            continue;
        }

        // prefer the field-specific index analyzer when the mapping knows this field
        Analyzer analyzer = defaultAnalyzer;
        if (documentMapper != null && documentMapper.mappers().getMapper(field.name()) != null) {
            analyzer = documentMapper.mappers().indexAnalyzer();
        }
        // a catch clause on try-with-resources also covers exceptions thrown by close()
        try (TokenStream tokenStream = field.tokenStream(analyzer, null)) {
            if (tokenStream != null) {
                memoryIndex.addField(field.name(), tokenStream, field.boost());
            }
        } catch (IOException e) {
            throw new ElasticsearchException("Failed to create token stream", e);
        }
    }
}