Example usage for org.apache.lucene.document Field tokenStreamValue

List of usage examples for org.apache.lucene.document Field tokenStreamValue

Introduction

On this page you can find an example usage for org.apache.lucene.document Field tokenStreamValue.

Prototype

public TokenStream tokenStreamValue() 

Source Link

Document

The TokenStream for this field to be used when indexing, or null.

Usage

From source file:engine.easy.indexer.writer.EasySearchIndexWriter.java

License:Apache License

/**
 * Read the extra data field information
 * /* w w w.  j  a  va  2s  .  c om*/
 * @return it returns the no: of token streams for the extra data field information.
  * @throws IOException if the file would have any IO operation.
 */
private int[] extraData(Field field, Analyzer analyzer) throws IOException {
    if (!field.isIndexed())
        return null;
    if (!field.isTokenized())
        return (new int[] { 1, 1 });
    String strv = field.stringValue();
    int v[];
    if (strv == null) {
        Reader readerv = field.readerValue();
        if (readerv == null) {
            TokenStream tsv = field.tokenStreamValue();
            if (tsv == null) {
                throw new IllegalArgumentException(
                        (new StringBuilder("Cannot obtain field value. field_name: ")).append(field.name())
                                .append(".").toString());
            } else {
                v = countTokenStream(tsv);
                return v;
            }
        }
        strv = readAll(readerv);
        if (strv == null)
            throw new IllegalArgumentException((new StringBuilder("Cannot obtain field value. field_name: "))
                    .append(field.name()).append(".").toString());

        field.setValue(strv);
    }
    BufferedReader reader = new BufferedReader(new StringReader(strv));
    TokenStream ts = analyzer.tokenStream(field.name(), reader);
    v = countTokenStream(ts);
    ts.close();
    reader.close();
    return v;
}

From source file:org.apache.solr.schema.JsonPreAnalyzedParser.java

License:Apache License

@Override
public String toFormattedString(Field f) throws IOException {
    Map<String, Object> map = new LinkedHashMap<String, Object>();
    map.put(VERSION_KEY, VERSION);/*ww w . j av a2s.  co m*/
    if (f.fieldType().stored()) {
        String stringValue = f.stringValue();
        if (stringValue != null) {
            map.put(STRING_KEY, stringValue);
        }
        BytesRef binaryValue = f.binaryValue();
        if (binaryValue != null) {
            map.put(BINARY_KEY,
                    Base64.byteArrayToBase64(binaryValue.bytes, binaryValue.offset, binaryValue.length));
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        List<Map<String, Object>> tokens = new LinkedList<Map<String, Object>>();
        while (ts.incrementToken()) {
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            Map<String, Object> tok = new TreeMap<String, Object>();
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = new String(catt.buffer(), 0, catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    tTerm = tatt.getBytesRef().utf8ToString();
                } else {
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.put(FLAGS_KEY, Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.put(OFFSET_START_KEY, ((OffsetAttribute) att).startOffset());
                        tok.put(OFFSET_END_KEY, ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.put(PAYLOAD_KEY, Base64.byteArrayToBase64(p.bytes, p.offset, p.length));
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.put(POSINCR_KEY, ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.put(TYPE_KEY, ((TypeAttribute) att).type());
                    } else {
                        tok.put(cl.getName(), att.toString());
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                tok.put(TOKEN_KEY, term);
            }
            tokens.add(tok);
        }
        map.put(TOKENS_KEY, tokens);
    }
    return JSONUtil.toJSON(map, -1);
}

From source file:org.apache.solr.schema.SimplePreAnalyzedParser.java

License:Apache License

@Override
public String toFormattedString(Field f) throws IOException {
    StringBuilder sb = new StringBuilder();
    sb.append(VERSION + " ");
    if (f.fieldType().stored()) {
        String s = f.stringValue();
        if (s != null) {
            // encode the equals sign
            s = s.replaceAll("=", "\\=");
            sb.append('=');
            sb.append(s);//from w ww. jav  a2s.c o  m
            sb.append('=');
        }
    }
    TokenStream ts = f.tokenStreamValue();
    if (ts != null) {
        StringBuilder tok = new StringBuilder();
        boolean next = false;
        while (ts.incrementToken()) {
            if (next) {
                sb.append(' ');
            } else {
                next = true;
            }
            tok.setLength(0);
            Iterator<Class<? extends Attribute>> it = ts.getAttributeClassesIterator();
            String cTerm = null;
            String tTerm = null;
            while (it.hasNext()) {
                Class<? extends Attribute> cl = it.next();
                if (!ts.hasAttribute(cl)) {
                    continue;
                }
                Attribute att = ts.getAttribute(cl);
                if (cl.isAssignableFrom(CharTermAttribute.class)) {
                    CharTermAttribute catt = (CharTermAttribute) att;
                    cTerm = escape(catt.buffer(), catt.length());
                } else if (cl.isAssignableFrom(TermToBytesRefAttribute.class)) {
                    TermToBytesRefAttribute tatt = (TermToBytesRefAttribute) att;
                    char[] tTermChars = tatt.getBytesRef().utf8ToString().toCharArray();
                    tTerm = escape(tTermChars, tTermChars.length);
                } else {
                    if (tok.length() > 0)
                        tok.append(',');
                    if (cl.isAssignableFrom(FlagsAttribute.class)) {
                        tok.append("f=" + Integer.toHexString(((FlagsAttribute) att).getFlags()));
                    } else if (cl.isAssignableFrom(OffsetAttribute.class)) {
                        tok.append("s=" + ((OffsetAttribute) att).startOffset() + ",e="
                                + ((OffsetAttribute) att).endOffset());
                    } else if (cl.isAssignableFrom(PayloadAttribute.class)) {
                        BytesRef p = ((PayloadAttribute) att).getPayload();
                        if (p != null && p.length > 0) {
                            tok.append("p=" + bytesToHex(p.bytes, p.offset, p.length));
                        } else if (tok.length() > 0) {
                            tok.setLength(tok.length() - 1); // remove the last comma
                        }
                    } else if (cl.isAssignableFrom(PositionIncrementAttribute.class)) {
                        tok.append("i=" + ((PositionIncrementAttribute) att).getPositionIncrement());
                    } else if (cl.isAssignableFrom(TypeAttribute.class)) {
                        tok.append("y=" + escape(((TypeAttribute) att).type()));
                    } else {

                        tok.append(cl.getName() + "=" + escape(att.toString()));
                    }
                }
            }
            String term = null;
            if (cTerm != null) {
                term = cTerm;
            } else {
                term = tTerm;
            }
            if (term != null && term.length() > 0) {
                if (tok.length() > 0) {
                    tok.insert(0, term + ",");
                } else {
                    tok.insert(0, term);
                }
            }
            sb.append(tok);
        }
    }
    return sb.toString();
}

From source file:org.apache.solr.update.processor.PreAnalyzedUpdateProcessorTest.java

License:Apache License

private void test(String chain, String[] title, String[] teststop) throws Exception {
    SolrInputDocument doc = processAdd(chain,
            doc(f("id", "1"), f("title", title[0]), f("teststop", teststop[0]), f("nonexistent", "foobar"),
                    f("ssto", teststop[0]), f("sind", teststop[0])));
    assertEquals("title should be unchanged", title[0], doc.getFieldValue("title"));
    assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
    Field f = (Field) doc.getFieldValue("teststop");
    assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
    assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
    assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
    // check how SchemaField type affects stored/indexed part processing
    f = (Field) doc.getFieldValue("ssto");
    assertNotNull("should have ssto", f);
    assertNotNull("should have stringValue", f.stringValue());
    assertNull("should not have tokenStreamValue", f.tokenStreamValue());
    f = (Field) doc.getFieldValue("sind");
    assertNotNull("should have sind", f);
    assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
    assertNotNull("should have tokenStreamValue", f.tokenStreamValue());

    doc = processAdd(chain, doc(f("id", "2"), f("title", title[1]), f("teststop", teststop[1]),
            f("nonexistent", "foobar"), f("ssto", teststop[1]), f("sind", teststop[1])));
    assertTrue("title should be a Field", doc.getFieldValue("title") instanceof Field);
    assertTrue("teststop should be a Field", doc.getFieldValue("teststop") instanceof Field);
    f = (Field) doc.getFieldValue("teststop");
    assertEquals("teststop should have stringValue", "this is a test.", f.stringValue());
    assertNotNull("teststop should have tokensStreamValue", f.tokenStreamValue());
    assertNull("nonexistent should be dropped", doc.getField("nonexistent"));
    // check how SchemaField type affects stored/indexed part processing
    f = (Field) doc.getFieldValue("ssto");
    assertNotNull("should have ssto", f);
    assertNotNull("should have stringValue", f.stringValue());
    assertNull("should not have tokenStreamValue", f.tokenStreamValue());
    f = (Field) doc.getFieldValue("sind");
    assertNotNull("should have sind", f);
    assertNull("should not have stringValue: '" + f.stringValue() + "'", f.stringValue());
    assertNotNull("should have tokenStreamValue", f.tokenStreamValue());

    assertU(commit());//from  ww  w  . j a v  a2s  .  c  om
    assertQ(req("teststop:\"one two three\""), "//str[@name='id'][.='1']",
            "//str[@name='teststop'][.='this is a test.']");
    assertQ(req("teststop:three"), "//*[@numFound='2']", "//result/doc[1]/str[@name='id'][.='1']",
            "//result/doc[1]/str[@name='title'][.='not pre-analyzed']",
            "//result/doc[2]/str[@name='id'][.='2']",
            "//result/doc[2]/arr[@name='title']/str[.='string value']");
    assertQ(req("ssto:three"), "//*[@numFound='0']");
    assertQ(req("sind:three"), "//*[@numFound='2']");
}

From source file:org.apache.uima.lucas.indexer.FieldBuilderTest.java

License:Apache License

@Test
public void testCreateFieldConcatenated() throws Exception {

    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);

    TokenStream tokenStream = createMock(TokenStream.class);
    expect(filterBuilder.filter(isA(TokenStreamConcatenator.class), isA(Collection.class)))
            .andReturn(tokenStream);/*from ww w .ja v a  2  s . c o  m*/
    replay(filterBuilder);

    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
    verify(filterBuilder);
    Iterator<Field> fieldIterator = fields.iterator();
    Field field1 = fieldIterator.next();
    assertEquals("field1", field1.name());
    assertEquals(tokenStream, field1.tokenStreamValue());

}

From source file:org.apache.uima.lucas.indexer.FieldBuilderTest.java

License:Apache License

@Test
public void testCreateFieldMerged() throws Exception {

    fieldDescription.setMerge(true);/*  w w w.  j  a va2  s.  c o  m*/
    fieldDescription.setIndex(FieldBuilder.FIELD_INDEX_YES);
    TokenStream tokenStream = createMock(TokenStream.class);
    expect(filterBuilder.filter(isA(TokenStreamMerger.class), isA(Collection.class))).andReturn(tokenStream);
    replay(filterBuilder);

    Collection<Field> fields = fieldBuilder.createFields(tokenStreams, fieldDescription);
    verify(filterBuilder);

    Iterator<Field> fieldIterator = fields.iterator();
    Field field1 = fieldIterator.next();
    assertEquals("field1", field1.name());
    assertEquals(tokenStream, field1.tokenStreamValue());
}

From source file:org.apache.uima.lucas.ProspectiveSearchAE.java

License:Apache License

@Override
public void process(CAS aCAS) throws AnalysisEngineProcessException {

    // First create the index of the document text
    MemoryIndex index = new MemoryIndex();

    List fields = createDocument(aCAS).getFields();

    for (Iterator it = fields.iterator(); it.hasNext();) {
        Field field = (Field) it.next();

        if (field.isIndexed() && field.tokenStreamValue() != null) {
            index.addField(field.name(), field.tokenStreamValue());
        }/*  w  ww .  j  av  a  2  s  .c  o  m*/
    }

    // Search all queries against the one document index
    for (SearchQuery query : searchQueryProvider.getSearchQueries(aCAS)) {

        float score = index.search(query.query());

        if (score > matchingThreshold) {

            // Add a FS to the CAS with the search result
            FeatureStructure searchResult = aCAS.createFS(searchResultType);
            searchResult.setLongValue(searchResultIdFeature, query.id());
            aCAS.addFsToIndexes(searchResult);

            // Find matching tokens and link their annotations
            // in case the user wants search term highlighting
            if (searchResultMatchingTextFeature != null) {

                fields = createDocument(aCAS).getFields();

                for (Iterator it = fields.iterator(); it.hasNext();) {

                    Field field = (Field) it.next();

                    if (field.isIndexed() && field.tokenStreamValue() != null) {

                        TokenStream tokenStream = field.tokenStreamValue();

                        Collection<AnnotationFS> matchingTextAnnotations = new LinkedList<AnnotationFS>();

                        QueryScorer scorer = new QueryScorer(query.query(), field.name());
                        scorer.startFragment(new TextFragment(new StringBuffer(aCAS.getDocumentText()), 0, 0));

                        try {
                            scorer.init(tokenStream);

                            OffsetAttribute offsetAttr = null;
                            while (tokenStream.incrementToken()) {
                                offsetAttr = (OffsetAttribute) tokenStream.getAttribute(OffsetAttribute.class);
                                float tokenScore = scorer.getTokenScore();
                                if (tokenScore > 0) {
                                    AnnotationFS annotation = aCAS.createAnnotation(matchingTextType,
                                            offsetAttr.startOffset(), offsetAttr.endOffset());

                                    matchingTextAnnotations.add(annotation);
                                }
                            }
                        } catch (IOException e) {
                            throw new AnalysisEngineProcessException(e);
                        }

                        ArrayFS matchtingTextArray = aCAS.createArrayFS(matchingTextAnnotations.size());

                        int matchtingTextArrayIndex = 0;
                        for (AnnotationFS matchingTextAnnotation : matchingTextAnnotations) {
                            matchtingTextArray.set(matchtingTextArrayIndex++, matchingTextAnnotation);
                        }

                        searchResult.setFeatureValue(searchResultMatchingTextFeature, matchtingTextArray);
                    }
                }
            }
        }
    }
}

From source file:org.elasticsearch.index.mapper.json.all.SimpleAllMapperTests.java

License:Apache License

@Test
public void testSimpleAllMappers() throws Exception {
    String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/json/all/mapping.json");
    JsonDocumentMapper docMapper = (JsonDocumentMapper) new JsonDocumentMapperParser(
            new AnalysisService(new Index("test"))).parse(mapping);
    byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/json/all/test1.json");
    Document doc = docMapper.parse(json).doc();
    Field field = doc.getField("_all");
    AllEntries allEntries = ((AllTokenFilter) field.tokenStreamValue()).allEntries();
    assertThat(allEntries.fields().size(), equalTo(2));
    assertThat(allEntries.fields().contains("name.last"), equalTo(true));
    assertThat(allEntries.fields().contains("simple1"), equalTo(true));
}

From source file:org.elasticsearch.index.mapper.json.all.SimpleAllMapperTests.java

License:Apache License

@Test
public void testSimpleAllMappersWithReparse() throws Exception {
    String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/json/all/mapping.json");
    JsonDocumentMapper docMapper = (JsonDocumentMapper) new JsonDocumentMapperParser(
            new AnalysisService(new Index("test"))).parse(mapping);
    String builtMapping = docMapper.buildSource();
    //        System.out.println(builtMapping);
    // reparse it
    JsonDocumentMapper builtDocMapper = (JsonDocumentMapper) new JsonDocumentMapperParser(
            new AnalysisService(new Index("test"))).parse(builtMapping);
    byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/json/all/test1.json");
    Document doc = builtDocMapper.parse(json).doc();

    Field field = doc.getField("_all");
    AllEntries allEntries = ((AllTokenFilter) field.tokenStreamValue()).allEntries();
    assertThat(allEntries.fields().size(), equalTo(2));
    assertThat(allEntries.fields().contains("name.last"), equalTo(true));
    assertThat(allEntries.fields().contains("simple1"), equalTo(true));
}

From source file:org.elasticsearch.index.mapper.json.all.SimpleAllMapperTests.java

License:Apache License

@Test
public void testSimpleAllMappersWithStore() throws Exception {
    String mapping = copyToStringFromClasspath("/org/elasticsearch/index/mapper/json/all/store-mapping.json");
    JsonDocumentMapper docMapper = (JsonDocumentMapper) new JsonDocumentMapperParser(
            new AnalysisService(new Index("test"))).parse(mapping);
    byte[] json = copyToBytesFromClasspath("/org/elasticsearch/index/mapper/json/all/test1.json");
    Document doc = docMapper.parse(json).doc();
    Field field = doc.getField("_all");
    AllEntries allEntries = ((AllTokenFilter) field.tokenStreamValue()).allEntries();
    assertThat(allEntries.fields().size(), equalTo(2));
    assertThat(allEntries.fields().contains("name.last"), equalTo(true));
    assertThat(allEntries.fields().contains("simple1"), equalTo(true));

    String text = field.stringValue();
    assertThat(text, equalTo(allEntries.buildText()));
}