Example usage for org.apache.lucene.analysis.tokenattributes PositionLengthAttribute getPositionLength

List of usage examples for org.apache.lucene.analysis.tokenattributes PositionLengthAttribute getPositionLength

Introduction

This page collects example usages of org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute.getPositionLength().

Prototype

public int getPositionLength();

Source Link

Document

Returns the position length of this Token.

Usage

From source file:com.github.bibreen.mecab_ko_lucene_analyzer.MeCabKoStandardTokenizerTest.java

License:Apache License

private String tokenizerToString(Tokenizer tokenizer) throws Exception {
    OffsetAttribute extOffset = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncrAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLengthAtt = tokenizer.addAttribute(PositionLengthAttribute.class);
    CharTermAttribute term = (CharTermAttribute) tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = (TypeAttribute) tokenizer.addAttribute(TypeAttribute.class);
    SemanticClassAttribute semanticClass = (SemanticClassAttribute) tokenizer
            .addAttribute(SemanticClassAttribute.class);
    PartOfSpeechAttribute pos = (PartOfSpeechAttribute) tokenizer.addAttribute(PartOfSpeechAttribute.class);

    StringBuilder result = new StringBuilder();
    while (tokenizer.incrementToken() == true) {
        result.append(new String(term.buffer(), 0, term.length())).append(":");
        result.append(type.type()).append(":");
        result.append(pos.partOfSpeech()).append(":");
        result.append(semanticClass.semanticClass()).append(":");
        result.append(String.valueOf(posIncrAtt.getPositionIncrement())).append(":");
        result.append(String.valueOf(posLengthAtt.getPositionLength())).append(":");
        result.append(String.valueOf(extOffset.startOffset())).append(":");
        result.append(String.valueOf(extOffset.endOffset()));
        result.append(",");
    }//from  w w w.  jav  a 2  s .  c o m
    tokenizer.end();
    return result.toString();
}

From source file:com.qwazr.search.index.TermDefinition.java

License:Apache License

TermDefinition(CharTermAttribute charTermAttr, FlagsAttribute flagsAttr, OffsetAttribute offsetAttr,
        PositionIncrementAttribute posIncAttr, PositionLengthAttribute posLengthAttr, TypeAttribute typeAttr,
        KeywordAttribute keywordAttr) {/*from   ww w. j a  v  a  2s  . c  om*/
    char_term = charTermAttr == null ? null : charTermAttr.toString();
    if (offsetAttr != null) {
        start_offset = offsetAttr.startOffset();
        end_offset = offsetAttr.endOffset();
    } else {
        start_offset = null;
        end_offset = null;
    }
    flags = flagsAttr == null ? null : flagsAttr.getFlags();
    position_increment = posIncAttr == null ? null : posIncAttr.getPositionIncrement();
    position_length = posLengthAttr == null ? null : posLengthAttr.getPositionLength();
    type = typeAttr == null ? null : typeAttr.type();
    is_keyword = keywordAttr == null ? null : keywordAttr.isKeyword();
}

From source file:com.shaie.SynonymFilterExample.java

License:Apache License

@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    final Tokenizer tok = new WhitespaceTokenizer();
    tok.setReader(new StringReader("dark sea green sea green"));

    final SynonymMap.Builder builder = new SynonymMap.Builder(true);
    addSynonym("dark sea green", "color", builder);
    addSynonym("green", "color", builder);
    addSynonym("dark sea", "color", builder);
    addSynonym("sea green", "color", builder);
    final SynonymMap synMap = builder.build();
    final TokenStream ts = new SynonymGraphFilter(tok, synMap, true);

    final CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
    final PositionIncrementAttribute posIncrAtt = ts.addAttribute(PositionIncrementAttribute.class);
    final PositionLengthAttribute posLengthAtt = ts.addAttribute(PositionLengthAttribute.class);

    ts.reset();//  w  ww .j a v  a 2 s .c om
    int pos = -1;
    while (ts.incrementToken()) {
        pos += posIncrAtt.getPositionIncrement();
        System.out.println("term=" + termAtt + ", pos=" + pos + ", posLen=" + posLengthAtt.getPositionLength());
    }
    ts.end();
    ts.close();
}

From source file:org.bitbucket.eunjeon.mecab_ko_lucene_analyzer.MeCabKoStandardTokenizerTest.java

License:Apache License

private String tokenizerToString(Tokenizer tokenizer) throws Exception {
    OffsetAttribute extOffset = tokenizer.addAttribute(OffsetAttribute.class);
    PositionIncrementAttribute posIncrAtt = tokenizer.addAttribute(PositionIncrementAttribute.class);
    PositionLengthAttribute posLengthAtt = tokenizer.addAttribute(PositionLengthAttribute.class);
    CharTermAttribute term = tokenizer.addAttribute(CharTermAttribute.class);
    TypeAttribute type = tokenizer.addAttribute(TypeAttribute.class);
    SemanticClassAttribute semanticClass = tokenizer.addAttribute(SemanticClassAttribute.class);
    PartOfSpeechAttribute pos = tokenizer.addAttribute(PartOfSpeechAttribute.class);

    StringBuilder result = new StringBuilder();
    tokenizer.reset();/*from   ww w  .ja  v  a2  s. c o  m*/
    while (tokenizer.incrementToken() == true) {
        result.append(new String(term.buffer(), 0, term.length())).append(":");
        result.append(type.type()).append(":");
        result.append(pos.partOfSpeech()).append(":");
        result.append(semanticClass.semanticClass()).append(":");
        result.append(String.valueOf(posIncrAtt.getPositionIncrement())).append(":");
        result.append(String.valueOf(posLengthAtt.getPositionLength())).append(":");
        result.append(String.valueOf(extOffset.startOffset())).append(":");
        result.append(String.valueOf(extOffset.endOffset()));
        result.append(",");
    }
    tokenizer.end();
    return result.toString();
}

From source file:uk.gov.nationalarchives.discovery.taxonomy.common.repository.lucene.analyzer.TaxonomyGeneralAnalyzerTest.java

License:Mozilla Public License

public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[],
        int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset,
        Integer finalPosInc, boolean[] keywordAtts, boolean offsetsAreCorrect) throws IOException {
    assertNotNull(output);/*from ww  w . j av  a  2s . c  o m*/
    CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);

    CharTermAttribute termAtt = null;
    if (output.length > 0) {
        assertTrue("has no CharTermAttribute", ts.hasAttribute(CharTermAttribute.class));
        termAtt = ts.getAttribute(CharTermAttribute.class);
    }

    OffsetAttribute offsetAtt = null;
    if (startOffsets != null || endOffsets != null || finalOffset != null) {
        assertTrue("has no OffsetAttribute", ts.hasAttribute(OffsetAttribute.class));
        offsetAtt = ts.getAttribute(OffsetAttribute.class);
    }

    TypeAttribute typeAtt = null;
    if (types != null) {
        assertTrue("has no TypeAttribute", ts.hasAttribute(TypeAttribute.class));
        typeAtt = ts.getAttribute(TypeAttribute.class);
    }

    PositionIncrementAttribute posIncrAtt = null;
    if (posIncrements != null || finalPosInc != null) {
        assertTrue("has no PositionIncrementAttribute", ts.hasAttribute(PositionIncrementAttribute.class));
        posIncrAtt = ts.getAttribute(PositionIncrementAttribute.class);
    }

    PositionLengthAttribute posLengthAtt = null;
    if (posLengths != null) {
        assertTrue("has no PositionLengthAttribute", ts.hasAttribute(PositionLengthAttribute.class));
        posLengthAtt = ts.getAttribute(PositionLengthAttribute.class);
    }

    KeywordAttribute keywordAtt = null;
    if (keywordAtts != null) {
        assertTrue("has no KeywordAttribute", ts.hasAttribute(KeywordAttribute.class));
        keywordAtt = ts.getAttribute(KeywordAttribute.class);
    }

    // Maps position to the start/end offset:
    final Map<Integer, Integer> posToStartOffset = new HashMap<>();
    final Map<Integer, Integer> posToEndOffset = new HashMap<>();

    ts.reset();
    int pos = -1;
    int lastStartOffset = 0;
    for (int i = 0; i < output.length; i++) {
        // extra safety to enforce, that the state is not preserved and also
        // assign bogus values
        ts.clearAttributes();
        termAtt.setEmpty().append("bogusTerm");
        if (offsetAtt != null)
            offsetAtt.setOffset(14584724, 24683243);
        if (typeAtt != null)
            typeAtt.setType("bogusType");
        if (posIncrAtt != null)
            posIncrAtt.setPositionIncrement(45987657);
        if (posLengthAtt != null)
            posLengthAtt.setPositionLength(45987653);
        if (keywordAtt != null)
            keywordAtt.setKeyword((i & 1) == 0);

        checkClearAtt.getAndResetClearCalled(); // reset it, because we
        // called clearAttribute()
        // before
        assertTrue("token " + i + " does not exist", ts.incrementToken());
        assertTrue("clearAttributes() was not called correctly in TokenStream chain",
                checkClearAtt.getAndResetClearCalled());

        assertEquals("term " + i, output[i], termAtt.toString());
        if (startOffsets != null) {
            assertEquals("startOffset " + i, startOffsets[i], offsetAtt.startOffset());
        }
        if (endOffsets != null) {
            assertEquals("endOffset " + i, endOffsets[i], offsetAtt.endOffset());
        }
        if (types != null) {
            assertEquals("type " + i, types[i], typeAtt.type());
        }
        if (posIncrements != null) {
            assertEquals("posIncrement " + i, posIncrements[i], posIncrAtt.getPositionIncrement());
        }
        if (posLengths != null) {
            assertEquals("posLength " + i, posLengths[i], posLengthAtt.getPositionLength());
        }
        if (keywordAtts != null) {
            assertEquals("keywordAtt " + i, keywordAtts[i], keywordAtt.isKeyword());
        }

        // we can enforce some basic things about a few attributes even if
        // the caller doesn't check:
        if (offsetAtt != null) {
            final int startOffset = offsetAtt.startOffset();
            final int endOffset = offsetAtt.endOffset();
            if (finalOffset != null) {
                assertTrue("startOffset must be <= finalOffset", startOffset <= finalOffset.intValue());
                assertTrue("endOffset must be <= finalOffset: got endOffset=" + endOffset + " vs finalOffset="
                        + finalOffset.intValue(), endOffset <= finalOffset.intValue());
            }

            if (offsetsAreCorrect) {
                assertTrue("offsets must not go backwards startOffset=" + startOffset + " is < lastStartOffset="
                        + lastStartOffset, offsetAtt.startOffset() >= lastStartOffset);
                lastStartOffset = offsetAtt.startOffset();
            }

            if (offsetsAreCorrect && posLengthAtt != null && posIncrAtt != null) {
                // Validate offset consistency in the graph, ie
                // all tokens leaving from a certain pos have the
                // same startOffset, and all tokens arriving to a
                // certain pos have the same endOffset:
                final int posInc = posIncrAtt.getPositionIncrement();
                pos += posInc;

                final int posLength = posLengthAtt.getPositionLength();

                if (!posToStartOffset.containsKey(pos)) {
                    // First time we've seen a token leaving from this
                    // position:
                    posToStartOffset.put(pos, startOffset);
                    // System.out.println("  + s " + pos + " -> " +
                    // startOffset);
                } else {
                    // We've seen a token leaving from this position
                    // before; verify the startOffset is the same:
                    // System.out.println("  + vs " + pos + " -> " +
                    // startOffset);
                    assertEquals("pos=" + pos + " posLen=" + posLength + " token=" + termAtt,
                            posToStartOffset.get(pos).intValue(), startOffset);
                }

                final int endPos = pos + posLength;

                if (!posToEndOffset.containsKey(endPos)) {
                    // First time we've seen a token arriving to this
                    // position:
                    posToEndOffset.put(endPos, endOffset);
                    // System.out.println("  + e " + endPos + " -> " +
                    // endOffset);
                } else {
                    // We've seen a token arriving to this position
                    // before; verify the endOffset is the same:
                    // System.out.println("  + ve " + endPos + " -> " +
                    // endOffset);
                    assertEquals("pos=" + pos + " posLen=" + posLength + " token=" + termAtt,
                            posToEndOffset.get(endPos).intValue(), endOffset);
                }
            }
        }
        if (posIncrAtt != null) {
            if (i == 0) {
                assertTrue("first posIncrement must be >= 1", posIncrAtt.getPositionIncrement() >= 1);
            } else {
                assertTrue("posIncrement must be >= 0", posIncrAtt.getPositionIncrement() >= 0);
            }
        }
        if (posLengthAtt != null) {
            assertTrue("posLength must be >= 1", posLengthAtt.getPositionLength() >= 1);
        }
    }

    if (ts.incrementToken()) {
        fail("TokenStream has more tokens than expected (expected count=" + output.length + "); extra token="
                + termAtt.toString());
    }

    // repeat our extra safety checks for end()
    ts.clearAttributes();
    if (termAtt != null)
        termAtt.setEmpty().append("bogusTerm");
    if (offsetAtt != null)
        offsetAtt.setOffset(14584724, 24683243);
    if (typeAtt != null)
        typeAtt.setType("bogusType");
    if (posIncrAtt != null)
        posIncrAtt.setPositionIncrement(45987657);
    if (posLengthAtt != null)
        posLengthAtt.setPositionLength(45987653);

    checkClearAtt.getAndResetClearCalled(); // reset it, because we called
    // clearAttribute() before

    ts.end();
    assertTrue("super.end()/clearAttributes() was not called correctly in end()",
            checkClearAtt.getAndResetClearCalled());

    if (finalOffset != null) {
        assertEquals("finalOffset", finalOffset.intValue(), offsetAtt.endOffset());
    }
    if (offsetAtt != null) {
        assertTrue("finalOffset must be >= 0", offsetAtt.endOffset() >= 0);
    }
    if (finalPosInc != null) {
        assertEquals("finalPosInc", finalPosInc.intValue(), posIncrAtt.getPositionIncrement());
    }

    ts.close();
}