Example usage for org.apache.lucene.analysis Token getFlags

List of usage examples for org.apache.lucene.analysis Token getFlags

Introduction

On this page you can find example usages of org.apache.lucene.analysis.Token.getFlags().

Prototype

@Override
public int getFlags() 

Source Link

Usage

From source file:hu.mokk.hunglish.lucene.analysis.CompoundWordTokenFilterBase.java

/**
 * Copies the state of {@code token} onto the attribute fields
 * ({@code termAtt}, {@code flagsAtt}, {@code typeAtt}, {@code offsetAtt},
 * {@code posIncAtt} and {@code payloadAtt}).
 *
 * @param token the token whose term text, flags, type, offsets, position
 *              increment and payload are transferred
 * @throws IOException declared by the signature
 */
private final void setToken(final Token token) throws IOException {
    // Term text: copy termLength() chars starting at index 0 of the token's buffer.
    final char[] termChars = token.termBuffer();
    final int termChars_length = token.termLength();
    termAtt.setTermBuffer(termChars, 0, termChars_length);

    final int tokenFlags = token.getFlags();
    flagsAtt.setFlags(tokenFlags);

    final String tokenType = token.type();
    typeAtt.setType(tokenType);

    final int begin = token.startOffset();
    final int end = token.endOffset();
    offsetAtt.setOffset(begin, end);

    final int increment = token.getPositionIncrement();
    posIncAtt.setPositionIncrement(increment);

    payloadAtt.setPayload(token.getPayload());
}

From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java

License:Open Source License

/**
 * Builds a collated query by splicing every correction into a copy of the
 * original query text, at the offsets recorded on the correction's original
 * token.
 *
 * <p>Corrections whose original token has a position increment of 0 are
 * skipped. When a correction contains an inner space, each following word is
 * prefixed with the {@code +}/{@code -} character that precedes the original
 * token in the query, or with {@code "AND "} if the token carries
 * {@link QueryConverter#TERM_IN_BOOLEAN_QUERY_FLAG}; the whole replacement is
 * then wrapped in parentheses.
 *
 * @param origQuery   the query text the token offsets refer to
 * @param corrections the corrections to apply, processed in list order
 * @return the query text with all applicable corrections substituted
 */
@SuppressWarnings("deprecation")
private String getCollation(String origQuery, List<SpellCheckCorrection> corrections) {
    final StringBuilder collated = new StringBuilder(origQuery);
    // Net length change produced by the replacements made so far; token
    // offsets are relative to origQuery and must be shifted by this amount.
    int shift = 0;

    for (SpellCheckCorrection correction : corrections) {
        final Token original = correction.getOriginal();
        // Replacements are applied in order, but injected terms might cause
        // illegal offsets due to previous replacements, so they are skipped.
        if (original.getPositionIncrement() == 0) {
            continue;
        }

        final String suggestion = correction.getCorrection();
        final StringBuilder rewritten = new StringBuilder(suggestion);
        boolean wrapInParens = false;
        Character requiredOrProhibited = null;
        // Distance from a space index in the suggestion to the insertion
        // point in 'rewritten': 1 (just past the space) plus everything
        // inserted so far.
        int insertShift = 1;

        // If the correction contains whitespace (because a word was broken
        // into 2+ words), give all of the new words the same
        // optional/required/prohibited status in the query.
        for (int spaceAt = suggestion.indexOf(' ');
                spaceAt > -1 && spaceAt < suggestion.length() - 1;
                spaceAt = correction.getCorrection().indexOf(' ', spaceAt + insertShift)) {
            wrapInParens = true;
            final int tokenStart = original.startOffset();
            final char before = tokenStart > 0 ? origQuery.charAt(tokenStart - 1) : ' ';
            if (before == '-' || before == '+') {
                rewritten.insert(spaceAt + insertShift, before);
                if (requiredOrProhibited == null) {
                    requiredOrProhibited = before;
                }
                insertShift++;
            } else if ((original.getFlags()
                    & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) {
                rewritten.insert(spaceAt + insertShift, "AND ");
                insertShift += 4;
            }
        }

        // 1 when the replacement itself starts with the +/- operator; the
        // replaced range is then widened by one character to the left.
        int operatorWidth = 0;
        if (wrapInParens) {
            if (requiredOrProhibited != null) {
                rewritten.insert(0, requiredOrProhibited);
                operatorWidth = 1;
            }
            rewritten.insert(0, '(');
            rewritten.append(')');
        }

        final String replacement = rewritten.toString();
        final int from = original.startOffset() + shift - operatorWidth;
        final int to = original.endOffset() + shift;
        collated.replace(from, to, replacement);
        shift += replacement.length() - operatorWidth - (original.endOffset() - original.startOffset());
    }
    return collated.toString();
}

From source file:org.apache.solr.analysis.BufferedTokenStream.java

License:Apache License

/**
 * Old API emulation for back compat: clears the current attributes and then
 * copies the given token's term text, offsets, type, flags, position
 * increment and payload onto the attribute fields.
 *
 * @param token the token to publish through the attributes
 * @return always {@code true}
 * @throws IOException declared by the signature
 */
private boolean writeToken(Token token) throws IOException {
    // Reset first so only this token's values remain set afterwards.
    clearAttributes();

    final char[] chars = token.buffer();
    final int charCount = token.length();
    termAtt.copyBuffer(chars, 0, charCount);

    final int begin = token.startOffset();
    final int end = token.endOffset();
    offsetAtt.setOffset(begin, end);

    final String tokenType = token.type();
    typeAtt.setType(tokenType);

    final int tokenFlags = token.getFlags();
    flagsAtt.setFlags(tokenFlags);

    final int increment = token.getPositionIncrement();
    posIncAtt.setPositionIncrement(increment);

    payloadAtt.setPayload(token.getPayload());
    return true;
}

From source file:org.apache.solr.spelling.SpellCheckCollator.java

License:Apache License

/**
 * Produces a collation: a copy of the original query in which each
 * correction replaces the text covered by its original token.
 *
 * <p>Tokens with a position increment of 0 are ignored. A correction that
 * contains an inner space gets every subsequent word prefixed either with the
 * {@code +}/{@code -} character found immediately before the token in the
 * query or, failing that, with {@code "AND "} when the token has
 * {@link QueryConverter#TERM_IN_BOOLEAN_QUERY_FLAG} set; such a correction is
 * also enclosed in parentheses.
 *
 * @param origQuery   the query string that the token offsets index into
 * @param corrections the corrections to substitute, in list order
 * @return the rewritten query string
 */
private String getCollation(String origQuery, List<SpellCheckCorrection> corrections) {
    final StringBuilder output = new StringBuilder(origQuery);
    // How far positions in 'output' have drifted from positions in origQuery.
    int drift = 0;

    for (SpellCheckCorrection correction : corrections) {
        final Token source = correction.getOriginal();
        // we are replacing the query in order, but injected terms might cause
        // illegal offsets due to previous replacements.
        if (source.getPositionIncrement() == 0) {
            continue;
        }

        final String fixed = correction.getCorrection();
        final StringBuilder fixedSb = new StringBuilder(fixed);
        boolean parenthesize = false;
        Character leadingOperator = null;
        // Starts at 1 so the first insertion lands right after the space;
        // grows by the length of every piece inserted.
        int bump = 1;
        int spaceIdx = fixed.indexOf(' ');

        // If the correction contains whitespace (because it involved breaking
        // a word in 2+ words), make sure all of the new words have the same
        // optional/required/prohibited status in the query.
        while (spaceIdx > -1 && spaceIdx < fixed.length() - 1) {
            parenthesize = true;

            final char charBefore;
            if (source.startOffset() > 0) {
                charBefore = origQuery.charAt(source.startOffset() - 1);
            } else {
                charBefore = ' ';
            }

            if (charBefore == '-' || charBefore == '+') {
                fixedSb.insert(spaceIdx + bump, charBefore);
                if (leadingOperator == null) {
                    leadingOperator = charBefore;
                }
                bump += 1;
            } else if ((source.getFlags()
                    & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) {
                fixedSb.insert(spaceIdx + bump, "AND ");
                bump += 4;
            }
            spaceIdx = correction.getCorrection().indexOf(' ', spaceIdx + bump);
        }

        // Becomes 1 when the operator character is moved inside the
        // parentheses; the replaced range then also covers the operator
        // that precedes the token in the query.
        int operatorChars = 0;
        if (parenthesize) {
            if (leadingOperator != null) {
                fixedSb.insert(0, leadingOperator);
                operatorChars = 1;
            }
            fixedSb.insert(0, '(');
            fixedSb.append(')');
        }

        final String replacement = fixedSb.toString();
        final int replaceFrom = source.startOffset() + drift - operatorChars;
        final int replaceTo = source.endOffset() + drift;
        output.replace(replaceFrom, replaceTo, replacement);

        final int originalLength = source.endOffset() - source.startOffset();
        drift += replacement.length() - operatorChars - originalLength;
    }
    return output.toString();
}

From source file:org.apache.solr.spelling.SpellingQueryConverter.java

License:Apache License

/**
 * Converts the original query string to a collection of Lucene Tokens.
 *
 * <p>Words are taken from successive {@code QUERY_REGEX} matches. The words
 * {@code AND}, {@code OR} and {@code NOT} are not analyzed; they are only
 * remembered as the most recent boolean operator. Every other word is passed
 * to {@code analyze} together with its start index and a flag value derived
 * from a leading/preceding {@code -} or {@code +} and from the next word.
 * If any boolean operator was seen, every resulting token additionally gets
 * {@link QueryConverter#TERM_IN_BOOLEAN_QUERY_FLAG}.
 *
 * @param original the original query string
 * @return a Collection of Lucene Tokens; empty when {@code original} is {@code null}
 */
@Override
public Collection<Token> convert(String original) {
    // this can happen with q.alt = and no query
    if (original == null) {
        return Collections.emptyList();
    }

    Collection<Token> tokens = new ArrayList<Token>();
    Matcher matcher = QUERY_REGEX.matcher(original);
    // One-word look-ahead: the match already consumed from the matcher but
    // not yet processed, and where it starts in the query.
    String lookahead = null;
    int lookaheadStart = 0;
    String lastBooleanOp = null;

    while (lookahead != null || matcher.find()) {
        final String word;
        final int startIndex;
        if (lookahead == null) {
            word = matcher.group(0);
            startIndex = matcher.start();
        } else {
            word = lookahead;
            startIndex = lookaheadStart;
            lookahead = null;
        }

        // Peek at the following word, if there is one.
        if (matcher.find()) {
            lookahead = matcher.group(0);
            lookaheadStart = matcher.start();
        }

        if ("AND".equals(word) || "OR".equals(word) || "NOT".equals(word)) {
            lastBooleanOp = word;
            continue;
        }

        // treat "AND NOT" as "NOT"...
        if ("AND".equals(lookahead) && original.length() > lookaheadStart + 7
                && original.startsWith("AND NOT", lookaheadStart)) {
            lookahead = "NOT";
        }

        final char firstChar = word.charAt(0);
        final char charBefore = startIndex > 0 ? original.charAt(startIndex - 1) : '\0';
        final boolean nextIsOperator = "AND".equals(lookahead) || "OR".equals(lookahead)
                || "NOT".equals(lookahead);
        // We don't know the default operator, so the first operator is assumed
        // not to be new ... unless it is a NOT, because only AND/OR can be default.
        final boolean precedesNewOperator = (lastBooleanOp == null)
                ? "NOT".equals(lookahead)
                : nextIsOperator && !lastBooleanOp.equals(lookahead);

        final int flagValue;
        if (firstChar == '-' || charBefore == '-') {
            flagValue = PROHIBITED_TERM_FLAG;
        } else if (firstChar == '+' || charBefore == '+') {
            flagValue = REQUIRED_TERM_FLAG;
        } else if (precedesNewOperator) {
            flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
        } else {
            flagValue = 0;
        }

        try {
            analyze(tokens, word, startIndex, flagValue);
        } catch (IOException e) {
            // TODO: shouldn't we log something?
            // NOTE(review): the exception is dropped here and processing
            // continues with the next word.
        }
    }

    if (lastBooleanOp != null) {
        for (Token token : tokens) {
            token.setFlags(token.getFlags() | QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG);
        }
    }
    return tokens;
}

From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java

License:Apache License

/**
 * Tells whether every bit of {@link QueryConverter#REQUIRED_TERM_FLAG} is set
 * in the token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} if the flag is present
 */
private boolean hasRequiredFlag(Token t) {
    final int mask = QueryConverter.REQUIRED_TERM_FLAG;
    final int masked = t.getFlags() & mask;
    return masked == mask;
}

From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java

License:Apache License

/**
 * Tells whether every bit of {@link QueryConverter#PROHIBITED_TERM_FLAG} is
 * set in the token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} if the flag is present
 */
private boolean hasProhibitedFlag(Token t) {
    final int mask = QueryConverter.PROHIBITED_TERM_FLAG;
    final int masked = t.getFlags() & mask;
    return masked == mask;
}

From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java

License:Apache License

/**
 * Tells whether every bit of
 * {@link QueryConverter#TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG} is set in the
 * token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} if the flag is present
 */
private boolean hasNBOFlag(Token t) {
    final int mask = QueryConverter.TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
    final int masked = t.getFlags() & mask;
    return masked == mask;
}

From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java

License:Apache License

/**
 * Tells whether every bit of {@link QueryConverter#TERM_IN_BOOLEAN_QUERY_FLAG}
 * is set in the token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} if the flag is present
 */
private boolean hasInBooleanFlag(Token t) {
    final int mask = QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG;
    final int masked = t.getFlags() & mask;
    return masked == mask;
}