Usage examples for org.apache.lucene.analysis.Token#getFlags()
@Override public int getFlags()
From source file:hu.mokk.hunglish.lucene.analysis.CompoundWordTokenFilterBase.java
private final void setToken(final Token token) throws IOException { termAtt.setTermBuffer(token.termBuffer(), 0, token.termLength()); flagsAtt.setFlags(token.getFlags()); typeAtt.setType(token.type());//ww w. jav a 2 s . co m offsetAtt.setOffset(token.startOffset(), token.endOffset()); posIncAtt.setPositionIncrement(token.getPositionIncrement()); payloadAtt.setPayload(token.getPayload()); }
From source file:org.alfresco.solr.component.spellcheck.AlfrescoSpellCheckCollator.java
License:Open Source License
@SuppressWarnings("deprecation") private String getCollation(String origQuery, List<SpellCheckCorrection> corrections) { StringBuilder collation = new StringBuilder(origQuery); int offset = 0; String corr = ""; for (int i = 0; i < corrections.size(); i++) { SpellCheckCorrection correction = corrections.get(i); Token tok = correction.getOriginal(); // we are replacing the query in order, but injected terms might // cause illegal offsets due to previous replacements. if (tok.getPositionIncrement() == 0) continue; corr = correction.getCorrection(); boolean addParenthesis = false; Character requiredOrProhibited = null; int indexOfSpace = corr.indexOf(' '); StringBuilder corrSb = new StringBuilder(corr); int bump = 1; // If the correction contains whitespace (because it involved // breaking a word in 2+ words), // then be sure all of the new words have the same // optional/required/prohibited status in the query. while (indexOfSpace > -1 && indexOfSpace < corr.length() - 1) { addParenthesis = true;//from w ww .j a v a2 s. co m char previousChar = tok.startOffset() > 0 ? 
origQuery.charAt(tok.startOffset() - 1) : ' '; if (previousChar == '-' || previousChar == '+') { corrSb.insert(indexOfSpace + bump, previousChar); if (requiredOrProhibited == null) { requiredOrProhibited = previousChar; } bump++; } else if ((tok.getFlags() & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) { corrSb.insert(indexOfSpace + bump, "AND "); bump += 4; } indexOfSpace = correction.getCorrection().indexOf(' ', indexOfSpace + bump); } int oneForReqOrProhib = 0; if (addParenthesis) { if (requiredOrProhibited != null) { corrSb.insert(0, requiredOrProhibited); oneForReqOrProhib++; } corrSb.insert(0, '('); corrSb.append(')'); } corr = corrSb.toString(); int startIndex = tok.startOffset() + offset - oneForReqOrProhib; int endIndex = tok.endOffset() + offset; collation.replace(startIndex, endIndex, corr); offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset()); } return collation.toString(); }
From source file:org.apache.solr.analysis.BufferedTokenStream.java
License:Apache License
/** old api emulation for back compat */ private boolean writeToken(Token token) throws IOException { clearAttributes();// ww w . j a v a 2 s. c o m termAtt.copyBuffer(token.buffer(), 0, token.length()); offsetAtt.setOffset(token.startOffset(), token.endOffset()); typeAtt.setType(token.type()); flagsAtt.setFlags(token.getFlags()); posIncAtt.setPositionIncrement(token.getPositionIncrement()); payloadAtt.setPayload(token.getPayload()); return true; }
From source file:org.apache.solr.spelling.SpellCheckCollator.java
License:Apache License
private String getCollation(String origQuery, List<SpellCheckCorrection> corrections) { StringBuilder collation = new StringBuilder(origQuery); int offset = 0; String corr = ""; for (int i = 0; i < corrections.size(); i++) { SpellCheckCorrection correction = corrections.get(i); Token tok = correction.getOriginal(); // we are replacing the query in order, but injected terms might cause // illegal offsets due to previous replacements. if (tok.getPositionIncrement() == 0) continue; corr = correction.getCorrection(); boolean addParenthesis = false; Character requiredOrProhibited = null; int indexOfSpace = corr.indexOf(' '); StringBuilder corrSb = new StringBuilder(corr); int bump = 1; //If the correction contains whitespace (because it involved breaking a word in 2+ words), //then be sure all of the new words have the same optional/required/prohibited status in the query. while (indexOfSpace > -1 && indexOfSpace < corr.length() - 1) { addParenthesis = true;/*from w ww . j a va2s .co m*/ char previousChar = tok.startOffset() > 0 ? 
origQuery.charAt(tok.startOffset() - 1) : ' '; if (previousChar == '-' || previousChar == '+') { corrSb.insert(indexOfSpace + bump, previousChar); if (requiredOrProhibited == null) { requiredOrProhibited = previousChar; } bump++; } else if ((tok.getFlags() & QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) == QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG) { corrSb.insert(indexOfSpace + bump, "AND "); bump += 4; } indexOfSpace = correction.getCorrection().indexOf(' ', indexOfSpace + bump); } int oneForReqOrProhib = 0; if (addParenthesis) { if (requiredOrProhibited != null) { corrSb.insert(0, requiredOrProhibited); oneForReqOrProhib++; } corrSb.insert(0, '('); corrSb.append(')'); } corr = corrSb.toString(); int startIndex = tok.startOffset() + offset - oneForReqOrProhib; int endIndex = tok.endOffset() + offset; collation.replace(startIndex, endIndex, corr); offset += corr.length() - oneForReqOrProhib - (tok.endOffset() - tok.startOffset()); } return collation.toString(); }
From source file:org.apache.solr.spelling.SpellingQueryConverter.java
License:Apache License
/** * Converts the original query string to a collection of Lucene Tokens. * @param original the original query string * @return a Collection of Lucene Tokens */// w w w.jav a 2s. c om @Override public Collection<Token> convert(String original) { if (original == null) { // this can happen with q.alt = and no query return Collections.emptyList(); } Collection<Token> result = new ArrayList<Token>(); Matcher matcher = QUERY_REGEX.matcher(original); String nextWord = null; int nextStartIndex = 0; String lastBooleanOp = null; while (nextWord != null || matcher.find()) { String word = null; int startIndex = 0; if (nextWord != null) { word = nextWord; startIndex = nextStartIndex; nextWord = null; } else { word = matcher.group(0); startIndex = matcher.start(); } if (matcher.find()) { nextWord = matcher.group(0); nextStartIndex = matcher.start(); } if ("AND".equals(word) || "OR".equals(word) || "NOT".equals(word)) { lastBooleanOp = word; continue; } // treat "AND NOT" as "NOT"... if ("AND".equals(nextWord) && original.length() > nextStartIndex + 7 && original.substring(nextStartIndex, nextStartIndex + 7).equals("AND NOT")) { nextWord = "NOT"; } int flagValue = 0; if (word.charAt(0) == '-' || (startIndex > 0 && original.charAt(startIndex - 1) == '-')) { flagValue = PROHIBITED_TERM_FLAG; } else if (word.charAt(0) == '+' || (startIndex > 0 && original.charAt(startIndex - 1) == '+')) { flagValue = REQUIRED_TERM_FLAG; //we don't know the default operator so just assume the first operator isn't new. } else if (nextWord != null && lastBooleanOp != null && !nextWord.equals(lastBooleanOp) && ("AND".equals(nextWord) || "OR".equals(nextWord) || "NOT".equals(nextWord))) { flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG; //...unless the 1st boolean operator is a NOT, because only AND/OR can be default. 
} else if (nextWord != null && lastBooleanOp == null && !nextWord.equals(lastBooleanOp) && ("NOT".equals(nextWord))) { flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG; } try { analyze(result, word, startIndex, flagValue); } catch (IOException e) { // TODO: shouldn't we log something? } } if (lastBooleanOp != null) { for (Token t : result) { int f = t.getFlags(); t.setFlags(f |= QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG); } } return result; }
From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java
License:Apache License
/**
 * Reports whether every bit of {@link QueryConverter#REQUIRED_TERM_FLAG} is set in the
 * token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} when the masked flags equal the mask
 */
private boolean hasRequiredFlag(Token t) {
  final int flags = t.getFlags();
  final int mask = QueryConverter.REQUIRED_TERM_FLAG;
  return mask == (flags & mask);
}
From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java
License:Apache License
/**
 * Reports whether every bit of {@link QueryConverter#PROHIBITED_TERM_FLAG} is set in the
 * token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} when the masked flags equal the mask
 */
private boolean hasProhibitedFlag(Token t) {
  final int flags = t.getFlags();
  final int mask = QueryConverter.PROHIBITED_TERM_FLAG;
  return mask == (flags & mask);
}
From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java
License:Apache License
/**
 * Reports whether every bit of
 * {@link QueryConverter#TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG} is set in the token's
 * flags.
 *
 * @param t the token to inspect
 * @return {@code true} when the masked flags equal the mask
 */
private boolean hasNBOFlag(Token t) {
  final int flags = t.getFlags();
  final int mask = QueryConverter.TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
  return mask == (flags & mask);
}
From source file:org.apache.solr.spelling.SpellingQueryConverterTest.java
License:Apache License
/**
 * Reports whether every bit of {@link QueryConverter#TERM_IN_BOOLEAN_QUERY_FLAG} is set
 * in the token's flags.
 *
 * @param t the token to inspect
 * @return {@code true} when the masked flags equal the mask
 */
private boolean hasInBooleanFlag(Token t) {
  final int flags = t.getFlags();
  final int mask = QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG;
  return mask == (flags & mask);
}