Usage examples for org.apache.lucene.analysis.Token#setFlags(int)
@Override public void setFlags(int flags)
From source file: org.apache.solr.analysis.BufferedTokenStream.java
License: Apache License
/**
 * Old-API emulation for back-compat: advances the wrapped stream one
 * position and materializes its attributes as a {@link Token}.
 *
 * @return the next token, or {@code null} once the stream is exhausted
 * @throws IOException if the underlying stream fails
 */
private Token readToken() throws IOException {
    if (input.incrementToken()) {
        Token tok = new Token();
        // Mirror every stream attribute onto the freshly created Token.
        tok.copyBuffer(termAtt.buffer(), 0, termAtt.length());
        tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset());
        tok.setType(typeAtt.type());
        tok.setFlags(flagsAtt.getFlags());
        tok.setPositionIncrement(posIncAtt.getPositionIncrement());
        tok.setPayload(payloadAtt.getPayload());
        return tok;
    }
    // End of stream.
    return null;
}
From source file: org.apache.solr.handler.component.SpellCheckComponent.java
License: Apache License
private Collection<Token> getTokens(String q, Analyzer analyzer) throws IOException { Collection<Token> result = new ArrayList<Token>(); assert analyzer != null; TokenStream ts = analyzer.tokenStream("", q); try {/*www. j ava2 s . c om*/ ts.reset(); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); while (ts.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); token.setType(typeAtt.type()); token.setFlags(flagsAtt.getFlags()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } ts.end(); return result; } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file: org.apache.solr.spelling.SimpleQueryConverter.java
License: Apache License
@Override public Collection<Token> convert(String origQuery) { Collection<Token> result = new HashSet<Token>(); WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_40); TokenStream ts = null;/*from w w w. j av a 2s . c om*/ try { ts = analyzer.tokenStream("", origQuery); // TODO: support custom attributes CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class); OffsetAttribute offsetAtt = ts.addAttribute(OffsetAttribute.class); TypeAttribute typeAtt = ts.addAttribute(TypeAttribute.class); FlagsAttribute flagsAtt = ts.addAttribute(FlagsAttribute.class); PayloadAttribute payloadAtt = ts.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = ts.addAttribute(PositionIncrementAttribute.class); ts.reset(); while (ts.incrementToken()) { Token tok = new Token(); tok.copyBuffer(termAtt.buffer(), 0, termAtt.length()); tok.setOffset(offsetAtt.startOffset(), offsetAtt.endOffset()); tok.setFlags(flagsAtt.getFlags()); tok.setPayload(payloadAtt.getPayload()); tok.setPositionIncrement(posIncAtt.getPositionIncrement()); tok.setType(typeAtt.type()); result.add(tok); } ts.end(); return result; } catch (IOException e) { throw new RuntimeException(e); } finally { IOUtils.closeWhileHandlingException(ts); } }
From source file: org.apache.solr.spelling.SpellingQueryConverter.java
License: Apache License
/**
 * Converts the original query string to a collection of Lucene Tokens.
 *
 * Walks the regex matches with a one-match lookahead ({@code nextWord}) so
 * that flags depending on the FOLLOWING word (e.g. "term AND ...") can be
 * assigned to the current term.
 *
 * @param original the original query string
 * @return a Collection of Lucene Tokens
 */
@Override
public Collection<Token> convert(String original) {
    if (original == null) { // this can happen with q.alt = and no query
        return Collections.emptyList();
    }
    Collection<Token> result = new ArrayList<Token>();
    Matcher matcher = QUERY_REGEX.matcher(original);
    // One-match lookahead buffer: nextWord/nextStartIndex hold a match found
    // on a previous iteration that has not been processed yet.
    String nextWord = null;
    int nextStartIndex = 0;
    String lastBooleanOp = null;
    while (nextWord != null || matcher.find()) {
        String word = null;
        int startIndex = 0;
        if (nextWord != null) {
            // Consume the buffered lookahead match first.
            word = nextWord;
            startIndex = nextStartIndex;
            nextWord = null;
        } else {
            word = matcher.group(0);
            startIndex = matcher.start();
        }
        // Refill the lookahead with the match after the current word.
        if (matcher.find()) {
            nextWord = matcher.group(0);
            nextStartIndex = matcher.start();
        }
        // Boolean operators are remembered but never emitted as tokens.
        if ("AND".equals(word) || "OR".equals(word) || "NOT".equals(word)) {
            lastBooleanOp = word;
            continue;
        }
        // treat "AND NOT" as "NOT"...
        if ("AND".equals(nextWord) && original.length() > nextStartIndex + 7
                && original.substring(nextStartIndex, nextStartIndex + 7).equals("AND NOT")) {
            nextWord = "NOT";
        }
        int flagValue = 0;
        // A '-'/'+' either leads the word itself or immediately precedes it
        // in the original string.
        if (word.charAt(0) == '-' || (startIndex > 0 && original.charAt(startIndex - 1) == '-')) {
            flagValue = PROHIBITED_TERM_FLAG;
        } else if (word.charAt(0) == '+' || (startIndex > 0 && original.charAt(startIndex - 1) == '+')) {
            flagValue = REQUIRED_TERM_FLAG;
            //we don't know the default operator so just assume the first operator isn't new.
        } else if (nextWord != null && lastBooleanOp != null && !nextWord.equals(lastBooleanOp)
                && ("AND".equals(nextWord) || "OR".equals(nextWord) || "NOT".equals(nextWord))) {
            flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
            //...unless the 1st boolean operator is a NOT, because only AND/OR can be default.
            // NOTE(review): with lastBooleanOp == null the
            // !nextWord.equals(lastBooleanOp) check below is always true
            // (nextWord is non-null here) — looks redundant; confirm intent.
        } else if (nextWord != null && lastBooleanOp == null && !nextWord.equals(lastBooleanOp)
                && ("NOT".equals(nextWord))) {
            flagValue = TERM_PRECEDES_NEW_BOOLEAN_OPERATOR_FLAG;
        }
        try {
            analyze(result, word, startIndex, flagValue);
        } catch (IOException e) {
            // TODO: shouldn't we log something?
            // NOTE(review): analysis failure for this word is silently
            // dropped — the term simply does not appear in the result.
        }
    }
    // If any boolean operator was seen, mark every emitted token as being
    // part of a boolean query.
    if (lastBooleanOp != null) {
        for (Token t : result) {
            int f = t.getFlags();
            t.setFlags(f |= QueryConverter.TERM_IN_BOOLEAN_QUERY_FLAG);
        }
    }
    return result;
}
From source file: org.apache.solr.spelling.SpellingQueryConverter.java
License: Apache License
protected void analyze(Collection<Token> result, String text, int offset, int flagsAttValue) throws IOException { TokenStream stream = analyzer.tokenStream("", text); // TODO: support custom attributes CharTermAttribute termAtt = stream.addAttribute(CharTermAttribute.class); TypeAttribute typeAtt = stream.addAttribute(TypeAttribute.class); PayloadAttribute payloadAtt = stream.addAttribute(PayloadAttribute.class); PositionIncrementAttribute posIncAtt = stream.addAttribute(PositionIncrementAttribute.class); OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class); stream.reset();/*from w ww . j a va 2s. co m*/ while (stream.incrementToken()) { Token token = new Token(); token.copyBuffer(termAtt.buffer(), 0, termAtt.length()); token.setOffset(offset + offsetAtt.startOffset(), offset + offsetAtt.endOffset()); token.setFlags(flagsAttValue); //overwriting any flags already set... token.setType(typeAtt.type()); token.setPayload(payloadAtt.getPayload()); token.setPositionIncrement(posIncAtt.getPositionIncrement()); result.add(token); } stream.end(); stream.close(); }
From source file: wiki.indexer.tokenizer.CustomWikipediaTokenizer.java
License: Apache License
/**
 * Collapses a run of consecutive scanner tokens of the same wiki type into
 * one whitespace-joined term in {@code reusableToken}, while ALSO saving
 * each individual token (via {@code setupSavedToken}) into the {@code tokens}
 * iterator for later emission.
 *
 * NOTE(review): relies heavily on scanner state ordering (yychar/setText/
 * getNextToken) — do not reorder calls.
 */
@SuppressWarnings("unchecked")
private void collapseAndSaveTokens(final Token reusableToken, int tokenType, String type) throws IOException {
    // collapse
    StringBuffer buffer = new StringBuffer(32);
    // setText appends the current scanner text to buffer and returns its length.
    int numAdded = scanner.setText(buffer);
    // TODO: how to know how much whitespace to add
    int theStart = scanner.yychar();
    int lastPos = theStart + numAdded;
    int tmpTokType;
    int numSeen = 0;
    // NOTE(review): raw List — presumably List<Token>; kept as-is.
    List tmp = new ArrayList();
    // Save the first token of the run with position increment 0.
    Token saved = new Token();
    setupSavedToken(saved, 0, type);
    tmp.add(saved);
    // while we can get a token and that token is the same type and we have
    // not transitioned to a new wiki-item of the same type
    while ((tmpTokType = scanner.getNextToken()) != CustomWikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType
            && scanner.getNumWikiTokensSeen() > numSeen) {
        int currPos = scanner.yychar();
        // append whitespace to reproduce the gap between the two tokens
        for (int i = 0; i < (currPos - lastPos); i++) {
            buffer.append(' ');
        }
        numAdded = scanner.setText(buffer);
        // Save each collapsed token individually as well.
        saved = new Token();
        setupSavedToken(saved, scanner.getPositionIncrement(), type);
        tmp.add(saved);
        numSeen++;
        lastPos = currPos + numAdded;
    }
    // trim the buffer and publish the collapsed term on the reusable token
    String s = buffer.toString().trim();
    reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
    reusableToken.setStartOffset(theStart);
    reusableToken.setEndOffset(theStart + s.length());
    reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG);
    // The way the loop is written, we will have proceeded to the next
    // token. We need to pushback the scanner to lastPos
    if (tmpTokType != CustomWikipediaTokenizerImpl.YYEOF) {
        scanner.yypushback(scanner.yylength());
    }
    // Expose the individually-saved tokens for subsequent emission.
    tokens = tmp.iterator();
}
From source file: wiki.indexer.tokenizer.CustomWikipediaTokenizer.java
License: Apache License
/**
 * Collapses a run of consecutive scanner tokens of the same wiki type into
 * one whitespace-joined term stored in {@code reusableToken}. Unlike
 * {@code collapseAndSaveTokens}, the individual tokens are NOT saved.
 *
 * NOTE(review): relies on exact scanner call ordering (yychar/setText/
 * getNextToken) — do not reorder.
 */
private void collapseTokens(final Token reusableToken, int tokenType) throws IOException {
    // collapse
    StringBuffer buffer = new StringBuffer(32);
    // setText appends the current scanner text to buffer and returns its length.
    int numAdded = scanner.setText(buffer);
    // TODO: how to know how much whitespace to add
    int theStart = scanner.yychar();
    int lastPos = theStart + numAdded;
    int tmpTokType;
    int numSeen = 0;
    // while we can get a token and that token is the same type and we have
    // not transitioned to a new wiki-item of the same type
    while ((tmpTokType = scanner.getNextToken()) != CustomWikipediaTokenizerImpl.YYEOF && tmpTokType == tokenType
            && scanner.getNumWikiTokensSeen() > numSeen) {
        int currPos = scanner.yychar();
        // append whitespace to reproduce the gap between the two tokens
        for (int i = 0; i < (currPos - lastPos); i++) {
            buffer.append(' ');
        }
        numAdded = scanner.setText(buffer);
        numSeen++;
        lastPos = currPos + numAdded;
    }
    // trim the buffer and publish the collapsed term on the reusable token
    String s = buffer.toString().trim();
    reusableToken.setTermBuffer(s.toCharArray(), 0, s.length());
    reusableToken.setStartOffset(theStart);
    reusableToken.setEndOffset(theStart + s.length());
    reusableToken.setFlags(UNTOKENIZED_TOKEN_FLAG);
    // The way the loop is written, we will have proceeded to the next
    // token. We need to pushback the scanner to lastPos
    if (tmpTokType != CustomWikipediaTokenizerImpl.YYEOF) {
        scanner.yypushback(scanner.yylength());
    } else {
        // Hit EOF: no saved-token iterator to expose.
        tokens = null;
    }
}