List of usage examples for org.apache.lucene.analysis TokenStream incrementToken
public abstract boolean incrementToken() throws IOException;
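Before the per-file examples, a note on the consumer contract: call reset() once, loop while incrementToken() returns true (reading values from attributes obtained via addAttribute() beforehand), then call end() and close(). A minimal self-contained sketch; it assumes Lucene 5.x or later (StandardAnalyzer's no-argument constructor), and the field name and sample text are purely illustrative:

    import java.io.IOException;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

    public class IncrementTokenSketch {
        public static void main(String[] args) throws IOException {
            Analyzer analyzer = new StandardAnalyzer();
            try (TokenStream stream = analyzer.tokenStream("contents", "the quick brown fox")) {
                CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
                stream.reset();                   // mandatory before the first incrementToken()
                while (stream.incrementToken()) { // returns false once the stream is exhausted
                    System.out.print("[" + term + "] ");
                }
                stream.end();                     // records end-of-stream state (e.g. final offset)
            }
            analyzer.close();
        }
    }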
From source file:aos.lucene.analysis.i18n.ChineseDemo.java
License:Apache License
    private static void analyze(String string, Analyzer analyzer) throws IOException {
        StringBuffer buffer = new StringBuffer();
        TokenStream stream = analyzer.tokenStream("contents", new StringReader(string));
        TermAttribute term = stream.addAttribute(TermAttribute.class);
        stream.reset(); // required by the TokenStream contract before the first incrementToken()
        while (stream.incrementToken()) { //C
            buffer.append("[");
            buffer.append(term.term());
            buffer.append("] ");
        }
        stream.end();
        stream.close();
        String output = buffer.toString();

        Frame f = new Frame();
        f.setTitle(analyzer.getClass().getSimpleName() + " : " + string);
        f.setResizable(true);

        Font font = new Font(null, Font.PLAIN, 36);
        int width = getWidth(f.getFontMetrics(font), output);

        f.setSize((width < 250) ? 250 : width + 50, 75);

        // NOTE: if Label doesn't render the Chinese characters
        // properly, try using javax.swing.JLabel instead
        Label label = new Label(output); //D
        label.setSize(width, 75);
        label.setAlignment(Label.CENTER);
        label.setFont(font);
        f.add(label);

        f.setVisible(true);
    }
From source file:aos.lucene.search.advanced.SpanQueryTest.java
License:Apache License
    private void dumpSpans(SpanQuery query) throws IOException {
        Spans spans = query.getSpans(reader);
        LOGGER.info(query + ":");
        int numSpans = 0;

        TopDocs hits = searcher.search(query, 10);
        float[] scores = new float[2]; // sized for the two documents in the test index
        for (ScoreDoc sd : hits.scoreDocs) {
            scores[sd.doc] = sd.score;
        }

        while (spans.next()) {
            numSpans++;
            int id = spans.doc();
            Document doc = reader.document(id);

            TokenStream stream = analyzer.tokenStream("contents", new StringReader(doc.get("f")));
            TermAttribute term = stream.addAttribute(TermAttribute.class);
            stream.reset(); // required before the first incrementToken()

            StringBuilder buffer = new StringBuilder();
            buffer.append(" ");
            int i = 0;
            while (stream.incrementToken()) {
                if (i == spans.start()) { // mark the start of the matching span
                    buffer.append("<");
                }
                buffer.append(term.term());
                if (i + 1 == spans.end()) { // mark the end of the matching span
                    buffer.append(">");
                }
                buffer.append(" ");
                i++;
            }
            stream.end();
            stream.close();
            buffer.append("(").append(scores[id]).append(") ");

            LOGGER.info(buffer.toString());
        }

        if (numSpans == 0) {
            LOGGER.info(" No spans");
        }
        LOGGER.info("");
    }
From source file:at.ac.tuwien.ifs.myluceneanalyzers.fa.algorithm.PersianDictionaryCountCompoundWord.java
@SuppressWarnings({ "resource", "deprecation" }) private String stem(String input) throws IOException { String output = ""; Reader reader = new StringReader(input); Tokenizer source = new StandardTokenizer(Version.LUCENE_4_10_3, reader); TokenStream tokenStream = new PersianStemFilter(source); CharTermAttribute charTermAttributeGreedy = tokenStream.getAttribute(CharTermAttribute.class); tokenStream.reset();//from w w w . j av a 2s . co m while (tokenStream.incrementToken()) { output = output + " " + charTermAttributeGreedy.toString(); } return output.trim(); }
From source file:at.ac.univie.mminf.luceneSKOS.analysis.AbstractMeSHFilter.java
License:Apache License
    public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException {
        TokenStream ts = analyzer.tokenStream("", new StringReader(text));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        // PositionIncrementAttribute posIncAtt =
        // ts.addAttribute(PositionIncrementAttribute.class);
        ts.reset();
        reuse.length = 0;
        while (ts.incrementToken()) {
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            // if (posIncAtt.getPositionIncrement() != 1) {
            //     throw new IllegalArgumentException("term: " + text +
            //         " analyzed to a token with posinc != 1");
            // }
            reuse.grow(reuse.length + length + 1); /* current + word + separator */
            int end = reuse.offset + reuse.length;
            if (reuse.length > 0) {
                reuse.chars[end++] = 32; // space
                reuse.length++;
            }
            System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
            reuse.length += length;
        }
        ts.end();
        ts.close();
        if (reuse.length == 0) {
            throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
        }
        return reuse;
    }
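For illustration only, a hypothetical caller of this helper might look as follows; the analyzer choice, input text, and expected output are assumptions, not part of the original source:

    import org.apache.lucene.analysis.core.SimpleAnalyzer;
    import org.apache.lucene.util.CharsRef;
    import org.apache.lucene.util.Version;

    CharsRef reuse = new CharsRef(32); // reusable buffer, grown on demand by analyze()
    CharsRef result = AbstractMeSHFilter.analyze(new SimpleAnalyzer(Version.LUCENE_40),
            "Myocardial Infarction", reuse);
    System.out.println(result); // space-joined analyzed tokens, e.g. "myocardial infarction"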
From source file:at.ac.univie.mminf.luceneSKOS.analysis.SNOMEDFilter.java
License:Apache License
    public static CharsRef analyze(Analyzer analyzer, String text, CharsRef reuse) throws IOException {
        TokenStream ts = analyzer.tokenStream("", new StringReader(text));
        CharTermAttribute termAtt = ts.addAttribute(CharTermAttribute.class);
        // PositionIncrementAttribute posIncAtt =
        // ts.addAttribute(PositionIncrementAttribute.class);
        boolean phraseTerm = false;
        ts.reset();
        reuse.length = 0;
        while (ts.incrementToken()) {
            // System.out.println(text + " | " + termAtt.toString());
            int length = termAtt.length();
            if (length == 0) {
                throw new IllegalArgumentException("term: " + text + " analyzed to a zero-length token");
            }
            // if (posIncAtt.getPositionIncrement() != 1) {
            //     throw new IllegalArgumentException("term: " + text +
            //         " analyzed to a token with posinc != 1");
            // }
            reuse.grow(reuse.length + length + 1); /* current + word + separator */
            int end = reuse.offset + reuse.length;
            if (reuse.length > 0) {
                reuse.chars[end++] = 32; // space
                reuse.length++;
                phraseTerm = true; // more than one token: treat the result as a phrase
            }
            System.arraycopy(termAtt.buffer(), 0, reuse.chars, end, length);
            reuse.length += length;
        }
        ts.end();
        ts.close();
        if (reuse.length == 0) {
            throw new IllegalArgumentException("term: " + text + " was completely eliminated by analyzer");
        }
        if (phraseTerm) {
            // shift the buffer right by one position and wrap it in double quotes,
            // so a multi-token result is emitted as a quoted phrase
            reuse.grow(reuse.length + 2); /* current + two quote characters */
            reuse.length += 2;
            char next = reuse.chars[0];
            for (int i = 0; i < reuse.length - 2; i++) {
                char tmp = reuse.chars[i + 1];
                reuse.chars[i + 1] = next;
                next = tmp;
            }
            reuse.chars[0] = '\"';
            reuse.chars[reuse.length - 1] = '\"';
        }
        return reuse;
    }
From source file:at.ac.univie.mminf.luceneSKOS.util.AnalyzerUtils.java
License:Apache License
    public static void displayTokens(TokenStream stream) throws IOException {
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        stream.reset(); // required before the first incrementToken()
        while (stream.incrementToken()) {
            System.out.print("[" + term.toString() + "] ");
        }
        stream.end();
        stream.close();
    }
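A hypothetical invocation of this utility; the analyzer, field name, and text are illustrative, and Lucene 3.x is assumed to match the Payload usage elsewhere in this file:

    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.util.Version;

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
    AnalyzerUtils.displayTokens(analyzer.tokenStream("contents", new StringReader("The quick brown fox")));
    // with English stop words enabled, this would print something like: [quick] [brown] [fox]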
From source file:at.ac.univie.mminf.luceneSKOS.util.AnalyzerUtils.java
License:Apache License
    public static void displayTokensWithPositions(Analyzer analyzer, String text) throws IOException {
        TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));
        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);

        stream.reset(); // required before the first incrementToken()
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ":");
            }
            System.out.print("[" + term.toString() + "] ");
        }
        stream.end();
        stream.close();
        System.out.println();
    }
From source file:at.ac.univie.mminf.luceneSKOS.util.AnalyzerUtils.java
License:Apache License
    public static void displayTokensWithFullDetails(Analyzer analyzer, String text) throws IOException {
        TokenStream stream = analyzer.tokenStream("contents", new StringReader(text));

        CharTermAttribute term = stream.addAttribute(CharTermAttribute.class);
        PositionIncrementAttribute posIncr = stream.addAttribute(PositionIncrementAttribute.class);
        OffsetAttribute offset = stream.addAttribute(OffsetAttribute.class);
        TypeAttribute type = stream.addAttribute(TypeAttribute.class);
        PayloadAttribute payload = stream.addAttribute(PayloadAttribute.class);

        stream.reset(); // required before the first incrementToken()
        int position = 0;
        while (stream.incrementToken()) {
            int increment = posIncr.getPositionIncrement();
            if (increment > 0) {
                position = position + increment;
                System.out.println();
                System.out.print(position + ":");
            }

            Payload pl = payload.getPayload();
            if (pl != null) {
                System.out.print("[" + term.toString() + ":" + offset.startOffset() + "->"
                        + offset.endOffset() + ":" + type.type() + ":" + new String(pl.getData()) + "] ");
            } else {
                System.out.print("[" + term.toString() + ":" + offset.startOffset() + "->"
                        + offset.endOffset() + ":" + type.type() + "] ");
            }
        }
        stream.end();
        stream.close();
        System.out.println();
    }
From source file:at.itbh.bev.apibeans.FinderImpl.java
License:Open Source License
    public FullTextQuery constructQuery(EntityManager em, String postalCode, String place, String addressLine,
            String houseId) throws InvalidApiUsageException {
        FullTextEntityManager fullTextEm = Search.getFullTextEntityManager(em);

        if ((Objects.toString(postalCode, "") + Objects.toString(place, "") + Objects.toString(addressLine, "")
                + Objects.toString(houseId, "")).length() == 0) {
            throw new InvalidApiUsageException(
                    "At least one parameter must be provided. Coordinates don't count as parameters.");
        }

        if (addressLine != null && addressLine.length() < 2 && addressLine.length() > 0) {
            throw new InvalidApiUsageException("The parameter addressLine must consist of at least 2 characters.");
        }

        QueryBuilder b = fullTextEm.getSearchFactory().buildQueryBuilder().forEntity(AdresseDenormalized.class)
                .get();
        List<Query> queries = new ArrayList<>();

        if (postalCode != null && postalCode.length() > 0) {
            queries.add(b.keyword().onField("postalCode").boostedTo(20).matching(postalCode).createQuery());
        }

        if (addressLine != null && addressLine.length() > 0) {
            // triple addressLine since in the data source it is also tripled if
            // there is no building or address name
            queries.add(b.keyword().onField("addressLine").matching(addressLine + addressLine + addressLine)
                    .createQuery());
            queries.add(b.keyword().onField("addressLineExact").boostedTo(10)
                    .matching(addressLine + addressLine + addressLine).createQuery());
        }

        if (houseId != null && houseId.length() > 0) {
            // if the search string contains a number, take the first number in the
            // search string and match it against the house number
            Matcher matcher = housenumberPattern.matcher(houseId);
            if (matcher.find()) {
                queries.add(
                        b.keyword().onField("hausnrzahl").boostedTo(50).matching(matcher.group(1)).createQuery());
            }
            if (houseId.matches(".*\\D.*")) {
                queries.add(b.keyword().onField("houseIdExact").matching(houseId).createQuery());
            }
            queries.add(b.keyword().onField("houseId").boostedTo(20).matching(houseId).createQuery());

            TextAnalyzer analyzer = new TextAnalyzer();
            TokenStream stream;
            try {
                stream = analyzer.tokenStream(null, new StringReader(houseId));
                // CharTermAttribute cattr = stream.addAttribute(CharTermAttribute.class);
                stream.reset();
                if (stream.incrementToken()) {
                    // if the analyzer does not remove everything, check hofname and hausnrgebaeudebez
                    queries.add(b.keyword().onField("hofname").matching(houseId).createQuery());
                    queries.add(b.keyword().onField("hausnrgebaeudebez").matching(houseId).createQuery());
                    // System.out.println(cattr.toString());
                }
                stream.end();
                stream.close();
            } catch (IOException e1) {
                e1.printStackTrace();
            }
            analyzer.close();
        }

        if (place != null && place.length() > 0) {
            queries.add(b.keyword().onField("place").matching(place).createQuery());
            queries.add(b.keyword().onField("municipalityExact").boostedTo(20).matching(place).createQuery());
            queries.add(b.keyword().onField("placeExact").boostedTo(5).matching(place).createQuery());
        }

        @SuppressWarnings("rawtypes")
        BooleanJunction bq = b.bool();
        for (Query item : queries) {
            bq = bq.should(item);
        }

        FullTextQuery fullTextQuery = fullTextEm.createFullTextQuery(bq.createQuery(), AdresseDenormalized.class);
        return fullTextQuery;
    }
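Note how incrementToken() is used here purely as an emptiness test: if the analyzer eliminates every token from houseId, the extra keyword queries are skipped. That idiom, isolated as a hedged sketch (SimpleAnalyzer and the helper name stand in for the project's TextAnalyzer, which is not shown in this listing):

    import java.io.IOException;
    import java.io.StringReader;
    import org.apache.lucene.analysis.Analyzer;
    import org.apache.lucene.analysis.TokenStream;

    final class AnalysisProbe {
        // returns true if the analyzer emits at least one token for the input
        static boolean survivesAnalysis(Analyzer analyzer, String input) throws IOException {
            TokenStream stream = analyzer.tokenStream(null, new StringReader(input));
            try {
                stream.reset();
                boolean hasToken = stream.incrementToken(); // only the first token matters here
                stream.end();
                return hasToken;
            } finally {
                stream.close();
            }
        }
    }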
From source file:at.newmedialab.lmf.util.solr.suggestion.service.FieldAnalyzerService.java
License:Apache License
    /**
     * Analyzes a string like the default field.
     *
     * @param core the Solr core
     * @param df the name of the default field
     * @param s the string to analyze
     * @return the analyzed string, with tokens separated by single spaces
     */
    public static String analyzeString(SolrCore core, String df, String s) {
        try {
            TokenStream ts = core.getSchema().getFieldType(df).getQueryAnalyzer().tokenStream(df,
                    new StringReader(s));
            CharTermAttribute attr = ts.getAttribute(CharTermAttribute.class);
            StringBuffer b = new StringBuffer();
            ts.reset();
            while (ts.incrementToken()) {
                b.append(" ");
                b.append(attr);
            }
            ts.end();
            ts.close();
            return b.toString().trim();
        } catch (IOException e) {
            e.printStackTrace();
            return s;
        }
    }