Usage examples for the org.apache.lucene.search.vectorhighlight.FastVectorHighlighter constructor
public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch, FragListBuilder fragListBuilder, FragmentsBuilder fragmentsBuilder)
From source file:aos.lucene.tools.FastVectorHighlighterSample.java
License:Apache License
static FastVectorHighlighter getHighlighter() { FragListBuilder fragListBuilder = new SimpleFragListBuilder(); // #F FragmentsBuilder fragmentBuilder = // #F new ScoreOrderFragmentsBuilder( // #F BaseFragmentsBuilder.COLORED_PRE_TAGS, // #F BaseFragmentsBuilder.COLORED_POST_TAGS); // #F return new FastVectorHighlighter(true, true, // #F fragListBuilder, fragmentBuilder); // #F }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> search(String keyword) throws IOException { String indexLocation = this.getIndexLocation(); // System.out.println("Inside search method"); // indexLocation = ""; // BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); // while (true) { try {/* w w w. j a va 2s. c o m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query = keyword; query = "\"" + query + "\""; Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query); SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); // System.out.println(q); // searcher.search(q, collector); // searcher.search(q, null,topDocs); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. 
display results System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(q); // System.out.println("fq "+fq); String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10); //String[] lineFragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 18,10); Document d = searcher.doc(docId); String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { // System.out.println("FRAGMENT iS "+fragments[j]); // int k=0; // for(k=0;k<lineFragments.length;k++){ // fragments[j].getSc String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumberArrayList = new ArrayList<>(); lineNumber = "null"; boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if(lineNumbersList.get(0).isEmpty()){ // lineNumber = "Not Found"; // }else { if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } // } } //here is the tried code for enter space /* else{ System.out.println("YES G"); String lines[] = fragments[j].split("\\r?\\n"); // ArrayList<String> newLines = new ArrayList<>(); ArrayList<String> newLines = new ArrayList<>(Arrays.asList(lines)); System.out.println("Here 3"); int special = 0; for(String line : newLines){ if(Pattern.compile("^$").matcher(line).find()){ newLines.remove(line); special++; } } System.out.println("Here 4"); // List<String> list = Arrays.asList(lines); // if(list.contains(temp)){ // // } // for(String line: newLines){ // System.out.println("LINE IS "+line); // } if(newLines.size()==1){ // System.out.println("Yes G but 
NOT G"); lineNumbersList = lns.search(temp,filePath); if(!lineNumberArrayList.isEmpty()){ lineNumber = lineNumbersList.get(0); } System.out.println("Here 1"); }else{ System.out.println("Here 2"); ArrayList<String> a0 = lns.search(Jsoup.parse(newLines.get(0)).text(),filePath); ArrayList<String> a1 = lns.search(Jsoup.parse(newLines.get(1)).text(),filePath); int k,l; outerloop: for(k=0;k<a0.size();k++){ for(l=0;l<a1.size();l++){ int secondline = Integer.parseInt(a1.get(l)); // System.out.println("second line is"+ secondline); int firstline = Integer.parseInt(a0.get(k)); // System.out.println("first line is"+firstline); int diff = secondline - firstline; // System.out.println("DIFFERENCE IS "+diff); // System.out.println("Special IS "+special); if(diff == special+1){ insideLoopFlag = true; // System.out.println("K IS "+k); // System.out.println("IN BREAK "); break outerloop; } } // System.out.println("K IS "+k); } // System.out.println("OUT OF FOR LOOP"); // System.out.println("K IS "+k); if(insideLoopFlag==true){ lineNumber = String.valueOf(a0.get(k)); } // System.out.println("LINE NUMBER IS "+lineNumber); } } */ // } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + "...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } //Setting Results SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); // sr.setLineNumber(lineNumber); searchResulsAL.add(sr); // } // writer.close(); reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); // break; } // } return searchResulsAL; }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> multipleSearch(String keyword1, String keyword2, String radio) throws IOException { String indexLocation = this.getIndexLocation(); try {/*from w w w . jav a2s . c o m*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query1 = keyword1; String query2 = keyword2; query1 = "\"" + query1 + "\""; query2 = "\"" + query2 + "\""; Query q1 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query1); Query q2 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query2); BooleanQuery apiQuery = new BooleanQuery(); if (radio.equalsIgnoreCase("and")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST); } else if (radio.equalsIgnoreCase("or")) { apiQuery.add(q1, BooleanClause.Occur.SHOULD); apiQuery.add(q2, BooleanClause.Occur.SHOULD); } else if (radio.equalsIgnoreCase("not")) { apiQuery.add(q1, BooleanClause.Occur.MUST); apiQuery.add(q2, BooleanClause.Occur.MUST_NOT); } SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); searcher.search(apiQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(apiQuery); // String[] fragments = fvh.getBestFragments(fq, 
searcher.getIndexReader(), docId, "contents", 50, 10); Document d = searcher.doc(docId); // String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumber = "null"; lineNumberArrayList = new ArrayList<>(); boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G g"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + " ...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); searchResulsAL.add(sr); } reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); } return searchResulsAL; }
From source file:net.sourceforge.docfetcher.model.search.HighlightService.java
License:Open Source License
@MutableCopy @NotNull/*www. j a va 2 s. c om*/ @SuppressWarnings("unchecked") private static List<Range> highlightPhrases(@NotNull Query query, @NotNull String text) throws CheckedOutOfMemoryError { // FastVectorHighlighter only supports TermQuery, PhraseQuery and BooleanQuery FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null); FieldQuery fieldQuery = highlighter.getFieldQuery(query); Directory directory = new RAMDirectory(); try { /* * Hack: We have to put the given text in a RAM index, because the * fast-vector highlighter can only work on index readers */ IndexWriterAdapter writer = new IndexWriterAdapter(directory); Document doc = new Document(); doc.add(Fields.createContent(text, true)); // must store token positions and offsets writer.add(doc); Closeables.closeQuietly(writer); // flush unwritten documents into index IndexReader indexReader = IndexReader.open(directory); // This might throw an OutOfMemoryError FieldTermStack fieldTermStack = new FieldTermStack(indexReader, 0, Fields.CONTENT.key(), fieldQuery); FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery); // Hack: We'll use reflection to access a private field java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList"); field.setAccessible(true); LinkedList<WeightedPhraseInfo> infoList = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList); List<Range> ranges = new ArrayList<Range>(infoList.size()); for (WeightedPhraseInfo phraseInfo : infoList) { int start = phraseInfo.getStartOffset(); int end = phraseInfo.getEndOffset(); ranges.add(new Range(start, end - start)); } return ranges; } catch (OutOfMemoryError e) { throw new CheckedOutOfMemoryError(e); } catch (Exception e) { return new ArrayList<Range>(0); } }
From source file:net.sourceforge.docfetcher.model.search.HighlightServiceTest.java
License:Open Source License
@SuppressWarnings("unchecked") @Test// w ww .jav a2 s. c om public void testPhraseHighlighter() throws Exception { // Create index Directory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET); IndexWriterAdapter writer = new IndexWriterAdapter(directory); Document doc = new Document(); doc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.add(doc); Closeables.closeQuietly(writer); // flush unwritten documents into index // Perform phrase search QueryParser queryParser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", analyzer); Query query = queryParser.parse("\"text\""); FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null); FieldQuery fieldQuery = highlighter.getFieldQuery(query); IndexSearcher searcher = null; try { searcher = new IndexSearcher(directory); TopDocs docs = searcher.search(query, 10); assertEquals(1, docs.scoreDocs.length); int docId = docs.scoreDocs[0].doc; // Get phrase highlighting offsets FieldTermStack fieldTermStack = new FieldTermStack(searcher.getIndexReader(), docId, "content", fieldQuery); FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery); java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList"); field.setAccessible(true); LinkedList<WeightedPhraseInfo> list = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList); assertEquals(5, list.get(0).getStartOffset()); assertEquals(9, list.get(0).getEndOffset()); } finally { Closeables.closeQuietly(searcher); } }
From source file:net.sourceforge.vaticanfetcher.model.search.HighlightServiceTest.java
License:Open Source License
@SuppressWarnings("unchecked") @Test//from w w w . ja v a 2s . c o m public void testPhraseHighlighter() throws Exception { // Create index Directory directory = new RAMDirectory(); Analyzer analyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET); IndexWriterAdapter writer = new IndexWriterAdapter(directory); Document doc = new Document(); doc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS)); writer.add(doc); Closeables.closeQuietly(writer); // flush unwritten documents into index // Perform phrase search QueryParser queryParser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", analyzer); Query query = queryParser.parse("\"text\""); FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null); FieldQuery fieldQuery = highlighter.getFieldQuery(query); IndexSearcher searcher = new IndexSearcher(directory); TopDocs docs = searcher.search(query, 10); assertEquals(1, docs.scoreDocs.length); int docId = docs.scoreDocs[0].doc; // Get phrase highlighting offsets FieldTermStack fieldTermStack = new FieldTermStack(searcher.getIndexReader(), docId, "content", fieldQuery); FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery); java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList"); field.setAccessible(true); LinkedList<WeightedPhraseInfo> list = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList); assertEquals(5, list.get(0).getStartOffset()); assertEquals(9, list.get(0).getEndOffset()); }