Example usage for the org.apache.lucene.search.vectorhighlight.FastVectorHighlighter constructor

List of usage examples for the org.apache.lucene.search.vectorhighlight.FastVectorHighlighter constructor

Introduction

On this page you can find example usages of the org.apache.lucene.search.vectorhighlight.FastVectorHighlighter constructor.

Prototype

public FastVectorHighlighter(boolean phraseHighlight, boolean fieldMatch, FragListBuilder fragListBuilder,
        FragmentsBuilder fragmentsBuilder) 

Source Link

Document

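A constructor that takes the phrase-highlight and field-match flags together with the FragListBuilder and FragmentsBuilder to use.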

Usage

From source file:aos.lucene.tools.FastVectorHighlighterSample.java

License:Apache License

/**
 * Creates the highlighter used by this sample.
 *
 * <p>Both boolean constructor flags (phrase highlighting and field matching) are switched on.
 * Fragments are listed by a {@code SimpleFragListBuilder} and assembled by a
 * {@code ScoreOrderFragmentsBuilder} configured with the colored pre/post tag constants of
 * {@code BaseFragmentsBuilder}.
 *
 * @return a newly constructed {@code FastVectorHighlighter}
 */
static FastVectorHighlighter getHighlighter() {
    return new FastVectorHighlighter(
            true, // phraseHighlight
            true, // fieldMatch
            new SimpleFragListBuilder(),
            new ScoreOrderFragmentsBuilder(
                    BaseFragmentsBuilder.COLORED_PRE_TAGS,
                    BaseFragmentsBuilder.COLORED_POST_TAGS));
}

From source file:invertedindex.SearchIndex.java

/**
 * Runs an exact-phrase search for {@code keyword} on the "contents" field of the index found at
 * {@code getIndexLocation()} and builds one {@code SearchResults} entry per hit.
 *
 * <p>For every hit up to 10 highlighted fragments (fragment size 50) are requested. Each fragment
 * has its newlines replaced by spaces and "...." appended; single-line fragments additionally get
 * " at Line N" appended when {@code LineNumberSearcher} finds the fragment text in the file.
 *
 * <p>Side effects: overwrites the {@code totalHits}, {@code searchResulsAL}, {@code lineNumber},
 * {@code lineNumberArrayList} and {@code lineNumbersList} fields and prints the hit count to
 * standard output.
 *
 * @param keyword the phrase to look for; it is wrapped in double quotes before being parsed
 * @return the value of the {@code searchResulsAL} field; if the search fails before that field
 *         is reassigned, whatever it held before this call is returned
 * @throws IOException declared for callers; exceptions raised while searching are caught and
 *         reported on standard output instead of being propagated
 */
public ArrayList<SearchResults> search(String keyword) throws IOException {

    String indexLocation = this.getIndexLocation();

    // try-with-resources: the reader is closed even when parsing, searching or highlighting fails
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true);

        // Quote the keyword so the query parser treats it as one phrase
        Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse("\"" + keyword + "\"");

        FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                FastVectorHighlighter.DEFAULT_FIELD_MATCH, new SimpleFragListBuilder(),
                new ScoreOrderFragmentsBuilder());

        searcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        System.out.println("Found " + hits.length + " hits.");
        totalHits = hits.length;
        searchResulsAL = new ArrayList<>();

        // The field query depends only on the query, so build it once instead of once per hit
        FieldQuery fq = fvh.getFieldQuery(q);

        for (ScoreDoc hit : hits) {
            int docId = hit.doc;

            String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10);

            Document d = searcher.doc(docId);
            String filePath = d.get("path");

            for (int j = 0; j < fragments.length; j++) {
                String fragment = fragments[j];

                lineNumberArrayList = new ArrayList<>();
                lineNumber = "null"; // sentinel: no line number resolved for this fragment

                // Only single-line fragments are looked up; fragments spanning several
                // lines are left without a line number.
                if (!fragment.contains("\n")) {
                    // Strip the highlight markup before searching the file for the text
                    String plainText = Jsoup.parse(fragment).text();
                    LineNumberSearcher lns = new LineNumberSearcher();
                    lineNumbersList = lns.search(plainText, filePath);
                    if (!lineNumbersList.isEmpty()) {
                        lineNumber = lineNumbersList.get(0);
                    }
                }

                fragment = fragment.replace('\n', ' ') + "....";
                if (!lineNumber.equals("null")) {
                    fragment = fragment + " at Line " + lineNumber;
                }
                fragments[j] = fragment;
            }

            SearchResults sr = new SearchResults();
            sr.setFilename(d.get("filename"));
            sr.setScore(hit.score);
            sr.setFragments(fragments);
            sr.setPath(filePath);
            sr.setContentType(d.get("contentType"));

            searchResulsAL.add(sr);
        }

    } catch (Exception e) {
        // Best effort: report the failure and fall through to return the current results field
        System.out.println("Error searching in search index " + e + " : " + e.getMessage());
    }

    return searchResulsAL;

}

From source file:invertedindex.SearchIndex.java

/**
 * Searches the "contents" field of the index found at {@code getIndexLocation()} for two exact
 * phrases combined into one boolean query, and builds one {@code SearchResults} entry per hit.
 *
 * <p>How the two phrases are combined depends on {@code radio} (case-insensitive):
 * <ul>
 *   <li>"and" - both phrases must occur;</li>
 *   <li>"or" - either phrase may occur;</li>
 *   <li>"not" - the first phrase must occur and the second must not.</li>
 * </ul>
 *
 * <p>For every hit up to 10 highlighted fragments (fragment size 50) are requested. Each fragment
 * has its newlines replaced by spaces and " ...." appended; single-line fragments additionally
 * get " at Line N" appended when {@code LineNumberSearcher} finds the fragment text in the file.
 *
 * <p>Side effects: overwrites the {@code totalHits}, {@code searchResulsAL}, {@code lineNumber},
 * {@code lineNumberArrayList} and {@code lineNumbersList} fields and prints the hit count to
 * standard output.
 *
 * @param keyword1 first phrase; wrapped in double quotes before being parsed
 * @param keyword2 second phrase; wrapped in double quotes before being parsed
 * @param radio    combination mode: "and", "or" or "not"
 * @return the value of the {@code searchResulsAL} field; if the search fails before that field
 *         is reassigned, whatever it held before this call is returned
 * @throws IOException declared for callers; exceptions raised while searching are caught and
 *         reported on standard output instead of being propagated
 */
public ArrayList<SearchResults> multipleSearch(String keyword1, String keyword2, String radio)
        throws IOException {

    String indexLocation = this.getIndexLocation();

    // try-with-resources: the reader is closed even when parsing, searching or highlighting fails
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation)))) {
        IndexSearcher searcher = new IndexSearcher(reader);
        TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true);

        // Quote each keyword so the query parser treats it as one phrase
        Query q1 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse("\"" + keyword1 + "\"");
        Query q2 = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse("\"" + keyword2 + "\"");

        // NOTE(review): any other radio value leaves the query without clauses -
        // presumably that yields no hits; confirm against the caller.
        BooleanQuery apiQuery = new BooleanQuery();
        if (radio.equalsIgnoreCase("and")) {
            apiQuery.add(q1, BooleanClause.Occur.MUST);
            apiQuery.add(q2, BooleanClause.Occur.MUST);
        } else if (radio.equalsIgnoreCase("or")) {
            apiQuery.add(q1, BooleanClause.Occur.SHOULD);
            apiQuery.add(q2, BooleanClause.Occur.SHOULD);
        } else if (radio.equalsIgnoreCase("not")) {
            apiQuery.add(q1, BooleanClause.Occur.MUST);
            apiQuery.add(q2, BooleanClause.Occur.MUST_NOT);
        }

        FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT,
                FastVectorHighlighter.DEFAULT_FIELD_MATCH, new SimpleFragListBuilder(),
                new ScoreOrderFragmentsBuilder());

        searcher.search(apiQuery, collector);

        ScoreDoc[] hits = collector.topDocs().scoreDocs;

        System.out.println("Found " + hits.length + " hits.");
        totalHits = hits.length;
        searchResulsAL = new ArrayList<>();

        // The field query depends only on the query, so build it once instead of once per hit
        FieldQuery fq = fvh.getFieldQuery(apiQuery);

        for (ScoreDoc hit : hits) {
            int docId = hit.doc;

            String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10);

            Document d = searcher.doc(docId);
            String filePath = d.get("path");

            for (int j = 0; j < fragments.length; j++) {
                String fragment = fragments[j];

                lineNumber = "null"; // sentinel: no line number resolved for this fragment
                lineNumberArrayList = new ArrayList<>();

                // Only single-line fragments are looked up; fragments spanning several
                // lines are left without a line number.
                if (!fragment.contains("\n")) {
                    // Strip the highlight markup before searching the file for the text
                    String plainText = Jsoup.parse(fragment).text();
                    LineNumberSearcher lns = new LineNumberSearcher();
                    lineNumbersList = lns.search(plainText, filePath);
                    if (!lineNumbersList.isEmpty()) {
                        lineNumber = lineNumbersList.get(0);
                    }
                }

                fragment = fragment.replace('\n', ' ') + " ....";
                if (!lineNumber.equals("null")) {
                    fragment = fragment + " at Line " + lineNumber;
                }
                fragments[j] = fragment;
            }

            SearchResults sr = new SearchResults();
            sr.setFilename(d.get("filename"));
            sr.setScore(hit.score);
            sr.setFragments(fragments);
            sr.setPath(filePath);
            sr.setContentType(d.get("contentType"));

            searchResulsAL.add(sr);
        }

    } catch (Exception e) {
        // Best effort: report the failure and fall through to return the current results field
        System.out.println("Error searching in search index " + e + " : " + e.getMessage());
    }

    return searchResulsAL;

}

From source file:net.sourceforge.docfetcher.model.search.HighlightService.java

License:Open Source License

/**
 * Computes the character ranges of {@code text} covered by phrase matches of {@code query}.
 *
 * <p>The text is written into a temporary in-memory index so that the fast-vector highlighter
 * classes can be run on it; the phrase offsets are then read from the private
 * {@code phraseList} field of {@code FieldPhraseList} via reflection.
 *
 * @param query the query whose phrase matches are to be located
 * @param text  the text to search in
 * @return a new list with one {@code Range} (start offset, length) per matched phrase; an empty
 *         list if any exception other than an out-of-memory condition occurs
 * @throws CheckedOutOfMemoryError if an {@code OutOfMemoryError} is raised while processing
 */
@MutableCopy
@NotNull
@SuppressWarnings("unchecked")
private static List<Range> highlightPhrases(@NotNull Query query, @NotNull String text)
        throws CheckedOutOfMemoryError {
    // FastVectorHighlighter only supports TermQuery, PhraseQuery and BooleanQuery
    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query);
    Directory directory = new RAMDirectory();
    IndexReader indexReader = null;
    try {
        /*
         * Hack: We have to put the given text in a RAM index, because the
         * fast-vector highlighter can only work on index readers
         */
        IndexWriterAdapter writer = new IndexWriterAdapter(directory);
        Document doc = new Document();
        doc.add(Fields.createContent(text, true)); // must store token positions and offsets
        writer.add(doc);
        Closeables.closeQuietly(writer); // flush unwritten documents into index
        indexReader = IndexReader.open(directory);

        // This might throw an OutOfMemoryError
        FieldTermStack fieldTermStack = new FieldTermStack(indexReader, 0, Fields.CONTENT.key(), fieldQuery);

        FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery);

        // Hack: We'll use reflection to access a private field
        java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList");
        field.setAccessible(true);
        LinkedList<WeightedPhraseInfo> infoList = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList);

        List<Range> ranges = new ArrayList<Range>(infoList.size());
        for (WeightedPhraseInfo phraseInfo : infoList) {
            int start = phraseInfo.getStartOffset();
            int end = phraseInfo.getEndOffset();
            ranges.add(new Range(start, end - start)); // Range takes (start, length)
        }
        return ranges;
    } catch (OutOfMemoryError e) {
        throw new CheckedOutOfMemoryError(e);
    } catch (Exception e) {
        // Best effort: any other failure simply means "nothing to highlight"
        return new ArrayList<Range>(0);
    } finally {
        // Release the temporary reader and RAM index on every path; the reader may still be
        // null here if indexing failed before it was opened
        Closeables.closeQuietly(indexReader);
        Closeables.closeQuietly(directory);
    }
}

From source file:net.sourceforge.docfetcher.model.search.HighlightServiceTest.java

License:Open Source License

/**
 * Indexes a single document containing "some text", runs the quoted query "text" against it and
 * checks that the phrase offsets exposed by {@code FieldPhraseList} are 5 (start) and 9 (end).
 */
@SuppressWarnings("unchecked")
@Test
public void testPhraseHighlighter() throws Exception {
    // Build an in-memory index holding one document with positions and offsets in its term vector
    Directory ramIndex = new RAMDirectory();
    Analyzer standardAnalyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET);
    IndexWriterAdapter indexWriter = new IndexWriterAdapter(ramIndex);
    Document sampleDoc = new Document();
    sampleDoc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    indexWriter.add(sampleDoc);
    Closeables.closeQuietly(indexWriter); // flush unwritten documents into index

    // Parse the phrase query and derive the highlighter's field query from it
    QueryParser parser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", standardAnalyzer);
    Query phraseQuery = parser.parse("\"text\"");
    FastVectorHighlighter fvh = new FastVectorHighlighter(true, true, null, null);
    FieldQuery fieldQuery = fvh.getFieldQuery(phraseQuery);

    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(ramIndex);
        TopDocs topDocs = searcher.search(phraseQuery, 10);
        assertEquals(1, topDocs.scoreDocs.length);
        int matchedDocId = topDocs.scoreDocs[0].doc;

        // Collect the phrase matches for the single hit
        FieldTermStack termStack = new FieldTermStack(searcher.getIndexReader(), matchedDocId, "content",
                fieldQuery);
        FieldPhraseList phrases = new FieldPhraseList(termStack, fieldQuery);

        // The phrase list is private, so it is read through reflection
        java.lang.reflect.Field phraseListField = phrases.getClass().getDeclaredField("phraseList");
        phraseListField.setAccessible(true);
        LinkedList<WeightedPhraseInfo> phraseInfos = (LinkedList<WeightedPhraseInfo>) phraseListField
                .get(phrases);

        assertEquals(5, phraseInfos.get(0).getStartOffset());
        assertEquals(9, phraseInfos.get(0).getEndOffset());
    } finally {
        Closeables.closeQuietly(searcher);
    }
}

From source file:net.sourceforge.vaticanfetcher.model.search.HighlightServiceTest.java

License:Open Source License

/**
 * Indexes a single document containing "some text", runs the quoted query "text" against it and
 * checks that the phrase offsets exposed by {@code FieldPhraseList} are 5 (start) and 9 (end).
 */
@SuppressWarnings("unchecked")
@Test
public void testPhraseHighlighter() throws Exception {
    // Create index
    Directory directory = new RAMDirectory();
    Analyzer analyzer = new StandardAnalyzer(IndexRegistry.LUCENE_VERSION, Collections.EMPTY_SET);
    IndexWriterAdapter writer = new IndexWriterAdapter(directory);
    Document doc = new Document();
    doc.add(new Field("content", "some text", Store.NO, Index.ANALYZED, TermVector.WITH_POSITIONS_OFFSETS));
    writer.add(doc);
    Closeables.closeQuietly(writer); // flush unwritten documents into index

    // Perform phrase search
    QueryParser queryParser = new QueryParser(IndexRegistry.LUCENE_VERSION, "content", analyzer);
    Query query = queryParser.parse("\"text\"");
    FastVectorHighlighter highlighter = new FastVectorHighlighter(true, true, null, null);
    FieldQuery fieldQuery = highlighter.getFieldQuery(query);
    IndexSearcher searcher = null;
    try {
        searcher = new IndexSearcher(directory);
        TopDocs docs = searcher.search(query, 10);
        assertEquals(1, docs.scoreDocs.length);
        int docId = docs.scoreDocs[0].doc;

        // Get phrase highlighting offsets
        FieldTermStack fieldTermStack = new FieldTermStack(searcher.getIndexReader(), docId, "content",
                fieldQuery);
        FieldPhraseList fieldPhraseList = new FieldPhraseList(fieldTermStack, fieldQuery);

        // The phrase list is private, so it is read through reflection
        java.lang.reflect.Field field = fieldPhraseList.getClass().getDeclaredField("phraseList");
        field.setAccessible(true);
        LinkedList<WeightedPhraseInfo> list = (LinkedList<WeightedPhraseInfo>) field.get(fieldPhraseList);
        assertEquals(5, list.get(0).getStartOffset());
        assertEquals(9, list.get(0).getEndOffset());
    } finally {
        // Release the searcher even when an assertion fails
        Closeables.closeQuietly(searcher);
    }
}