List of usage examples for org.apache.lucene.search.IndexSearcher#getIndexReader
public IndexReader getIndexReader()
From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java
License:Open Source License
/**
 * Finds the highest integer PID currently indexed in the given namespace.
 *
 * @param namespace the PID namespace to search within
 * @return the largest PID value in the namespace, or 0 if none is found
 * @throws IOException if the underlying search fails
 */
public int findHighestId(String namespace) throws IOException {
    TermQuery pidQuery = new TermQuery(new Term(PID_NAMESPACE, namespace));
    // Make sure the latest writes are visible before searching.
    searchManager.maybeRefreshBlocking();
    IndexSearcher searcher = searchManager.acquire();
    try {
        log.debug("Query: {}", pidQuery.toString());
        // Sort descending on the integer PID field and keep only the top hit.
        Sort byPidDescending = new Sort(new SortField(PID_INT, SortField.Type.INT, true));
        TopFieldDocs topDocs = searcher.search(pidQuery, 1, byPidDescending);
        if (topDocs.scoreDocs.length > 0) {
            Document topDocument = searcher.getIndexReader().document(topDocs.scoreDocs[0].doc);
            IndexableField pidField = topDocument.getField(PID_INT);
            if (pidField != null) {
                return pidField.numericValue().intValue();
            }
        }
        return 0;
    } finally {
        // Always hand the searcher back to the SearcherManager.
        searchManager.release(searcher);
    }
}
From source file:dk.dbc.opensearch.fedora.search.LuceneFieldIndex.java
License:Open Source License
/** * For queries that are beforehand known to retrieve all (active) documents * from the index, this method can bypass the performance penalty of an * actual search, and simply return all documents from an IndexReader. * @return all PIDs in index as IPidList object * @throws IOException if IndexWriter or IndexReader throws an exception *//*from w w w .j a va2 s. com*/ IPidList getAll() throws IOException { IPidList results = null; searchManager.maybeRefreshBlocking(); IndexSearcher localSearcher = searchManager.acquire(); IndexReader localReader = localSearcher.getIndexReader(); try { PidCollector pidCollector = new PidCollector(pidCollectorMaxInMemory, pidCollectorTmpDir); for (AtomicReaderContext context : localReader.getContext().leaves()) { AtomicReader subReader = context.reader(); pidCollector.setNextReader(context); Bits liveDocs = subReader.getLiveDocs(); int numDocs = subReader.numDocs(); int numDelDocs = subReader.numDeletedDocs(); log.debug("getAll, reader has {} documents, {} deleted documents", numDocs, numDelDocs); for (int i = 0; i < numDocs + numDelDocs; i++) { if (liveDocs != null && !liveDocs.get(i)) { // Skip deleted documents log.trace("Skipping deleted document {}", i); continue; } log.trace("Getting doc id {}", i); pidCollector.collect(i); } } results = pidCollector.getResults(); } finally { searchManager.release(localSearcher); } return results; }
From source file:dk.defxws.fgslucene.Statement.java
License:Open Source License
public ResultSet executeQuery(IndexSearcher searcher, String queryString, int startRecord, int maxResults, int snippetsMax, int fieldMaxLength, Analyzer analyzer, String defaultQueryFields, boolean allowLeadingWildcard, boolean lowercaseExpandedTerms, String indexPath, String indexName, String snippetBegin, String snippetEnd, String sortFields) throws GenericSearchException { if (logger.isDebugEnabled()) logger.debug("executeQuery" + " query=" + queryString + " startRecord=" + startRecord + " maxResults=" + maxResults + " snippetsMax=" + snippetsMax + " fieldMaxLength=" + fieldMaxLength + " indexName=" + indexName + " sortFields=" + sortFields + " defaultQueryFields=" + defaultQueryFields + " allowLeadingWildcard=" + allowLeadingWildcard + " lowercaseExpandedTerms=" + lowercaseExpandedTerms); this.searcher = searcher; ResultSet rs = null;//from w ww . j a va 2s . c o m StringTokenizer defaultFieldNames = new StringTokenizer(defaultQueryFields); int countFields = defaultFieldNames.countTokens(); String[] defaultFields = new String[countFields]; for (int i = 0; i < countFields; i++) { defaultFields[i] = defaultFieldNames.nextToken(); } Query query = null; if (defaultFields.length == 1) { QueryParser queryParser = new QueryParser(Version.LUCENE_36, defaultFields[0], analyzer); queryParser.setAllowLeadingWildcard(allowLeadingWildcard); queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms); if (logger.isDebugEnabled()) logger.debug("executeQuery queryParser" + " allowLeadingWildcard=" + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms=" + queryParser.getLowercaseExpandedTerms()); try { query = queryParser.parse(queryString); } catch (ParseException e) { throw new GenericSearchException(e.toString()); } } else { MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, defaultFields, analyzer); queryParser.setAllowLeadingWildcard(allowLeadingWildcard); queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms); if 
(logger.isDebugEnabled()) logger.debug("executeQuery mfqueryParser" + " allowLeadingWildcard=" + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms=" + queryParser.getLowercaseExpandedTerms()); try { query = queryParser.parse(queryString); } catch (ParseException e) { throw new GenericSearchException(e.toString()); } } if (logger.isDebugEnabled()) logger.debug("executeQuery after parse query=" + query); try { query.rewrite(searcher.getIndexReader()); } catch (Exception e) { throw new GenericSearchException(e.toString()); } if (logger.isDebugEnabled()) logger.debug("executeQuery after rewrite query=" + query); int start = Integer.parseInt(Integer.toString(startRecord)); TopDocs hits = getHits(query, start + maxResults - 1, sortFields); ScoreDoc[] docs = hits.scoreDocs; int end = Math.min(hits.totalHits, start + maxResults - 1); if (logger.isDebugEnabled()) logger.debug("executeQuery hits.totalHits=" + hits.totalHits); StringBuffer resultXml = new StringBuffer(); resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); String queryStringEncoded = null; try { queryStringEncoded = URLEncoder.encode(queryString, "UTF-8"); } catch (UnsupportedEncodingException e) { errorExit(e.toString()); } resultXml.append("<lucenesearch " + " xmlns:dc=\"http://purl.org/dc/elements/1.1/" + "\" query=\"" + queryStringEncoded + "\" indexName=\"" + indexName + "\" sortFields=\"" + sortFields + "\" hitPageStart=\"" + startRecord + "\" hitPageSize=\"" + maxResults + "\" hitTotal=\"" + hits.totalHits + "\">"); ScoreDoc hit = null; Document doc = null; String hitsScore = null; for (int i = start; i <= end; i++) { try { hit = docs[i - 1]; doc = searcher.doc(hit.doc); hitsScore = "" + hit.score; } catch (CorruptIndexException e) { errorExit(e.toString()); } catch (IOException e) { errorExit(e.toString()); } resultXml.append("<hit no=\"" + i + "\" score=\"" + hitsScore + "\">"); for (ListIterator li = doc.getFields().listIterator(); li.hasNext();) { Fieldable f = (Fieldable) 
li.next(); resultXml.append("<field name=\"" + f.name() + "\""); String snippets = null; if (snippetsMax > 0) { SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("!!!SNIPPETBEGIN", "!!!SNIPPETEND"); QueryScorer scorer = new QueryScorer(query, f.name()); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(fieldMaxLength); highlighter.setTextFragmenter(fragmenter); TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(f.stringValue())); try { snippets = highlighter.getBestFragments(tokenStream, f.stringValue(), snippetsMax, " ... "); } catch (Exception e) { // all Exceptions to be caught, not just IOException errorExit(e.toString()); } snippets = checkTruncatedWords(snippets, " ... "); snippets = StreamUtility.enc(snippets); snippets = snippets.replaceAll("!!!SNIPPETBEGIN", snippetBegin); snippets = snippets.replaceAll("!!!SNIPPETEND", snippetEnd); if (snippets != null && !snippets.equals("")) { resultXml.append(" snippet=\"yes\">" + snippets); } } if (snippets == null || snippets.equals("")) if (fieldMaxLength > 0 && f.stringValue().length() > fieldMaxLength) { String snippet = f.stringValue().substring(0, fieldMaxLength); int iamp = snippet.lastIndexOf("&"); if (iamp > -1 && iamp > fieldMaxLength - 8) snippet = snippet.substring(0, iamp); resultXml.append(">" + StreamUtility.enc(snippet) + " ... "); } else resultXml.append(">" + StreamUtility.enc(f.stringValue())); resultXml.append("</field>"); } resultXml.append("</hit>"); } resultXml.append("</lucenesearch>"); if (logger.isDebugEnabled()) { int size = 500; if (resultXml.length() < size) size = resultXml.length(); String debugString = resultXml.substring(0, size); if (resultXml.length() > size) debugString += "..."; logger.debug("executeQuery resultXml=" + debugString); } rs = new ResultSet(resultXml); return rs; }
From source file:dk.defxws.fgssolr.Statement.java
License:Open Source License
public ResultSet executeQuery(IndexSearcher searcher, String queryString, int startRecord, int maxResults, int snippetsMax, int fieldMaxLength, Analyzer analyzer, String defaultQueryFields, String indexPath, String indexName, String snippetBegin, String snippetEnd, String sortFields) throws GenericSearchException { boolean allowLeadingWildcard = true; boolean lowercaseExpandedTerms = true; if (logger.isDebugEnabled()) logger.debug("executeQuery" + " query=" + queryString + " startRecord=" + startRecord + " maxResults=" + maxResults + " snippetsMax=" + snippetsMax + " fieldMaxLength=" + fieldMaxLength + " indexName=" + indexName + " sortFields=" + sortFields + " defaultQueryFields=" + defaultQueryFields + " allowLeadingWildcard=" + allowLeadingWildcard + " lowercaseExpandedTerms=" + lowercaseExpandedTerms); this.searcher = searcher; ResultSet rs = null;//from w ww. j a v a 2 s .c o m StringTokenizer defaultFieldNames = new StringTokenizer(defaultQueryFields); int countFields = defaultFieldNames.countTokens(); String[] defaultFields = new String[countFields]; for (int i = 0; i < countFields; i++) { defaultFields[i] = defaultFieldNames.nextToken(); } Query query = null; if (defaultFields.length == 1) { QueryParser queryParser = new QueryParser(Version.LUCENE_36, defaultFields[0], analyzer); queryParser.setAllowLeadingWildcard(allowLeadingWildcard); queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms); if (logger.isDebugEnabled()) logger.debug("executeQuery queryParser" + " allowLeadingWildcard=" + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms=" + queryParser.getLowercaseExpandedTerms()); try { query = queryParser.parse(queryString); } catch (ParseException e) { throw new GenericSearchException(e.toString()); } } else { MultiFieldQueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_36, defaultFields, analyzer); queryParser.setAllowLeadingWildcard(allowLeadingWildcard); 
queryParser.setLowercaseExpandedTerms(lowercaseExpandedTerms); if (logger.isDebugEnabled()) logger.debug("executeQuery mfqueryParser" + " allowLeadingWildcard=" + queryParser.getAllowLeadingWildcard() + " lowercaseExpandedTerms=" + queryParser.getLowercaseExpandedTerms()); try { query = queryParser.parse(queryString); } catch (ParseException e) { throw new GenericSearchException(e.toString()); } } if (logger.isDebugEnabled()) logger.debug("executeQuery after parse query=" + query); try { query.rewrite(searcher.getIndexReader()); } catch (Exception e) { throw new GenericSearchException(e.toString()); } if (logger.isDebugEnabled()) logger.debug("executeQuery after rewrite query=" + query); int start = Integer.parseInt(Integer.toString(startRecord)); TopDocs hits = getHits(query, start + maxResults - 1, sortFields); ScoreDoc[] docs = hits.scoreDocs; int end = Math.min(hits.totalHits, start + maxResults - 1); if (logger.isDebugEnabled()) logger.debug("executeQuery hits.totalHits=" + hits.totalHits); StringBuffer resultXml = new StringBuffer(); resultXml.append("<?xml version=\"1.0\" encoding=\"UTF-8\"?>"); String queryStringEncoded = null; try { queryStringEncoded = URLEncoder.encode(queryString, "UTF-8"); } catch (UnsupportedEncodingException e) { errorExit(e.toString()); } resultXml.append("<solrsearch " + " xmlns:dc=\"http://purl.org/dc/elements/1.1/" + "\" query=\"" + queryStringEncoded + "\" indexName=\"" + indexName + "\" sortFields=\"" + sortFields + "\" hitPageStart=\"" + startRecord + "\" hitPageSize=\"" + maxResults + "\" hitTotal=\"" + hits.totalHits + "\">"); ScoreDoc hit = null; Document doc = null; String hitsScore = null; for (int i = start; i <= end; i++) { try { hit = docs[i - 1]; doc = searcher.doc(hit.doc); hitsScore = "" + hit.score; } catch (CorruptIndexException e) { errorExit(e.toString()); } catch (IOException e) { errorExit(e.toString()); } resultXml.append("<hit no=\"" + i + "\" score=\"" + hitsScore + "\">"); for (ListIterator li = 
doc.getFields().listIterator(); li.hasNext();) { Fieldable f = (Fieldable) li.next(); resultXml.append("<field name=\"" + f.name() + "\""); String snippets = null; if (snippetsMax > 0) { SimpleHTMLFormatter formatter = new SimpleHTMLFormatter("!!!SNIPPETBEGIN", "!!!SNIPPETEND"); QueryScorer scorer = new QueryScorer(query, f.name()); Highlighter highlighter = new Highlighter(formatter, scorer); Fragmenter fragmenter = new SimpleFragmenter(fieldMaxLength); highlighter.setTextFragmenter(fragmenter); TokenStream tokenStream = analyzer.tokenStream(f.name(), new StringReader(f.stringValue())); try { snippets = highlighter.getBestFragments(tokenStream, f.stringValue(), snippetsMax, " ... "); } catch (Exception e) { // all Exceptions to be caught, not just IOException errorExit(e.toString()); } snippets = checkTruncatedWords(snippets, " ... "); snippets = StreamUtility.enc(snippets); snippets = snippets.replaceAll("!!!SNIPPETBEGIN", snippetBegin); snippets = snippets.replaceAll("!!!SNIPPETEND", snippetEnd); if (snippets != null && !snippets.equals("")) { resultXml.append(" snippet=\"yes\">" + snippets); } } if (snippets == null || snippets.equals("")) if (fieldMaxLength > 0 && f.stringValue().length() > fieldMaxLength) { String snippet = f.stringValue().substring(0, fieldMaxLength); int iamp = snippet.lastIndexOf("&"); if (iamp > -1 && iamp > fieldMaxLength - 8) snippet = snippet.substring(0, iamp); resultXml.append(">" + StreamUtility.enc(snippet) + " ... "); } else resultXml.append(">" + StreamUtility.enc(f.stringValue())); resultXml.append("</field>"); } resultXml.append("</hit>"); } resultXml.append("</solrsearch>"); if (logger.isDebugEnabled()) { int size = 500; if (resultXml.length() < size) size = resultXml.length(); String debugString = resultXml.substring(0, size); if (resultXml.length() > size) debugString += "..."; logger.debug("executeQuery resultXml=" + debugString); } rs = new ResultSet(resultXml); return rs; }
From source file:edu.isi.pfindr.learn.search.LuceneDictionaryAugmenter.java
License:Apache License
public String expandWithDictionaryFromTopLuceneIndexTerms(String data) { //System.out.println("Original data"+ data); StringBuilder dictionaryDataBuilder = new StringBuilder(); data = data.replaceAll("\\s+", " "); dictionaryDataBuilder.append(data);//w w w .j av a 2s.com try { //Construct the query //Query q = new QueryParser(Version.LUCENE_30, "id_content", analyzer).parse(data); //Query q = new QueryParser(Version.LUCENE_30, "content", analyzer).parse(data); //IndexReader indexReader = IndexReader.open(indexDir); IndexSearcher indexSearcher = new IndexSearcher(indexDir); QueryParser queryParser = new QueryParser(Version.LUCENE_30, "content", new StandardAnalyzer(Version.LUCENE_30, new File(ServletContextInfo.getContextPath() + stopWordsDirectory + "stopwords.txt"))); //queryParser.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = queryParser.parse(data); //Get the top hits TopScoreDocCollector collector = TopScoreDocCollector.create(HIT_COUNT, true); //Search dictionary index indexSearcher.search(query, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; //System.out.println("Hits retrieved:"+ hits.length); //Parse through the top hits (number of hits specified by HIT_COUNT above) and //collect the frequency of the terms in a Map Map<String, Double> termFreqMap = new HashMap<String, Double>(); double value; for (int i = 0; i < hits.length; ++i) { TermPositionVector v = (TermPositionVector) indexSearcher.getIndexReader() .getTermFreqVector(hits[i].doc, "content"); //.getTermFreqVector(hits[i].doc, "id_content"); String[] terms = v.getTerms(); int[] freq = v.getTermFrequencies(); double[] tfidf = new double[v.getTerms().length]; double termTotal = 0.0; int docTotal = indexSearcher.getIndexReader().numDocs(); for (int t = 0; t < terms.length; t++) { termTotal += freq[t]; } for (int j = 0; j < terms.length; ++j) { tfidf[j] = (double) (freq[j] / termTotal) * (1 + Math.log(docTotal / (1 + //(indexSearcher.getIndexReader().docFreq(new 
Term("id_contents", terms[j])))))); (indexSearcher.getIndexReader().docFreq(new Term("content", terms[j])))))); if (!termFreqMap.containsKey(terms[j])) {//if the map does not already contain the phenotype termFreqMap.put(terms[j], tfidf[j]); } else { //else add to the existing value value = termFreqMap.get(terms[j]).doubleValue() > tfidf[j] ? termFreqMap.get(terms[j]).doubleValue() : tfidf[j]; //value = ((Double)termFreqMap.get(terms[j])).doubleValue() + tfidf[j]; termFreqMap.put(terms[j], value); } } } //Append the original query term with the top (specified by MAX_DICTIONARY_TERMS) most frequent terms if (hits.length > 0) { value = 0; //reusing variable as an index now //System.out.println("Sorted Map......"); Map<String, String> sortedMap = SortMap.sortByComparator(termFreqMap); //Include the top 10 matches from the dictionary definition for (Map.Entry entry : sortedMap.entrySet()) { dictionaryDataBuilder.append(" ") .append((((String) entry.getKey()).replaceAll("\\t", " ")).replaceAll("\\s+", " ")); if (value++ > MAX_DICTIONARY_TERMS) //get the top 10 terms break; //System.out.println("Key : " + entry.getKey() //+ " Value : " + entry.getValue()); } } // close searcher, no need to access the documents any more. indexSearcher.close(); } catch (CorruptIndexException ce) { ce.printStackTrace(); } catch (IOException io) { io.printStackTrace(); } catch (Exception e) { e.printStackTrace(); } //System.out.println(" Expand word with dictionary .."+ dictionaryDataBuilder.toString()); return dictionaryDataBuilder.toString(); }
From source file:edu.mit.ll.vizlinc.highlight.WeightedSpanTermExtractor.java
License:Apache License
/**
 * Returns a (cached) reader over an in-memory index built from the current
 * token stream for the given field.
 */
private IndexReader getReaderForField(String field) throws IOException {
    if (wrapToCaching && !cachedTokenStream && !(tokenStream instanceof CachingTokenFilter)) {
        // Wrap once so the stream can be replayed for every field that asks.
        tokenStream = new CachingTokenFilter(new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
        cachedTokenStream = true;
    }
    IndexReader cached = readers.get(field);
    if (cached != null) {
        return cached;
    }
    // Build a single-field MemoryIndex and hand back its reader.
    MemoryIndex memoryIndex = new MemoryIndex();
    memoryIndex.addField(field, new OffsetLimitTokenFilter(tokenStream, maxDocCharsToAnalyze));
    tokenStream.reset();
    IndexReader fieldReader = memoryIndex.createSearcher().getIndexReader();
    readers.put(field, fieldReader);
    return fieldReader;
}
From source file:Example.lucene.SearchNHilight.java
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException { //... Above, create documents with two fields, one with term vectors (tv) and one without (notv) Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45); Directory index = FSDirectory.open(new File("data/indexing")); String querystr = args.length > 0 ? args[0] : "golf user"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer) .parse(querystr);//from ww w. j av a 2 s . c o m // 3. search int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); String Preview; for (int i = 0; i < 10; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String text; Preview = ""; System.out.println(doc.get("url")); System.out.println(doc.get("title")); text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); int k = 0; for (TextFragment frag1 : frag) { if ((frag1 != null) && (frag1.getScore() > 0)) { Preview += (frag1.toString()) + "...<br>"; k++; // Get 2 Line Preview if (k >= 2) break; } } //Term vector System.out.println("-------------"); } }
From source file:focusedCrawler.util.persistence.Searcher.java
License:Open Source License
public TermEnum listElements(String indexDir) throws IOException { try {//from www. jav a2 s . c o m Directory fsDir = FSDirectory.open(new File(indexDir)); IndexSearcher is = new IndexSearcher(fsDir); IndexReader reader = is.getIndexReader(); return reader.terms(); } catch (java.io.FileNotFoundException ex) { return null; } }
From source file:invertedindex.ReadingIndex.java
public Map<String, Set<Integer>> printingIndex() throws IOException { try {/*from w w w . ja v a2 s. com*/ MatchAllDocsQuery query = new MatchAllDocsQuery(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(getIndexLocation()))); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(query, Integer.MAX_VALUE); Map<String, Set<Integer>> invertedIndex = new HashMap<>(); if (null == hits.scoreDocs || hits.scoreDocs.length <= 0) { System.out.println("No Hits Found with MatchAllDocsQuery"); return null; } for (ScoreDoc hit : hits.scoreDocs) { Document doc = searcher.doc(hit.doc); List<IndexableField> allFields = doc.getFields(); for (IndexableField field : allFields) { //Single document inverted index Terms terms = searcher.getIndexReader().getTermVector(hit.doc, field.name()); if (terms != null) { TermsEnum termsEnum = terms.iterator(null); while (termsEnum.next() != null) { if (invertedIndex.containsKey(termsEnum.term().utf8ToString())) { Set<Integer> existingDocs = invertedIndex.get(termsEnum.term().utf8ToString()); existingDocs.add(hit.doc); // existingDocs.add(Integer.parseInt((searcher.doc(hit.doc).get("lineNumber")))); invertedIndex.put(termsEnum.term().utf8ToString(), existingDocs); } else { Set<Integer> docs = new TreeSet<>(); docs.add(hit.doc); // docs.add(Integer.parseInt((searcher.doc(hit.doc).get("lineNumber")))); invertedIndex.put(termsEnum.term().utf8ToString(), docs); } } } } } //System.out.println("Printing Inverted Index:"); //invertedIndex.forEach((key , value) -> {System.out.println(key+":"+value); //}); return invertedIndex; } catch (Exception e) { return null; } }
From source file:invertedindex.SearchIndex.java
public ArrayList<SearchResults> search(String keyword) throws IOException { String indexLocation = this.getIndexLocation(); // System.out.println("Inside search method"); // indexLocation = ""; // BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); // while (true) { try {/*from w w w.java2 s. c om*/ IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(topDocs, true); String query = keyword; query = "\"" + query + "\""; Query q = new QueryParser(Version.LUCENE_47, "contents", analyzer).parse(query); SimpleFragListBuilder fragListBuilder = new SimpleFragListBuilder(); ScoreOrderFragmentsBuilder fragBuilder = new ScoreOrderFragmentsBuilder(); FastVectorHighlighter fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); fvh = new FastVectorHighlighter(FastVectorHighlighter.DEFAULT_PHRASE_HIGHLIGHT, FastVectorHighlighter.DEFAULT_FIELD_MATCH, fragListBuilder, fragBuilder); // System.out.println(q); // searcher.search(q, collector); // searcher.search(q, null,topDocs); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. 
display results System.out.println("Found " + hits.length + " hits."); totalHits = hits.length; searchResulsAL = new ArrayList<>(); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; FieldQuery fq = fvh.getFieldQuery(q); // System.out.println("fq "+fq); String[] fragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 50, 10); //String[] lineFragments = fvh.getBestFragments(fq, searcher.getIndexReader(), docId, "contents", 18,10); Document d = searcher.doc(docId); String filePath = d.get("path"); for (int j = 0; j < fragments.length; j++) { // System.out.println("FRAGMENT iS "+fragments[j]); // int k=0; // for(k=0;k<lineFragments.length;k++){ // fragments[j].getSc String temp = Jsoup.parse(fragments[j]).text(); // LineNumberSearcher lns = new LineNumberSearcher(); //lineNumbersList = new ArrayList<>(); lineNumberArrayList = new ArrayList<>(); lineNumber = "null"; boolean g = Pattern.compile("\\n").matcher(fragments[j]).find(); if (!g) { // System.out.println("NO G"); lineNumbersList = lns.search(temp, filePath); // for(String s : lineNumbersList){ // System.out.println("s is "+s); // } // if(lineNumbersList.get(0).isEmpty()){ // lineNumber = "Not Found"; // }else { if (!lineNumbersList.isEmpty()) { // System.out.println("in line number"); lineNumber = lineNumbersList.get(0); } // } } //here is the tried code for enter space /* else{ System.out.println("YES G"); String lines[] = fragments[j].split("\\r?\\n"); // ArrayList<String> newLines = new ArrayList<>(); ArrayList<String> newLines = new ArrayList<>(Arrays.asList(lines)); System.out.println("Here 3"); int special = 0; for(String line : newLines){ if(Pattern.compile("^$").matcher(line).find()){ newLines.remove(line); special++; } } System.out.println("Here 4"); // List<String> list = Arrays.asList(lines); // if(list.contains(temp)){ // // } // for(String line: newLines){ // System.out.println("LINE IS "+line); // } if(newLines.size()==1){ // System.out.println("Yes G but 
NOT G"); lineNumbersList = lns.search(temp,filePath); if(!lineNumberArrayList.isEmpty()){ lineNumber = lineNumbersList.get(0); } System.out.println("Here 1"); }else{ System.out.println("Here 2"); ArrayList<String> a0 = lns.search(Jsoup.parse(newLines.get(0)).text(),filePath); ArrayList<String> a1 = lns.search(Jsoup.parse(newLines.get(1)).text(),filePath); int k,l; outerloop: for(k=0;k<a0.size();k++){ for(l=0;l<a1.size();l++){ int secondline = Integer.parseInt(a1.get(l)); // System.out.println("second line is"+ secondline); int firstline = Integer.parseInt(a0.get(k)); // System.out.println("first line is"+firstline); int diff = secondline - firstline; // System.out.println("DIFFERENCE IS "+diff); // System.out.println("Special IS "+special); if(diff == special+1){ insideLoopFlag = true; // System.out.println("K IS "+k); // System.out.println("IN BREAK "); break outerloop; } } // System.out.println("K IS "+k); } // System.out.println("OUT OF FOR LOOP"); // System.out.println("K IS "+k); if(insideLoopFlag==true){ lineNumber = String.valueOf(a0.get(k)); } // System.out.println("LINE NUMBER IS "+lineNumber); } } */ // } fragments[j] = fragments[j].replaceAll("\\n", " "); // System.out.println("\t\t" + fragments[j] + "..."); fragments[j] = fragments[j] + "...."; if (!(lineNumber.equals("null"))) { // System.out.println("in line number"); fragments[j] = fragments[j] + " at Line " + lineNumber; } } //Setting Results SearchResults sr = new SearchResults(); sr.setFilename(d.get("filename")); sr.setScore(hits[i].score); sr.setFragments(fragments); sr.setPath(filePath); sr.setContentType(d.get("contentType")); // sr.setLineNumber(lineNumber); searchResulsAL.add(sr); // } // writer.close(); reader.close(); } catch (Exception e) { System.out.println("Error searching in search index " + e + " : " + e.getMessage()); // break; } // } return searchResulsAL; }