List of usage examples for org.apache.lucene.search IndexSearcher doc
Method signature: public Document doc(int docID) throws IOException
Equivalent call: searcher.getIndexReader().document(docID)
From source file:Example.lucene.SearchNHilight.java
public static void main(String[] args) throws IOException, ParseException, InvalidTokenOffsetsException { //... Above, create documents with two fields, one with term vectors (tv) and one without (notv) Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45); Directory index = FSDirectory.open(new File("data/indexing")); String querystr = args.length > 0 ? args[0] : "golf user"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query query = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer) .parse(querystr);//from w w w . j a va 2 s . com // 3. search int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); String Preview; for (int i = 0; i < 10; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String text; Preview = ""; System.out.println(doc.get("url")); System.out.println(doc.get("title")); text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", analyzer); TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); int k = 0; for (TextFragment frag1 : frag) { if ((frag1 != null) && (frag1.getScore() > 0)) { Preview += (frag1.toString()) + "...<br>"; k++; // Get 2 Line Preview if (k >= 2) break; } } //Term vector System.out.println("-------------"); } }
From source file:Example.lucene.TestIndexer.java
public static void main(String[] args) throws IOException, ParseException { // 0. Specify the analyzer for tokenizing text. // The same analyzer should be used for indexing and searching Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45); String InDirName = "data/test_snipped"; File InDir = new File(InDirName); // 1. create the index Directory index = FSDirectory.open(new File("data/indexingonly")); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer); try (IndexWriter w = new IndexWriter(index, config)) { String[] s;/*from w ww . ja va 2 s .co m*/ int id = 1; for (File f : InDir.listFiles()) { try (ArcReader ar = new ArcReader(f)) { System.out.println(f.getName()); while (ar.Next()) { s = ar.Record.ArchiveContent.split("\n"); switch (s.length) { case 2: addDoc(w, id++, ar.Record.URL, s[0], s[1]); break; case 1: addDoc(w, id++, ar.Record.URL, s[0], ""); break; default: break; } } } } } // 2. query String querystr = args.length > 0 ? args[0] : ""; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query q = new QueryParser(Version.LUCENE_45, "title", analyzer).parse(querystr); // 3. search int hitsPerPage = 10; try (IndexReader reader = DirectoryReader.open(index)) { IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(q, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; // 4. display results System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("id") + "\t" + d.get("url") + "\t" + d.get("title") + "\t" + d.get("content")); } } }
From source file:Example.lucene.TestSearch.java
public static void main(String[] args) throws ParseException, IOException { Analyzer analyzer = new ThaiAnalyzer(Version.LUCENE_45); Directory index = FSDirectory.open(new File("data/indexingonly")); // 2. query//from w ww .j a v a 2 s . c o m String querystr = args.length > 0 ? args[0] : "golf user"; // the "title" arg specifies the default field to use // when no field is explicitly specified in the query. Query q = new MultiFieldQueryParser(Version.LUCENE_45, new String[] { "content" }, analyzer) .parse(querystr); // 3. search int hitsPerPage = 10; try (IndexReader reader = DirectoryReader.open(index)) { IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage, true); searcher.search(q, collector); TopDocs td = collector.topDocs(5); ScoreDoc[] hits = td.scoreDocs; // 4. display results System.out.println("Found " + hits.length + " hits. from " + td.totalHits + " docs."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("id") + "\t" + d.get("url") + "\t" + d.get("title") + "\t" + d.get("content")); } } }
From source file:FindIO.TextIndex.java
License:Apache License
public Map<String, double[]> searchText(String queryString) throws Exception { List<String> terms = Arrays.asList(queryString.trim().split(" ")); IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile)); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null;/*w w w .j a v a 2 s . co m*/ in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(fieldname1, analyzer); Query query = parser.parse(queryString); if (test) System.out.println("Searching for text: " + query.toString(fieldname1)); TopDocs topDocs; if (test) { // repeat & time as benchmark long start = System.currentTimeMillis(); topDocs = searcher.search(query, null, Common.topK); long end = System.currentTimeMillis(); System.out.println("Time: " + (end - start) + " ms"); } else { topDocs = searcher.search(query, null, Common.topK); } ScoreDoc[] hits = topDocs.scoreDocs; Map<String, double[]> mapResults = new HashMap<String, double[]>(); //print out the top hits documents for (ScoreDoc hit : hits) { Document doc = searcher.doc(hit.doc); String tag = doc.get(fieldname1); int index = terms.indexOf(tag); if (index == -1) { continue; } String[] images = doc.get(fieldname2).split("\\s+"); for (int i = 0; i < images.length; i += 2) { String imageName = images[i]; String freq = images[i + 1]; if (mapResults.get(imageName) == null) { mapResults.put(imageName, new double[terms.size()]); } double[] docTerms = mapResults.get(imageName); docTerms[index] = Double.parseDouble(freq); } } reader.close(); return mapResults; }
From source file:FindIO.TextIndex.java
License:Apache License
/** * update score mainly used for relevance feedback, the input should be stemmed * @param imageID//w ww.j a va 2 s . c o m * @param tag_score_pairs * @throws Throwable */ public void updateScore(String imageID, List<FindIOPair> tag_score_pairs) throws Throwable { IndexReader reader = DirectoryReader.open(FSDirectory.open(indexFile)); IndexSearcher searcher = new IndexSearcher(reader); // :Post-Release-Update-Version.LUCENE_XY: Analyzer analyzer = new StandardAnalyzer(); BufferedReader in = null; in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8)); // :Post-Release-Update-Version.LUCENE_XY: QueryParser parser = new QueryParser(fieldname1, analyzer); for (FindIOPair pair : tag_score_pairs) { String tag = pair.getID(); double add_score = pair.getValue(); Query query = parser.parse(tag); System.out.println("Updating Text: " + query.toString(fieldname1)); TopDocs topDocs; if (test) { // repeat & time as benchmark long start = System.currentTimeMillis(); topDocs = searcher.search(query, null, Common.topK); long end = System.currentTimeMillis(); System.out.println("Time: " + (end - start) + " ms"); } else { topDocs = searcher.search(query, null, Common.topK); } ScoreDoc[] hits = topDocs.scoreDocs; if (hits.length == 0) { //It's a new tag Document doc = new Document(); String img_score = imageID + " " + (0.1 * add_score) + " "; if (add_score > 0) { // set fields for document this.tag_field.setStringValue(this.textAnalyzer.getStem(tag)); this.img_field.setStringValue(img_score); doc.add(tag_field); doc.add(img_field); MMwriter.addDocument(doc); } } else { //The tag is included in the index int docId = hits[0].doc; //retrieve the old document Document doc = searcher.doc(docId); //replacement field value String currentScores = doc.get(fieldname2); String[] img_score_pairs = currentScores.split(" "); StringBuilder stringBuilder = new StringBuilder(); boolean isImageContained = false; for (int i = 0; i < img_score_pairs.length; i += 2) { String 
img = img_score_pairs[i]; double old_score = Double.valueOf(img_score_pairs[i + 1]); double new_score = old_score + add_score; if (new_score < 0) { new_score = 0; } String img_score_pair; if (img.equals(imageID)) { img_score_pair = img + " " + new_score + " "; isImageContained = true; } else { img_score_pair = img + " " + old_score + " "; } stringBuilder.append(img_score_pair); } if (!isImageContained) { //If the image was not covered by the tag, append it to the tail stringBuilder.append(imageID + " " + add_score + " "); } //remove all occurrences of the old field doc.removeFields(fieldname2); this.img_field.setStringValue(stringBuilder.toString().trim()); if (test) System.out.println(stringBuilder.toString()); //insert the replacement doc.add(img_field); Term tagTerm = new Term(this.fieldname1, tag); MMwriter.updateDocument(tagTerm, doc); } MMwriter.commit(); } reader.close(); closeWriter(); }
From source file:focusedCrawler.util.persistence.Searcher.java
License:Open Source License
public String get(String key) throws Exception { // System.out.println("indexDir: " + this.indexDir.getAbsolutePath()); Directory fsDir = FSDirectory.open(indexDir); IndexSearcher is = new IndexSearcher(fsDir); ScoreDoc[] hits = search(this.indexDir, key, is); if (hits.length != 0) { Document hitDoc = is.doc(hits[0].doc); return hitDoc.get("value"); } else {//ww w . j av a 2 s . c o m return null; } }
From source file:fr.lipn.yasemir.ontology.annotation.KNNAnnotator.java
License:Open Source License
public DocumentAnnotation annotate(String document) { DocumentAnnotation ret = new DocumentAnnotation(); try {//from w w w . j av a 2 s .co m IndexReader reader = IndexReader.open(FSDirectory.open(new File(termIndexPath))); IndexSearcher searcher = new IndexSearcher(reader); document = document.replaceAll("Support, .+?;", ""); document = document.replaceAll("\\[.*?\\]", "").trim(); //document = document.replaceAll( "\\p{Punct}", " " ); String[] fragments = document.split("[;:\\.,]"); for (String ofragment : fragments) { ofragment = ofragment.replaceAll("\\p{Punct}", " "); ofragment = ofragment.trim(); String sa[] = ofragment.split("(?<=[ \\n])"); EnglishStemmer st = new EnglishStemmer(); StringBuffer fbuf = new StringBuffer(); for (String s : sa) { st.setCurrent(s.trim()); st.stem(); fbuf.append(st.getCurrent()); fbuf.append(" "); } String fragment = fbuf.toString().trim(); //stemmed fragment if (fragment.length() == 0) continue; //System.err.println("Annotating: "+fragment); //use K-NN annotation (see Trieschnigg et al. 
2009) IndexReader docreader = IndexReader.open(FSDirectory.open(new File(this.standardIndexPath))); IndexSearcher docsearcher = new IndexSearcher(docreader); QueryParser parser = new QueryParser(Version.LUCENE_44, "text", Yasemir.analyzer); Query query = parser.parse(fragment); System.err.println("Looking for: " + query); TopDocs results = docsearcher.search(query, N); //get the first 100 documents ScoreDoc[] hits = results.scoreDocs; int topLimit = Math.min(results.totalHits, K); int bottomLimit = Math.min(results.totalHits, N) - K; int numTotalHits = Math.min(results.totalHits, N); //System.err.println("top:"+topLimit+" bottom:"+bottomLimit+" total:"+numTotalHits); HashMap<String, Double> ttags = new HashMap<String, Double>(); HashMap<String, Integer> btags = new HashMap<String, Integer>(); if (topLimit < bottomLimit) { //Get the tags used in the top K documents matching the request hits = docsearcher.search(query, numTotalHits).scoreDocs; for (int i = 0; i < topLimit; i++) { Document doc = docsearcher.doc(hits[i].doc); Vector<String> tags = new Vector<String>(); List<IndexableField> docFields = doc.getFields(); for (IndexableField f : docFields) { String fname = f.name(); if (fname.endsWith("annot")) { tags.add(fname + ":" + doc.get(fname)); } } String[] tagStrings = (String[]) tags.toArray(); for (String t : tagStrings) { t = t.replaceAll("\\W|_", " "); Double nt = ttags.get(t); if (nt == null) nt = new Double(hits[i].score); else nt = new Double(hits[i].score + nt.doubleValue()); ttags.put(t, nt); } } for (int i = bottomLimit; i < numTotalHits; i++) { Document doc = docsearcher.doc(hits[i].doc); Vector<String> tags = new Vector<String>(); List<IndexableField> docFields = doc.getFields(); for (IndexableField f : docFields) { String fname = f.name(); if (fname.endsWith("annot")) { tags.add(fname + ":" + doc.get(fname)); } } String[] tagStrings = (String[]) tags.toArray(); for (String t : tagStrings) { t = t.replaceAll("\\W|_", " "); Integer nt = btags.get(t); if 
(nt == null) nt = new Integer(1); else nt = new Integer((nt.intValue() + 1)); btags.put(t, nt); } } } Vector<WeightedTag> tagv = new Vector<WeightedTag>(); //now find, for all tags, the corresponding MeSH concepts double sum = 0; for (String tag : ttags.keySet()) { double tagStrength = ttags.get(tag).doubleValue(); double compStrength = 0; if (btags.containsKey(tag)) { compStrength = (btags.get(tag).doubleValue()) / ((double) K); } //System.err.println(tag+ " :str="+tagStrength+", comp="+compStrength); double weight = tagStrength * (1 - compStrength); sum += weight; tagv.add(new WeightedTag(tag, weight)); } double avg = sum / (double) tagv.size(); double ssum = 0; for (WeightedTag wt : tagv) { ssum += Math.sqrt(Math.pow(wt.getWeight() - avg, 2d)); } double stddev = ssum / (double) tagv.size(); //System.err.println("avg w: "+avg+" stddev:"+stddev+" limit:"+(avg+2*stddev)); double limit = (avg + 2 * stddev); //definition of statistic outlier TagComparator comparator = new TagComparator(); Collections.sort(tagv, comparator); int i = 0; for (WeightedTag wt : tagv) { String tag = wt.getName(); if (i >= maxTags) break; if (wt.getWeight() >= limit) { QueryParser tagparser = new QueryParser(Version.LUCENE_44, "labels", Yasemir.analyzer); Query tagquery = tagparser.parse("\"" + tag + "\""); TopDocs tagresults = searcher.search(tagquery, 5); ScoreDoc[] taghits = tagresults.scoreDocs; int numTagTotalHits = tagresults.totalHits; if (numTagTotalHits > 0) { taghits = searcher.search(tagquery, numTagTotalHits).scoreDocs; Document doc = searcher.doc(taghits[0].doc); Annotation ann = new Annotation(doc.get("id")); //System.err.println("Adding: "+tag+" w:"+wt.getWeight()); String ontoID = ann.getRelatedOntology().getOntologyID(); Vector<Annotation> annotations = ret.get(ontoID); if (annotations == null) annotations = new Vector<Annotation>(); annotations.add(ann); ret.put(ontoID, annotations); i++; } } } docreader.close(); } reader.close(); } catch (Exception e) { 
e.printStackTrace(); } return ret; }
From source file:fr.lipn.yasemir.ontology.annotation.SentenceBasedAnnotator.java
License:Open Source License
/** * Implementation of the annotate method by IndexBasedAnnotator. * /*from w w w. jav a2 s .com*/ * The input text is splitted in fragments according to punctuation; * every fragment is used as a query and sent to a Lucene SE that * was used to index the terminology (BM25 weight). * Up to the 20 top results returned by the system are taken as the annotation for the * fragment text. All the fragment annotations combined compose the document annotation * that is returned by this method. * */ public DocumentAnnotation annotate(String document) { DocumentAnnotation ret = new DocumentAnnotation(); try { IndexReader reader = IndexReader.open(FSDirectory.open(new File(termIndexPath))); IndexSearcher searcher = new IndexSearcher(reader); searcher.setSimilarity(new BM25Similarity()); /* document=document.replaceAll("\\[.*?\\]", "").trim(); //document = document.replaceAll( "\\p{Punct}", " " ); String [] fragments = document.split("[;:\\.,]"); */ String[] fragments = (String[]) getSentences(document).toArray(); for (String ofragment : fragments) { ofragment = ofragment.replaceAll("\\p{Punct}", " "); ofragment = ofragment.trim(); String sa[] = ofragment.split("(?<=[ \\n])"); EnglishStemmer st = new EnglishStemmer(); StringBuffer fbuf = new StringBuffer(); for (String s : sa) { st.setCurrent(s.trim()); st.stem(); fbuf.append(st.getCurrent()); fbuf.append(" "); } String fragment = fbuf.toString().trim(); //stemmed fragment if (fragment.length() == 0) continue; //System.err.println("Annotating: "+fragment); QueryParser parser = new QueryParser(Version.LUCENE_44, "labels", Yasemir.analyzer); Query query = parser.parse(fragment); String stemmedFragment = query.toString("labels").replaceAll("labels:", ""); TopDocs results = searcher.search(query, 20); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; //System.err.println(numTotalHits + " total matching classes"); if (numTotalHits > 0) { hits = searcher.search(query, numTotalHits).scoreDocs; for (int i = 
0; i < Math.min(numTotalHits, MAX_ANNOTS); i++) { Document doc = searcher.doc(hits[i].doc); String ptrn = "(?i)(" + doc.get("labels").replaceAll(", ", "|") + ")"; //System.err.println("OWLClass="+doc.get("id")+" score="+hits[i].score); if (Tools.checkPattern(stemmedFragment, ptrn)) { //System.err.println("OK: OWLClass="+doc.get("id")+" score="+hits[i].score); Annotation ann = new Annotation(doc.get("id")); String ontoID = ann.getRelatedOntology().getOntologyID(); Vector<Annotation> annotations = ret.get(ontoID); if (annotations == null) annotations = new Vector<Annotation>(); annotations.add(ann); ret.put(ontoID, annotations); } } } } reader.close(); } catch (Exception e) { e.printStackTrace(); } return ret; }
From source file:fr.paris.lutece.plugins.calendar.service.search.CalendarLuceneSearchEngine.java
License:Open Source License
/** * Return search results//from w w w . j av a 2 s . c o m * @param arrayAgendaIds The calendar ids * @param arrayCategory the category ids * @param strContent The search query * @param dateBegin The date begin * @param dateEnd The date end * @return Results as a collection of SearchResult */ public List<CalendarSearchResult> getSearchResults(String[] arrayAgendaIds, String[] arrayCategory, String strContent, Date dateBegin, Date dateEnd) { ArrayList<CalendarSearchItem> listResults = new ArrayList<CalendarSearchItem>(); if (arrayAgendaIds == null || arrayAgendaIds.length == 0) { return new ArrayList<CalendarSearchResult>(); } IndexSearcher searcher = null; //Filter filterRole = getFilterRoles( request ); Filter filterRole = null; try { IndexReader ir = DirectoryReader.open(IndexationService.getDirectoryIndex()); searcher = new IndexSearcher(ir); Collection<String> queriesForSearchInContent = new ArrayList<String>(); Collection<String> queriesForSearchInTitle = new ArrayList<String>(); Collection<String> fieldsForSearchInContent = new ArrayList<String>(); Collection<String> fieldsForSearchInTitle = new ArrayList<String>(); Collection<BooleanClause.Occur> flagsForSearchInContent = new ArrayList<BooleanClause.Occur>(); Collection<BooleanClause.Occur> flagsForSearchInTitle = new ArrayList<BooleanClause.Occur>(); //Calendar Id if (arrayAgendaIds.length > 0) { String strQueryCalendar = OPEN_PARENTHESIS; int intMoreCalendar = 0; for (String strAgendaId : arrayAgendaIds) { strQueryCalendar += (strAgendaId + "_" + Constants.CALENDAR_SHORT_NAME); ++intMoreCalendar; if ((arrayAgendaIds.length > 1) && (intMoreCalendar < arrayAgendaIds.length)) { strQueryCalendar += (SPACE + OR + SPACE); } } strQueryCalendar += CLOSE_PARENTHESIS; Query queryAgendaId = new TermQuery(new Term(Constants.FIELD_CALENDAR_ID, strQueryCalendar)); queriesForSearchInContent.add(queryAgendaId.toString()); queriesForSearchInTitle.add(queryAgendaId.toString()); 
fieldsForSearchInContent.add(Constants.FIELD_CALENDAR_ID); flagsForSearchInContent.add(BooleanClause.Occur.MUST); fieldsForSearchInTitle.add(Constants.FIELD_CALENDAR_ID); flagsForSearchInTitle.add(BooleanClause.Occur.MUST); } //category Id if ((arrayCategory != null) && (arrayCategory.length > 0)) { String strQueryCategory = OPEN_PARENTHESIS; int intMoreCategory = 0; for (String strCategoryId : arrayCategory) { strQueryCategory += strCategoryId; ++intMoreCategory; if ((arrayCategory.length > 1) && (intMoreCategory < arrayCategory.length)) { strQueryCategory += (SPACE + OR + SPACE); } } strQueryCategory += CLOSE_PARENTHESIS; Query queryAgendaId = new TermQuery(new Term(Constants.FIELD_CATEGORY, strQueryCategory)); queriesForSearchInContent.add(queryAgendaId.toString()); queriesForSearchInTitle.add(queryAgendaId.toString()); fieldsForSearchInContent.add(Constants.FIELD_CATEGORY); flagsForSearchInContent.add(BooleanClause.Occur.MUST); fieldsForSearchInTitle.add(Constants.FIELD_CATEGORY); flagsForSearchInTitle.add(BooleanClause.Occur.MUST); } //Type (=calendar) PhraseQuery queryType = new PhraseQuery(); queryType.add(new Term(SearchItem.FIELD_TYPE, Constants.PLUGIN_NAME)); queriesForSearchInContent.add(queryType.toString()); queriesForSearchInTitle.add(queryType.toString()); fieldsForSearchInContent.add(SearchItem.FIELD_TYPE); flagsForSearchInContent.add(BooleanClause.Occur.MUST); fieldsForSearchInTitle.add(SearchItem.FIELD_TYPE); flagsForSearchInTitle.add(BooleanClause.Occur.MUST); //Content if (StringUtils.isNotBlank(strContent)) { Query queryContent = new TermQuery(new Term(SearchItem.FIELD_CONTENTS, strContent)); queriesForSearchInTitle.add(queryContent.toString()); fieldsForSearchInContent.add(SearchItem.FIELD_CONTENTS); flagsForSearchInContent.add(BooleanClause.Occur.MUST); Query queryTitle = new TermQuery(new Term(SearchItem.FIELD_TITLE, strContent)); queriesForSearchInContent.add(queryTitle.toString()); fieldsForSearchInTitle.add(SearchItem.FIELD_TITLE); 
flagsForSearchInTitle.add(BooleanClause.Occur.MUST); } //Dates if ((dateBegin != null) && (dateEnd != null)) { BytesRef strDateBegin = new BytesRef(Utils.getDate(dateBegin)); BytesRef strDateEnd = new BytesRef(Utils.getDate(dateEnd)); Query queryDate = new TermRangeQuery(SearchItem.FIELD_DATE, strDateBegin, strDateEnd, true, true); queriesForSearchInContent.add(queryDate.toString()); queriesForSearchInTitle.add(queryDate.toString()); fieldsForSearchInContent.add(SearchItem.FIELD_DATE); flagsForSearchInContent.add(BooleanClause.Occur.MUST); fieldsForSearchInTitle.add(SearchItem.FIELD_DATE); flagsForSearchInTitle.add(BooleanClause.Occur.MUST); } else { BytesRef strDate = new BytesRef(Utils.getDate(new Date())); Query queryDate = new TermRangeQuery(SearchItem.FIELD_DATE, strDate, null, true, true); queriesForSearchInContent.add(queryDate.toString()); queriesForSearchInTitle.add(queryDate.toString()); fieldsForSearchInContent.add(SearchItem.FIELD_DATE); flagsForSearchInContent.add(BooleanClause.Occur.MUST); fieldsForSearchInTitle.add(SearchItem.FIELD_DATE); flagsForSearchInTitle.add(BooleanClause.Occur.MUST); } //Search in contents Query queryMulti = MultiFieldQueryParser.parse(IndexationService.LUCENE_INDEX_VERSION, queriesForSearchInContent.toArray(new String[queriesForSearchInContent.size()]), fieldsForSearchInContent.toArray(new String[fieldsForSearchInContent.size()]), flagsForSearchInContent.toArray(new BooleanClause.Occur[flagsForSearchInContent.size()]), IndexationService.getAnalyser()); // Get results documents TopDocs hits = null; int nLimit = Integer.parseInt(AppPropertiesService.getProperty(PROPERTY_RESULTS_LIMIT)); hits = searcher.search(queryMulti, filterRole, nLimit); for (int i = 0; hits.totalHits > i; i++) { ScoreDoc hit = hits.scoreDocs[i]; Document document = searcher.doc(hit.doc); CalendarSearchItem si = new CalendarSearchItem(document); listResults.add(si); } //Search in titles Query queryMultiTitle = 
MultiFieldQueryParser.parse(IndexationService.LUCENE_INDEX_VERSION, queriesForSearchInTitle.toArray(new String[queriesForSearchInTitle.size()]), fieldsForSearchInTitle.toArray(new String[fieldsForSearchInTitle.size()]), flagsForSearchInTitle.toArray(new BooleanClause.Occur[flagsForSearchInTitle.size()]), IndexationService.getAnalyser()); // Get results documents TopDocs hitsTitle = null; hitsTitle = searcher.search(queryMultiTitle, filterRole, nLimit); for (int i = 0; hitsTitle.totalHits > i; i++) { ScoreDoc hit = hitsTitle.scoreDocs[i]; Document document = searcher.doc(hit.doc); CalendarSearchItem si = new CalendarSearchItem(document); listResults.add(si); } } catch (Exception e) { AppLogService.error(e.getMessage(), e); } return convertList(listResults); }
From source file:fr.paris.lutece.plugins.directory.service.directorysearch.DirectoryLuceneSearchEngine.java
License:Open Source License
/** * {@inheritDoc}//from www.j a va2 s. co m */ @Override public List<Integer> getSearchResults(HashMap<String, Object> mapQuery) { ArrayList<Integer> listResults = new ArrayList<Integer>(); IndexSearcher searcher = null; try { searcher = DirectorySearchService.getInstance().getSearcher(); Collection<String> queries = new ArrayList<String>(); Collection<String> fields = new ArrayList<String>(); Collection<BooleanClause.Occur> flags = new ArrayList<BooleanClause.Occur>(); // contains id directory if (mapQuery.containsKey(DirectorySearchItem.FIELD_ID_DIRECTORY)) { Query queryIdDirectory = new TermQuery(new Term(DirectorySearchItem.FIELD_ID_DIRECTORY, Integer.toString((Integer) mapQuery.get(DirectorySearchItem.FIELD_ID_DIRECTORY)))); queries.add(queryIdDirectory.toString()); fields.add(DirectorySearchItem.FIELD_ID_DIRECTORY); flags.add(BooleanClause.Occur.MUST); } if (mapQuery.containsKey(DirectorySearchItem.FIELD_ID_DIRECTORY_ENTRY)) { Query queryIdDirectory = new TermQuery(new Term(DirectorySearchItem.FIELD_ID_DIRECTORY_ENTRY, Integer.toString((Integer) mapQuery.get(DirectorySearchItem.FIELD_ID_DIRECTORY_ENTRY)))); queries.add(queryIdDirectory.toString()); fields.add(DirectorySearchItem.FIELD_ID_DIRECTORY_ENTRY); flags.add(BooleanClause.Occur.MUST); } if (mapQuery.containsKey(DirectorySearchItem.FIELD_ID_DIRECTORY_FIELD)) { Collection<String> queriesIdDirectoryField = new ArrayList<String>(); Collection<String> fieldsIdDirectoryField = new ArrayList<String>(); Collection<BooleanClause.Occur> flagsIdDirectoryField = new ArrayList<BooleanClause.Occur>(); for (Integer idField : (List<Integer>) mapQuery.get(DirectorySearchItem.FIELD_ID_DIRECTORY_FIELD)) { Query queryIdDirectory = new TermQuery( new Term(DirectorySearchItem.FIELD_ID_DIRECTORY_FIELD, Integer.toString(idField))); queriesIdDirectoryField.add(queryIdDirectory.toString()); fieldsIdDirectoryField.add(DirectorySearchItem.FIELD_ID_DIRECTORY_FIELD); flagsIdDirectoryField.add(BooleanClause.Occur.SHOULD); } Query 
queryMultiIdDirectoryField = MultiFieldQueryParser.parse( IndexationService.LUCENE_INDEX_VERSION, queriesIdDirectoryField.toArray(new String[queriesIdDirectoryField.size()]), queriesIdDirectoryField.toArray(new String[fieldsIdDirectoryField.size()]), flagsIdDirectoryField.toArray(new BooleanClause.Occur[flagsIdDirectoryField.size()]), IndexationService.getAnalyser()); queries.add(queryMultiIdDirectoryField.toString()); fields.add(DirectorySearchItem.FIELD_ID_DIRECTORY_FIELD); flags.add(BooleanClause.Occur.MUST); } //contains content if (mapQuery.containsKey(DirectorySearchItem.FIELD_CONTENTS)) { Query queryContent = new TermQuery(new Term(DirectorySearchItem.FIELD_CONTENTS, (String) mapQuery.get(DirectorySearchItem.FIELD_CONTENTS))); queries.add(queryContent.toString()); fields.add(DirectorySearchItem.FIELD_CONTENTS); flags.add(BooleanClause.Occur.MUST); } //contains date if (mapQuery.containsKey(DirectorySearchItem.FIELD_DATE)) { Query queryDate = new TermQuery(new Term(DirectorySearchItem.FIELD_DATE, DateTools.dateToString( (Date) mapQuery.get(DirectorySearchItem.FIELD_DATE), DateTools.Resolution.DAY))); queries.add(queryDate.toString()); fields.add(DirectorySearchItem.FIELD_CONTENTS); flags.add(BooleanClause.Occur.MUST); } //contains range date if (mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_BEGIN) && mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_END)) { BytesRef strLowerTerm = new BytesRef(DateTools.dateToString( (Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_BEGIN), DateTools.Resolution.DAY)); BytesRef strUpperTerm = new BytesRef(DateTools.dateToString( (Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_END), DateTools.Resolution.DAY)); Query queryRangeDate = new TermRangeQuery(DirectorySearchItem.FIELD_DATE, strLowerTerm, strUpperTerm, true, true); queries.add(queryRangeDate.toString()); fields.add(DirectorySearchItem.FIELD_DATE); flags.add(BooleanClause.Occur.MUST); } //record date creation //contains date creation if 
(mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_CREATION)) { Query queryDate = new TermQuery(new Term(DirectorySearchItem.FIELD_DATE_CREATION, DateTools.dateToString((Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_CREATION), DateTools.Resolution.DAY))); queries.add(queryDate.toString()); fields.add(DirectorySearchItem.FIELD_DATE_CREATION); flags.add(BooleanClause.Occur.MUST); } //contains range date if (mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_CREATION_BEGIN) && mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_CREATION_END)) { BytesRef strLowerTerm = new BytesRef( DateTools.dateToString((Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_CREATION_BEGIN), DateTools.Resolution.DAY)); BytesRef strUpperTerm = new BytesRef( DateTools.dateToString((Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_CREATION_END), DateTools.Resolution.DAY)); Query queryRangeDate = new TermRangeQuery(DirectorySearchItem.FIELD_DATE_CREATION, strLowerTerm, strUpperTerm, true, true); queries.add(queryRangeDate.toString()); fields.add(DirectorySearchItem.FIELD_DATE_CREATION); flags.add(BooleanClause.Occur.MUST); } //record date creation //contains date creation if (mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_MODIFICATION)) { Query queryDate = new TermQuery(new Term(DirectorySearchItem.FIELD_DATE_MODIFICATION, DateTools.dateToString((Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_MODIFICATION), DateTools.Resolution.DAY))); queries.add(queryDate.toString()); fields.add(DirectorySearchItem.FIELD_DATE_MODIFICATION); flags.add(BooleanClause.Occur.MUST); } //contains range modification date if (mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_MODIFICATION_BEGIN) && mapQuery.containsKey(DirectorySearchItem.FIELD_DATE_MODIFICATION_END)) { BytesRef strLowerTerm = new BytesRef(DateTools.dateToString( (Date) mapQuery.get(DirectorySearchItem.FIELD_DATE_MODIFICATION_BEGIN), DateTools.Resolution.DAY)); BytesRef strUpperTerm = new BytesRef( DateTools.dateToString((Date) 
mapQuery.get(DirectorySearchItem.FIELD_DATE_MODIFICATION_END), DateTools.Resolution.DAY)); Query queryRangeDate = new TermRangeQuery(DirectorySearchItem.FIELD_DATE_MODIFICATION, strLowerTerm, strUpperTerm, true, true); queries.add(queryRangeDate.toString()); fields.add(DirectorySearchItem.FIELD_DATE_MODIFICATION); flags.add(BooleanClause.Occur.MUST); } Query queryMulti = MultiFieldQueryParser.parse(IndexationService.LUCENE_INDEX_VERSION, queries.toArray(new String[queries.size()]), fields.toArray(new String[fields.size()]), flags.toArray(new BooleanClause.Occur[flags.size()]), IndexationService.getAnalyser()); // Get results documents TopDocs topDocs = searcher.search(queryMulti, LuceneSearchEngine.MAX_RESPONSES); ScoreDoc[] hits = topDocs.scoreDocs; for (int i = 0; i < hits.length; i++) { int docId = hits[i].doc; Document document = searcher.doc(docId); listResults.add(new DirectorySearchItem(document).getIdDirectoryRecord()); } } catch (Exception e) { AppLogService.error(e.getMessage(), e); } return listResults; }