List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:irlucene.MEDRetrieval.java
/**
 * Computes precision and recall for one query's result list.
 *
 * @param query the query, carrying the known relevant-document ids
 * @param hits  the ranked Lucene hits returned for the query
 * @return a two-element array: [0] = precision (relevant answers / answers),
 *         [1] = recall (relevant answers / total relevant documents);
 *         both 0 when there are no answers or no relevant documents,
 *         or when the index cannot be read (the IOException is logged)
 */
public double[] precisionRecal(QueryData query, ScoreDoc[] hits) {
    double[] precisionRecall = { 0, 0 };
    // try-with-resources: the original leaked the IndexReader
    try (IndexReader indexReader = DirectoryReader.open(index)) {
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        int relevantAnswers = 0;
        int answers = hits.length;
        int relevants = query.getNumberRelevantDocuments();
        for (ScoreDoc hit : hits) {
            Document doc = indexSearcher.doc(hit.doc);
            // hoist the parse out of the inner loop; "id" is stored as text
            int hitId = Integer.parseInt(doc.get("id").trim());
            for (int d : query.getRelevantDocuments()) {
                if (hitId == d) {
                    relevantAnswers++;
                }
            }
        }
        if (answers != 0 && relevants != 0) {
            precisionRecall[0] = (double) relevantAnswers / answers;
            precisionRecall[1] = (double) relevantAnswers / relevants;
        }
    } catch (IOException ex) {
        // fix: was logging under CFCRetrieval although this is MEDRetrieval
        Logger.getLogger(MEDRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
    return precisionRecall;
}
From source file:irlucene.MEDRetrieval.java
public double pAtN(QueryData query, ScoreDoc[] hits, int n) { double pAtN = 0; int limit;//from ww w .j a v a 2s . c o m int relevantAnswers; IndexReader indexReader; IndexSearcher indexSearcher; try { indexReader = DirectoryReader.open(index); indexSearcher = new IndexSearcher(indexReader); relevantAnswers = 0; if (n > hits.length) { limit = hits.length; } else { limit = n; } for (int i = 0; i < limit; ++i) { int docId = hits[i].doc; Document doc = indexSearcher.doc(docId); for (int d : query.getRelevantDocuments()) { if (d == Integer.valueOf(doc.get("id").trim())) { relevantAnswers++; } } } pAtN = 100 * relevantAnswers / n; } catch (IOException ex) { Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex); } return pAtN; }
From source file:irlucene.MEDRetrieval.java
public void printHits(ScoreDoc[] hits) { try {//ww w .j a va2 s . co m IndexReader indexReader = DirectoryReader.open(index); IndexSearcher indexSearcher = new IndexSearcher(indexReader); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = indexSearcher.doc(docId); System.out.println((i + 1) + " " + d.get("id")); } } catch (IOException ex) { Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex); } }
From source file:it.cnr.ilc.lc.clavius.search.Tester.java
private static void searchWithHighlighter(String term) throws IOException, ParseException, InvalidTokenOffsetsException { logger.info("searchWithContext2 (" + term + ")"); Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/claviusTest/indexes/it.cnr.ilc.lc.clavius.search.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); QueryParser parser = new QueryParser("content", new StandardAnalyzer()); Query query = parser.parse(term); TopDocs hits = searcher.search(query, 10); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter(9)); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", new StandardAnalyzer()); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, idDoc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { logger.info("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); }// w w w.j a v a2 s . co m // TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, text, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); // for (int j = 0; j < frag.length; j++) { // if ((frag[j] != null) && (frag[j].getScore() > 0)) { // logger.info("frag["+j+"] "+frag[j].toString()); // } // } // } }
From source file:it.cnr.ilc.lc.claviusweb.ClaviusSearch.java
private static List<Annotation> fullTextSearch(String term) throws IOException, ParseException, InvalidTokenOffsetsException { log.info("fullTextSearch (" + term + ")"); List<Annotation> result = new ArrayList<>(); try {/*from www. ja v a2 s . c o m*/ Directory indexDirectory = FSDirectory .open(Paths.get("/var/lucene/clavius-1.0.5/indexes/it.cnr.ilc.lc.claviusweb.entity.PlainText")); DirectoryReader ireader = DirectoryReader.open(indexDirectory); IndexSearcher searcher = new IndexSearcher(ireader); Analyzer fullTextAnalyzer = CustomAnalyzer.builder() .addCharFilter("patternReplace", "pattern", "([\\-\\(\\)\\[\\],\\.;:])", "replacement", " $1 ") .withTokenizer("whitespace").build(); //QueryParser parserTerm = new QueryParser("content", fullTextAnalyzer); // AnalyzingQueryParser parser = new AnalyzingQueryParser("content", fullTextAnalyzer); // Query query2 = parser.parse(term); // Query query = new WildcardQuery(new Term("content", term)); TopDocs hits = searcher.search(query, MAX_SEARCH_HITS); SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter(); //Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query)); ClaviusHighlighter highlighter = new ClaviusHighlighter(htmlFormatter, new QueryScorer(query)); highlighter.setTextFragmenter(new SimpleFragmenter()); log.info("hits.totalHits=(" + hits.totalHits + ")"); for (int i = 0; i < hits.totalHits; i++) { int id = hits.scoreDocs[i].doc; Document doc = searcher.doc(id); String idDoc = doc.get("idDoc"); //String text = doc.get("content"); TokenStream tokenStream = TokenSources.getAnyTokenStream(searcher.getIndexReader(), id, "content", fullTextAnalyzer); List<Annotation> frag = highlighter.getBestTextClaviusFragments(tokenStream, doc, false, 10);//highlighter.getBestFragments(tokenStream, text, 3, "..."); for (int j = 0; j < frag.size(); j++) { log.debug("idDoc: " + idDoc + ", Annotation[" + j + "] " + frag.get(j).toString()); } result.addAll(frag); } } catch (InvalidTokenOffsetsException | 
IOException e) { log.error(e); } log.info("Full Text Search found " + result.size() + " result(s) for term " + term); return result; }
From source file:it.eng.spagobi.analiticalmodel.documentsbrowser.service.SearchContentAction.java
License:Mozilla Public License
/**
 * Service entry point for the documents-browser search action.
 * Reads the filter value and target attributes from the request, runs a
 * Lucene search (exact or fuzzy) over the configured index, filters the
 * matching BIObjects by the user's visibility rights, decorates the JSON
 * serialization with summaries/views (and the show-metadata action when the
 * user has that functionality), and writes the response back to the client.
 * All failures are logged; search-layer failures surface as SpagoBIException
 * inside the outer catch.
 */
@Override
public void doService() {
    List objects;
    logger.debug("IN");
    try {
        UserProfile profile = (UserProfile) getUserProfile();
        Vector<String> fieldsToSearch = new Vector<String>();
        String valueFilter = getAttributeAsString(SpagoBIConstants.VALUE_FILTER);
        String attributes = getAttributeAsString(ATTRIBUTES);
        String metaDataToSearch = null;
        if (attributes != null) {
            if (attributes.equalsIgnoreCase("ALL")) { // search in all fields
                fieldsToSearch.add(IndexingConstants.BIOBJ_LABEL);
                fieldsToSearch.add(IndexingConstants.BIOBJ_NAME);
                fieldsToSearch.add(IndexingConstants.BIOBJ_DESCR);
                fieldsToSearch.add(IndexingConstants.METADATA); // metadata binary content
                fieldsToSearch.add(IndexingConstants.CONTENTS);
                fieldsToSearch.add(IndexingConstants.SUBOBJ_DESCR); // subobject fields
                fieldsToSearch.add(IndexingConstants.SUBOBJ_NAME);
            } else if (attributes.equalsIgnoreCase("LABEL")) { // search in document label
                fieldsToSearch.add(IndexingConstants.BIOBJ_LABEL);
            } else if (attributes.equalsIgnoreCase("NAME")) { // search in document name
                fieldsToSearch.add(IndexingConstants.BIOBJ_NAME);
            } else if (attributes.equalsIgnoreCase("DESCRIPTION")) { // search in description
                fieldsToSearch.add(IndexingConstants.BIOBJ_DESCR);
            } else { // a category name: restrict metadata, search contents
                metaDataToSearch = attributes;
                fieldsToSearch.add(IndexingConstants.CONTENTS);
            }
        }
        boolean similar = getAttributeAsBoolean(SIMILAR);
        logger.debug("Parameter [" + SpagoBIConstants.VALUE_FILTER + "] is equal to [" + valueFilter + "]");
        String indexBasePath = "";
        String jndiBean = SingletonConfig.getInstance().getConfigValue("SPAGOBI.RESOURCE_PATH_JNDI_NAME");
        if (jndiBean != null) {
            indexBasePath = SpagoBIUtilities.readJndiResource(jndiBean);
        }
        String index = indexBasePath + "/idx";
        HashMap returned = null;
        try {
            IndexReader reader = IndexReader.open(FSDirectory.open(new File(index)), true); // read-only=true
            IndexSearcher searcher = new IndexSearcher(reader);
            try {
                String[] fields = new String[fieldsToSearch.size()];
                fieldsToSearch.toArray(fields);
                // run the search (fuzzy or exact)
                if (similar) {
                    returned = LuceneSearcher.searchIndexFuzzy(searcher, valueFilter, index, fields,
                            metaDataToSearch);
                } else {
                    returned = LuceneSearcher.searchIndex(searcher, valueFilter, index, fields, metaDataToSearch);
                }
                ScoreDoc[] hits = (ScoreDoc[]) returned.get("hits");
                objects = new ArrayList();
                if (hits != null) {
                    for (ScoreDoc hit : hits) {
                        Document doc = searcher.doc(hit.doc);
                        String biobjId = doc.get(IndexingConstants.BIOBJ_ID);
                        BIObject obj = DAOFactory.getBIObjectDAO().loadBIObjectForDetail(Integer.valueOf(biobjId));
                        // keep only documents the current user may see
                        if (obj != null && ObjectsAccessVerifier.canSee(obj, profile)) {
                            objects.add(obj);
                        }
                    }
                }
            } finally {
                // fix: the searcher was only closed on the success path and
                // leaked whenever the search or the DAO lookup threw
                searcher.close();
            }
        } catch (CorruptIndexException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Index corrupted", e);
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Unable to read index", e);
        } catch (ParseException e) { // only searching, so a parse error means bad query syntax
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Wrong query syntax", e);
        }
        // serialize the visible documents and attach summary/views per object
        JSONArray documentsJSON = (JSONArray) SerializerFactory.getSerializer("application/json")
                .serialize(objects, null);
        for (int i = 0; i < documentsJSON.length(); i++) {
            JSONObject jsonobj = documentsJSON.getJSONObject(i);
            String biobjid = jsonobj.getString("id");
            jsonobj.put("summary", (String) returned.get(biobjid));
            jsonobj.put("views", (String) returned.get(biobjid + "-views"));
        }
        Collection func = profile.getFunctionalities();
        if (func.contains("SeeMetadataFunctionality")) {
            // users with the metadata functionality get an extra action per document
            JSONObject showmetadataAction = new JSONObject();
            showmetadataAction.put("name", "showmetadata");
            showmetadataAction.put("description", "Show Metadata");
            for (int i = 0; i < documentsJSON.length(); i++) {
                JSONObject documentJSON = documentsJSON.getJSONObject(i);
                documentJSON.getJSONArray("actions").put(showmetadataAction);
            }
        }
        JSONObject documentsResponseJSON = createJSONResponseDocuments(documentsJSON);
        try {
            writeBackToClient(new JSONSuccess(createJSONResponse(documentsResponseJSON)));
        } catch (IOException e) {
            logger.error(e.getMessage(), e);
            throw new SpagoBIException("Impossible to write back the responce to the client", e);
        }
    } catch (Exception e) {
        logger.error("Excepiton", e);
    } finally {
        logger.debug("OUT");
    }
}
From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java
License:Mozilla Public License
public static HashMap<String, Object> searchIndex(IndexSearcher searcher, String queryString, String index, String[] fields, String metaDataToSearch) throws IOException, ParseException { logger.debug("IN"); HashMap<String, Object> objectsToReturn = new HashMap<String, Object>(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); BooleanQuery andQuery = new BooleanQuery(); if (metaDataToSearch != null) { //search for query string on metadata name field and content //where metadata name = metaDataToSearch Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch)); andQuery.add(queryMetadata, BooleanClause.Occur.MUST); }// w w w.ja va 2s . c o m Query query = new MultiFieldQueryParser(Version.LUCENE_CURRENT, fields, analyzer).parse(queryString); andQuery.add(query, BooleanClause.Occur.MUST); Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant())); andQuery.add(tenantQuery, BooleanClause.Occur.MUST); logger.debug("Searching for: " + andQuery.toString()); int hitsPerPage = 50; // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(andQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; //setsback to action objectsToReturn.put("hits", hits); //highlighter Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery)); if (hits != null) { logger.debug("hits size: " + hits.length); for (int i = 0; i < hits.length; i++) { ScoreDoc hit = hits[i]; Document doc = searcher.doc(hit.doc); String biobjId = doc.get(IndexingConstants.BIOBJ_ID); String[] subobjNames = doc.getValues(IndexingConstants.SUBOBJ_NAME); if (subobjNames != null && subobjNames.length != 0) { String views = ""; for (int k = 0; k < subobjNames.length; k++) { views += subobjNames[k] + " "; } objectsToReturn.put(biobjId + "-views", views); } String summary = ""; if (highlighter != null) { String[] summaries; 
try { Integer idobj = (Integer.valueOf(biobjId)); String contentToSearchOn = fillSummaryText(idobj); summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT), IndexingConstants.CONTENTS, contentToSearchOn, 3); StringBuffer summaryBuffer = new StringBuffer(); if (summaries.length > 0) { summaryBuffer.append(summaries[0]); } for (int j = 1; j < summaries.length; j++) { summaryBuffer.append(" ... "); summaryBuffer.append(summaries[j]); } summary = summaryBuffer.toString(); //get only a portion of summary if (summary.length() > 101) { summary = summary.substring(0, 100); summary += "..."; } objectsToReturn.put(biobjId, summary); } catch (InvalidTokenOffsetsException e) { logger.error(e.getMessage(), e); } catch (NumberFormatException e) { logger.error(e.getMessage(), e); } catch (Exception e) { logger.error(e.getMessage(), e); } } } } int numTotalHits = collector.getTotalHits(); logger.info(numTotalHits + " total matching documents"); logger.debug("OUT"); return objectsToReturn; }
From source file:it.eng.spagobi.commons.utilities.indexing.LuceneSearcher.java
License:Mozilla Public License
public static HashMap<String, Object> searchIndexFuzzy(IndexSearcher searcher, String queryString, String index, String[] fields, String metaDataToSearch) throws IOException, ParseException { logger.debug("IN"); HashMap<String, Object> objectsToReturn = new HashMap<String, Object>(); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); BooleanQuery orQuery = new BooleanQuery(); BooleanQuery andQuery = new BooleanQuery(); for (int i = 0; i < fields.length; i++) { Query query = new FuzzyQuery(new Term(fields[i], queryString)); query = query.rewrite(searcher.getIndexReader()); orQuery.add(query, BooleanClause.Occur.SHOULD); }/*w ww . ja va2s . com*/ andQuery.add(orQuery, BooleanClause.Occur.MUST); if (metaDataToSearch != null) { //search for query string on metadata name field and content //where metadata name = metaDataToSearch Query queryMetadata = new TermQuery(new Term(IndexingConstants.METADATA, metaDataToSearch)); andQuery.add(queryMetadata, BooleanClause.Occur.MUST); } Query tenantQuery = new TermQuery(new Term(IndexingConstants.TENANT, getTenant())); andQuery.add(tenantQuery, BooleanClause.Occur.MUST); logger.debug("Searching for: " + andQuery.toString()); int hitsPerPage = 50; // Collect enough docs to show 5 pages TopScoreDocCollector collector = TopScoreDocCollector.create(5 * hitsPerPage, false); searcher.search(andQuery, collector); ScoreDoc[] hits = collector.topDocs().scoreDocs; objectsToReturn.put("hits", hits); //highlighter //orQuery = orQuery.rewrite(searcher.getIndexReader()); //andQuery = andQuery.rewrite(searcher.getIndexReader()); Highlighter highlighter = new Highlighter(new SimpleHTMLFormatter(), new QueryScorer(andQuery)); if (hits != null) { for (int i = 0; i < hits.length; i++) { ScoreDoc hit = hits[i]; Document doc = searcher.doc(hit.doc); String biobjId = doc.get(IndexingConstants.BIOBJ_ID); String summary = " "; if (highlighter != null) { String[] summaries; try { Integer idobj = (Integer.valueOf(biobjId)); String 
contentToSearchOn = fillSummaryText(idobj); summaries = highlighter.getBestFragments(new StandardAnalyzer(Version.LUCENE_CURRENT), IndexingConstants.CONTENTS, contentToSearchOn, 3); StringBuffer summaryBuffer = new StringBuffer(); if (summaries.length > 0) { summaryBuffer.append(summaries[0]); } for (int j = 1; j < summaries.length; j++) { summaryBuffer.append(" ... "); summaryBuffer.append(summaries[j]); } summary = summaryBuffer.toString(); //get only a portion of summary if (summary.length() > 101) { summary = summary.substring(0, 100); summary += "..."; } objectsToReturn.put(biobjId, summary); } catch (InvalidTokenOffsetsException e) { logger.error(e.getMessage(), e); } catch (Exception e) { logger.error(e.getMessage(), e); } } } } int numTotalHits = collector.getTotalHits(); logger.info(numTotalHits + " total matching documents"); logger.debug("OUT"); return objectsToReturn; }
From source file:it.unipd.dei.ims.falcon.ranking.QueryMethods.java
License:Apache License
/** * Given a set of documents with multiple scores, retain the max score for * each document/*w ww. j av a 2s. c om*/ * * @param topdocs * Lucene TopDocs; documents, namely segments, are ranked by * score. The score of a segment is the sum of the score of its * constituting hashes, specifically obtained by * {@link it.unipd.dei.ims.falcon.ranking.SegmentQuery} * @param searcher * Lucene {@link org.apache.lucene.search.IndexSearcher} * @return * @throws IOException */ private static Map<String, Double> reduceMaxScoreForEachSong(TopDocs topdocs, IndexSearcher searcher) throws IOException { if (docId2songidCache == null) { // TODO although this should work, it has not been checked for concurrency issues docId2songidCache = new ConcurrentHashMap<Integer, String>(); } Map<String, Double> songid2maxscore = new TreeMap<String, Double>(); int r = 1; for (ScoreDoc sd : topdocs.scoreDocs) { String stringId = docId2songidCache.get(sd.doc); if (stringId == null) { stringId = searcher.doc(sd.doc).getField("TITLE").stringValue(); docId2songidCache.put(sd.doc, stringId); } if (!songid2maxscore.containsKey(stringId)) songid2maxscore.put(stringId, new Double(sd.score)); r++; } return songid2maxscore; }
From source file:javatools.webapi.LuceneIndexFiles.java
License:Apache License
public static List<String[]> doPagingSearch2(IndexSearcher searcher, Query query, int hitsPerPage) throws IOException { List<String[]> searchresult = new ArrayList<String[]>(); // Collect enough docs to show 5 pages TopDocs results = searcher.search(query, 10 * hitsPerPage); ScoreDoc[] hits = results.scoreDocs; int numTotalHits = results.totalHits; System.out.println(numTotalHits + " total matching documents"); int start = 0; int end = Math.min(numTotalHits, hitsPerPage); for (int i = 0; i < numTotalHits && i < hits.length; i++) { {/*from w w w. j a va 2s .c o m*/ Document doc = searcher.doc(hits[i].doc); String path = doc.get("path"); if (path != null) { //System.out.println((i + 1) + ". " + path); String contents = doc.get("contents"); searchresult.add(new String[] { path, contents }); if (contents != null) { //System.out.println(" Contents: " + contents); } } else { //System.out.println((i + 1) + ". " + "No path for this document"); } } } return searchresult; }