List of usage examples for org.apache.lucene.search IndexSearcher doc
public Document doc(int docID) throws IOException
.getIndexReader().document(docID)
From source file:io.github.msurdi.redeye.core.lucene.AbstractIndex.java
License:Apache License
/** * Retrieve a list of documents matching given query. The query must be a valid lucene query or '*' * for matching all documents. If the query is not valid, a best effort search is done. * * @param q a query string/*from w ww . j av a 2s .co m*/ * @return a list of the {@link io.github.msurdi.redeye.api.Indexable} documents matching. * @throws IOException */ @Override public List<T> query(String q) throws IOException { ensureOpened(); ArrayList<T> results = Lists.newArrayList(); Query query; try { if (MATCH_ALL.equals(q)) { query = new MatchAllDocsQuery(); } else { query = new QueryParser(LUCENE_VERSION, DEFAULT_FIELD, analyzer).parse(q); } } catch (ParseException e) { query = new SimpleQueryParser(analyzer, DEFAULT_FIELD).parse(q); } IndexSearcher searcher = null; try { searcherManager.maybeRefresh(); searcher = searcherManager.acquire(); TopDocs docs = searcher.search(query, Math.max(1, searcher.getIndexReader().maxDoc())); for (ScoreDoc scoreDoc : docs.scoreDocs) { Document document = searcher.doc(scoreDoc.doc); results.add(buildEntity(document)); } } finally { searcherManager.release(searcher); } return results; }
From source file:io.github.msurdi.redeye.core.lucene.AbstractIndex.java
License:Apache License
/**
 * Get a document from the Lucene index by its id.
 *
 * @param id the id of the document to retrieve
 * @return an {@link com.google.common.base.Optional} of the document instance,
 *         absent when no document has that id
 * @throws IOException if the index cannot be refreshed or read
 */
@Override
public Optional<T> get(String id) throws IOException {
    ensureOpened();
    final Query query = new TermQuery(new Term(Indexable.ID_FIELD, id));
    IndexSearcher searcher = null;
    try {
        searcherManager.maybeRefresh();
        searcher = searcherManager.acquire();
        // An id is unique, so one hit is enough.
        TopDocs docs = searcher.search(query, 1);
        if (docs.totalHits < 1) {
            return Optional.absent();
        }
        return Optional.of(buildEntity(searcher.doc(docs.scoreDocs[0].doc)));
    } finally {
        // acquire() may not have run if maybeRefresh() threw; release(null) would NPE.
        if (searcher != null) {
            searcherManager.release(searcher);
        }
    }
}
From source file:io.jpress.module.article.searcher.LuceneSearcher.java
License:LGPL
private List<Article> toArticleList(IndexSearcher searcher, TopDocs topDocs, Highlighter highlighter, String keyword) throws IOException { List<Article> articles = new ArrayList<>(); Analyzer analyzer = new JcsegAnalyzer(JcsegTaskConfig.COMPLEX_MODE); for (ScoreDoc item : topDocs.scoreDocs) { Document doc = searcher.doc(item.doc); Article article = new Article(); String title = doc.get("title"); String content = doc.get("content"); article.setId(Long.valueOf(doc.get("aid"))); article.setTitle(title);// w w w . j a v a2 s .c o m article.setContent(content); // try { String highlightTitle = highlighter .getBestFragment(analyzer.tokenStream(keyword, new StringReader(title)), title); article.setHighlightTitle(highlightTitle); String text = article.getText(); String highlightContent = highlighter .getBestFragment(analyzer.tokenStream(keyword, new StringReader(text)), text); article.setHighlightContent(highlightContent); } catch (InvalidTokenOffsetsException e) { logger.error(e.getMessage(), e); } articles.add(article); } return articles; }
From source file:io.jpress.searcher.LuceneSearcher.java
License:LGPL
@Override public Page<SearcherBean> search(String queryString, String module, int pageNum, int pageSize) { List<SearcherBean> list = new ArrayList<SearcherBean>(); try {//from w w w .ja v a 2 s. c o m IndexSearcher mIndexSearcher = getIndexSearcher(); queryString = QueryParser.escape(queryString); String[] queries = { queryString, queryString, queryString }; String[] fields = { "title", "description", "content" }; BooleanClause.Occur[] flags = { BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD, BooleanClause.Occur.SHOULD }; Query query = MultiFieldQueryParser.parse(queries, fields, flags, new JcsegAnalyzer5X(JcsegTaskConfig.COMPLEX_MODE)); TopDocs topDocs = mIndexSearcher.search(query, 1000);//1000,?1000? if (topDocs != null && topDocs.totalHits > 0) { ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < scoreDocs.length; i++) { int docId = scoreDocs[i].doc; Document doc = mIndexSearcher.doc(docId); list.add(createSearcherBean(doc)); } } } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } return new Page<SearcherBean>(list, pageNum, pageSize, list.size() / pageSize, list.size()); }
From source file:io.vertigo.dynamo.plugins.collections.lucene.RamLuceneIndex.java
License:Apache License
private DtList<D> translateDocs(final IndexSearcher searcher, final TopDocs topDocs, final int skip, final int top) throws IOException { final DtField idField = dtDefinition.getIdField().get(); final DtList<D> dtcResult = new DtList<>(dtDefinition); final int resultLength = topDocs.scoreDocs.length; if (resultLength > skip) { for (int i = skip; i < Math.min(skip + top, resultLength); i++) { final ScoreDoc scoreDoc = topDocs.scoreDocs[i]; final Document document = searcher.doc(scoreDoc.doc); dtcResult.add(getDtObjectIndexed(document.get(idField.getName()))); }//from w ww. ja va 2 s . co m } return dtcResult; }
From source file:ir.IndexAndSearch_1106022654.java
public static void main(String[] args) throws IOException, ParseException { //menyimpan daftar id semua dokumen ArrayList idR = new ArrayList(); //menyimpan daftar judul semua dokumen ArrayList judulR = new ArrayList(); //menyimpa daftar teks untuk semua dokumen ArrayList teksR = new ArrayList(); String id = "haha"; String judul = "haha"; String teks = "haha"; //membaca data semua dokumen String fileTeks = "D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\Teks.txt"; File file = new File(fileTeks); BufferedReader br = new BufferedReader(new FileReader(file)); try {//from w ww.java 2 s.c o m StringBuilder sb = new StringBuilder(); String line = br.readLine(); String gabung = ""; String gabung2 = ""; boolean flag = false; while (line != null) { gabung = gabung + " " + line; gabung2 = gabung2 + " " + line; final Pattern patternID = Pattern.compile("<ID>(.+?)</ID>"); final Pattern patternJ = Pattern.compile("<JUDUL>(.+?)</JUDUL>"); boolean flag2; /** * penjelasan mengenai teknik untagging teks * setiap membaca </DOK> berarti satu dokumen berhasil dibaca sehingga kita bersiap membaca dokumen selanjutnya * flag diset false, karena sebelum membaca <DOK> tidak ada data yang disimpan */ String[] arg = line.trim().split(" "); if (line.equalsIgnoreCase("</DOK>")) { flag2 = false; } /** * setiap membaca <DOK>, kita bersiap untuk menyimpan data satu dokumen, sehingga flag di set menjadi true */ if (line.equalsIgnoreCase("<DOK>")) { flag2 = true; } /** * selama flag di set true, kita membaca dan mengambil semua data yang berada di dalam tagging. 
* untuk tahap ini, kita membaca id dan judul */ if (flag2 = true) { //untagging <ID></ID> final Matcher matcherD = patternID.matcher(line); if (matcherD.matches()) { id = matcherD.group(1); idR.add(id); //System.out.println("id ---> " + matcherD.group(1)); } //untagging <JUDUL></JUDUL> final Matcher matcherJ = patternJ.matcher(line); if (matcherJ.matches()) { judul = matcherJ.group(1); judulR.add(judul); //System.out.println("Judul ---> " + matcherJ.group(1)); } } /** * setiap selesai membaca judul (artinya program menemukan tagging </JUDUL>) kita bersiap membaca teks * untuk membaca teks, algoritma sedikit berbeda dengan pembacaan id dan judul karena teks terdiri dari beberapa line. * idenya, kita membaca semua line dalam tag <TEKS> terlebih dahulu dan menyimpannya ke dalam variabel tipe string. * setelah menemukan tag </DOK> artinya semua teks dalam satu dokumen selesai di baca, kita menghilangkan tag yang tidak perlu * kemudian menambahkannya ke ArrayList. * variabel gabung merupakan variabel yang digunakan untuk menyimpan line teks yang dibaca, sehingga setelah semua teks dalam satu dokumen * selesai dibaca, program kembali mengeset nilainya menjadi string kosong. 
*/ for (int i = 0; i < arg.length; i++) { if (arg[i].endsWith("</JUDUL>")) { gabung2 = ""; } //untagging <TEKS></TEKS> if (arg[i].compareTo("</DOK>") == 0) { //System.out.println("masuk"); gabung2 = gabung2.replaceAll("<TEKS>", ""); gabung2 = gabung2.replaceAll("</TEKS>", ""); gabung2 = gabung2.replaceAll("</DOK>", ""); teksR.add(gabung2); //System.out.println("Teks ---> " + gabung2); //System.out.println(id+judul+teks); gabung = ""; gabung2 = ""; } } line = br.readLine(); } //menghitung jumlah masing - masing id,teks, dan judul untuk memastikan sudah sama System.out.println("size teks: " + teksR.size()); System.out.println("size id: " + idR.size()); System.out.println("size judul: " + judulR.size()); String everything = sb.toString(); } finally { br.close(); } //inisialisasi analyzer StandardAnalyzer analyzer = new StandardAnalyzer(); //Directory index = FSDirectory.open(new File("D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\index-dir.txt")); //membuat direktori untuk menyimpan hasil file index Directory index = new RAMDirectory(); IndexWriterConfig config = new IndexWriterConfig(analyzer); //inisialisasi IndexWriter dan menentukan lokasi penyimpanan hasil Index IndexWriter writer = new IndexWriter(index, config); //menambahkan dokumen ke dalam IndexWriter for (int d = 0; d < idR.size(); d++) { //in.indexer(); id = (String) idR.get(d); judul = (String) judulR.get(d); teks = (String) teksR.get(d); // System.out.println("id--->" + id); // System.out.println("judul--->" + judul); // System.out.println("teks--->" + teks); addDok(writer, id, judul, teks); } writer.close(); //baca file query File fileQ = new File("D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\Query.txt"); BufferedReader brQ = new BufferedReader(new FileReader(fileQ)); //inisialisasi arraylist untuk menyimpan daftar query ArrayList listQ = new ArrayList(); //menyimpan query yang sedang dibaca String lineQ = brQ.readLine(); while (lineQ != null) { 
//System.out.println("QUERY"); //masukkan query yang sedang dibaca ke daftar query listQ.add(lineQ); lineQ = brQ.readLine(); } //menginisialisasi lokasi output Writer tulis = new BufferedWriter(new OutputStreamWriter(new FileOutputStream( "D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\1106022654_Hasil_1.txt"), "utf-8")); //searching berdasarkan file query yang diberikan for (int qu = 0; qu < listQ.size(); qu++) { System.out.println("Query ---> " + listQ.get(qu)); String querystr = (String) listQ.get(qu); //inisialisasi query Query query = new QueryParser("teks", analyzer).parse(querystr); //inisialisasi jumlah dokumen yang ditampilkan int hitsPerPage = 10; //membaca file index IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); //inisialisasi penyimpanan hasil pencarian dokumen dengan membatasi jumlahnya hanya 10 dokumen dengan score tertinggi TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPerPage); //melakukan proses searching berdasarkan query yang diberikan dan disimpan ke collector searcher.search(query, collector); //mengambil 10 hasil tertinggi ScoreDoc[] hits = collector.topDocs().scoreDocs; //tulis hasil ke file tulis.write(querystr + "\n"); System.out.println("Query string: " + querystr); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println((i + 1) + ". " + d.get("id") + "\t" + d.get("judul")); tulis.write((i + 1) + ". " + d.get("id") + "\t" + d.get("judul") + "\n"); } tulis.flush(); //tulis.close(); } }
From source file:IR.LuceneModel.java
public static void main(String[] args) throws IOException { System.out.println(/*from w ww .ja v a2 s .c om*/ "Enter the FULL path where the index will be created: (e.g. /Usr/index or c:\\temp\\index)"); String indexLocation = null; BufferedReader br = new BufferedReader(new InputStreamReader(System.in)); String s = br.readLine(); LuceneModel indexer = null; try { indexLocation = s; indexer = new LuceneModel(s); } catch (Exception ex) { System.out.println("Cannot create index..." + ex.getMessage()); System.exit(-1); } // =================================================== // read input from user until he enters q for quit // =================================================== while (!s.equalsIgnoreCase("q")) { try { System.out.println( "Enter the FULL path to add into the index (q=quit): (e.g. /home/mydir/docs or c:\\Users\\mydir\\docs)"); System.out.println("[Acceptable file types: .xml, .html, .html, .txt]"); s = br.readLine(); if (s.equalsIgnoreCase("q")) { break; } // try to add file into the index indexer.indexFileOrDirectory(s); } catch (Exception e) { System.out.println("Error indexing " + s + " : " + e.getMessage()); } } // =================================================== // after adding, we always have to call the // closeIndex, otherwise the index is not created // =================================================== indexer.closeIndex(); // ========================================================= // Now search // ========================================================= IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(indexLocation))); IndexSearcher searcher = new IndexSearcher(reader); TopScoreDocCollector collector;//= TopScoreDocCollector.create(100, true); s = ""; ScoreDoc[] hits; while (!s.equalsIgnoreCase("q")) { try { System.out.println("Enter the search query (q=quit):"); s = br.readLine(); if (s.equalsIgnoreCase("q")) { break; } File queryFile = new File(s); BufferedReader r = new BufferedReader(new FileReader(queryFile)); 
String query;//= r.readLine(); int count = 0; String q1 = "LuceneResults.txt"; File luceneFile = new File(q1); luceneFile.createNewFile(); FileWriter writer = new FileWriter(luceneFile); while ((query = r.readLine()) != null) { try { count++; collector = TopScoreDocCollector.create(100, true); QueryParser parser = new QueryParser(Version.LUCENE_47, "contents", analyzer); Query q = parser.parse(query.replace('/', ' ')); searcher.search(q, collector); hits = collector.topDocs().scoreDocs; int query_id; query_id = count; // change this for new query System.out.println("Found " + hits.length + " hits."); for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document d = searcher.doc(docId); System.out.println(query_id + ". " + d.get("path").replaceAll(".html", "") + " " + (i + 1) + " " + hits[i].score + " LuceneModel"); writer.write(String .format(query_id + " " + "Q0" + " " + d.get("path").replaceAll(".html", "") + " " + (i + 1) + " " + hits[i].score + " LuceneModel\n")); writer.flush(); // System.out.println(fmt.format(""+query_id,"Q0",""+d.get("path"),""+(i + 1),""+hits[i].score)); } } catch (Exception e) { // System.out.println(e.printStackTrace()); e.printStackTrace(); continue; } // 5. term stats --> watch out for which "version" of the term // must be checked here instead! Term termInstance = new Term("contents", s); long termFreq = reader.totalTermFreq(termInstance); long docCount = reader.docFreq(termInstance); System.out.println(s + " Term Frequency " + termFreq + " - Document Frequency " + docCount); // r.close(); } r.close(); writer.close(); } catch (Exception e) { System.out.println("Error searching " + s + " : " + e.getMessage()); break; } } }
From source file:irlucene.CFCRetrieval.java
/**
 * Computes precision and recall for one query's result set.
 *
 * Fix vs. original: the IndexReader opened per call was never closed (leak);
 * it is now closed in a finally block.
 *
 * @param query the query, carrying its set of relevant document record numbers
 * @param hits  the ranked hits returned for the query
 * @return {@code [precision, recall]}; both 0 when there are no answers or no relevants
 */
public double[] precisionRecal(QueryData query, ScoreDoc[] hits) {
    double precisionRecall[] = { 0, 0 };
    try {
        IndexReader indexReader = DirectoryReader.open(index);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            int relevantAnswers = 0;
            int answers = hits.length;
            int relevants = query.getNumberRelevantDocuments();
            // Count hits whose stored recordNumber is in the query's relevant set.
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document doc = indexSearcher.doc(docId);
                for (int d : query.getRelevantDocuments()) {
                    if (Integer.valueOf(doc.get("recordNumber").trim()) == d) {
                        relevantAnswers++;
                    }
                }
            }
            if (answers == 0 || relevants == 0) {
                precisionRecall[0] = 0;
                precisionRecall[1] = 0;
            } else {
                precisionRecall[0] = (double) relevantAnswers / answers;
                precisionRecall[1] = (double) relevantAnswers / relevants;
            }
        } finally {
            indexReader.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
    return precisionRecall;
}
From source file:irlucene.CFCRetrieval.java
/**
 * Computes precision at rank {@code n} (P@N), as a percentage, for one query.
 *
 * Fixes vs. original: {@code 100 * relevantAnswers / n} was evaluated in integer
 * arithmetic and silently truncated before being assigned to the double result —
 * it now uses floating-point division; the per-call IndexReader is closed.
 *
 * @param query the query, carrying its set of relevant document record numbers
 * @param hits  the ranked hits returned for the query
 * @param n     the rank cutoff
 * @return 100 * (relevant hits in the top n) / n, or 0 on I/O failure
 */
public double pAtN(QueryData query, ScoreDoc[] hits, int n) {
    double pAtN = 0;
    try {
        IndexReader indexReader = DirectoryReader.open(index);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            int relevantAnswers = 0;
            // Inspect at most n hits (fewer if the result list is shorter).
            int limit = Math.min(n, hits.length);
            for (int i = 0; i < limit; ++i) {
                int docId = hits[i].doc;
                Document doc = indexSearcher.doc(docId);
                for (int d : query.getRelevantDocuments()) {
                    if (d == Integer.valueOf(doc.get("recordNumber").trim())) {
                        relevantAnswers++;
                    }
                }
            }
            // Floating-point division; the old integer expression truncated the result.
            pAtN = 100.0 * relevantAnswers / n;
        } finally {
            indexReader.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
    return pAtN;
}
From source file:irlucene.CFCRetrieval.java
/**
 * Prints each hit's rank, stored paperNumber and title to stdout.
 *
 * Fix vs. original: the IndexReader opened per call was never closed (leak);
 * it is now closed in a finally block.
 *
 * @param hits the ranked hits to print
 */
public void printHits(ScoreDoc[] hits) {
    try {
        IndexReader indexReader = DirectoryReader.open(index);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            for (int i = 0; i < hits.length; ++i) {
                int docId = hits[i].doc;
                Document d = indexSearcher.doc(docId);
                System.out.println((i + 1) + " " + d.get("paperNumber") + "\t" + d.get("title"));
            }
        } finally {
            indexReader.close();
        }
    } catch (IOException ex) {
        Logger.getLogger(CFCRetrieval.class.getName()).log(Level.SEVERE, null, ex);
    }
}