List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document
in this index. From source file:it.polito.tellmefirst.lucene.KBIndexSearcher.java
License:Open Source License
/** * Get DBpedia concepts related to a specific URI from the Lucene Index. These DBpedia concepts appear as wikilink * once in the Wikipedia page identified by the URI. * * @param uri Input URI.// ww w . ja va2 s .c om * * In the previous versions of TellMeFirst, the getResidualBagOfConcepts method take as input the * URI of a DBpedia resource (String) and the language parameter (String). We have decide to * modify the API in order to separate this module from the core of TellMeFirst. * * @since 3.0.0.0. */ public ArrayList<String> getResidualBagOfConcepts(String uri) { LOG.debug("[getResidualBagOfConcepts] - BEGIN"); ArrayList<String> result = new ArrayList<String>(); try { MMapDirectory directory = new MMapDirectory(new File(residualKb)); IndexReader reader = IndexReader.open(directory, true); IndexSearcher is = new IndexSearcher(directory, true); Query q = new TermQuery(new Term("URI", uri)); TopDocs hits = is.search(q, 1); is.close(); if (hits.totalHits != 0) { int docId = hits.scoreDocs[0].doc; org.apache.lucene.document.Document doc = reader.document(docId); String wikilinksMerged = doc.getField("KB").stringValue(); String[] wikiSplits = wikilinksMerged.split(" "); //no prod LOG.debug("Residual bag of concepts for the resource " + uri + ": "); for (String s : wikiSplits) { result.add(s); //no prod LOG.debug("* " + s); } } reader.close(); } catch (Exception e) { LOG.error("[getResidualBagOfConcepts] - EXCEPTION: ", e); } LOG.debug("[getResidualBagOfConcepts] - END"); return result; }
From source file:it.unibz.instasearch.indexing.Searcher.java
License:Open Source License
private SearchResult collectSearchResults(SearchQuery searchQuery, IndexSearcher indexSearcher, IndexReader reader, Query query) throws IOException { int maxResults = reader.numDocs(); // all documents if (searchQuery.isLimited()) maxResults = searchQuery.getMaxResults(); Map<String, Float> searchTerms = extractTerms(query); TopDocCollector collector = new TopDocCollector(maxResults); if (searchQuery.isCanceled()) return null; indexSearcher.search(query, collector); // do the actual search if (collector.getTotalHits() == 0) return null; ScoreDoc[] hits = collector.topDocs().scoreDocs; ArrayList<SearchResultDoc> resultDocs = new ArrayList<SearchResultDoc>(hits.length); for (int i = 0; i < hits.length && !searchQuery.isCanceled(); i++) { int docId = hits[i].doc; float score = hits[i].score; Document doc = reader.document(docId); SearchResultDoc resultDoc = new SearchResultDoc(getIndexDir(), doc, docId, score); if (showMatchCounts) resultDoc.computeMatchCount(reader, searchTerms.keySet()); resultDocs.add(resultDoc);// w w w .j a va2s . com } return new SearchResult(searchQuery, resultDocs, searchTerms); }
From source file:it.unipd.dei.ims.falcon.indexing.Indexing.java
License:Apache License
/** * Prints information on the songs stored in the index in the specified path. * The specific information printed is that specified by the "option". * Available options are://from www . j a v a 2 s . c o m * <ul> * <li> "show_doc_ids": prints the internal index identifier of all the * segments in the index together with the title of the song which * the segment belongs to; * <li> "show_seg_ids": prints the internal index identifier of all the * segments in the index together with the segment identifier; * <li> "show_full_index": print all the distinct hashes in the index * and the posting list associated to each hash * * </ul> * * @param indexPath * full path to the folder where the index is stored * @param option * option which specified the requested information * * @throws IndexingException */ public static void indexUtils(String indexPath, String option) throws IndexingException { IndexReader reader; try { reader = IndexReader.open(new SimpleFSDirectory(new File(indexPath), null)); if (option.equals("show_doc_ids")) { // prints all the internal segment identifiers together with // the title of the song of the considered segment. // For instance, "[6] song2" denotes that the segment with // internal identifier "6" belongs to the song with title "song2" for (int d = 0; d < reader.numDocs(); d++) { System.out.println("[" + d + "] " + reader.document(d).getField("TITLE").stringValue()); } } else if (option.equals("show_seg_ids")) { // prints all the internal segment identifiers together with // the identifier of the segment. 
// For instance, "[8] song2_3" denotes that the third segment // of "song2" has internal identifier "8 for (int d = 0; d < reader.numDocs(); d++) { System.out.println("[" + d + "] " + reader.document(d).getField("ID").stringValue()); } } else if (option.equals("show_full_index")) { // print the full index, that is each hash with the associated // posting list TermEnum terms = reader.terms(); while (terms.next()) { System.out.print(terms.term() + " [SF: " + terms.docFreq() + "] <"); TermPositions poss = reader.termPositions(terms.term()); while (poss.next()) { System.out.print(" " + reader.document(poss.doc()).getField("ID").stringValue() + " (" + poss.freq() + "), "); } System.out.println(">"); } } } catch (CorruptIndexException ex) { throw new IndexingException("CorruptIndexException when accessing index for printing information"); } catch (IOException ex) { throw new IndexingException("IOException when accessing index for printing information"); } }
From source file:lia.chapter5.CategorizerTest.java
License:Apache License
/**
 * Walks over every document id of the book index and adds the "subject" term vector of each
 * document to the vector map of the document's category, creating that map on first use.
 * Prints the number of document ids and the subject of every document along the way.
 *
 * @throws IOException if the index cannot be read
 */
private void buildCategoryVectors() throws IOException {
    IndexReader reader = Utils.getBookIndexSearcher().getIndexReader();

    final int docCount = reader.maxDoc();
    System.out.println(docCount);

    int docId = 0;
    while (docId < docCount) {
        Document book = reader.document(docId);
        String category = book.get("category");

        System.out.println("\n" + book.get("subject") + "\n");

        // One accumulated vector per category, created lazily.
        Map categoryVector = (Map) categoryMap.get(category);
        if (categoryVector == null) {
            categoryVector = new TreeMap();
            categoryMap.put(category, categoryVector);
        }

        Terms subjectTerms = reader.getTermVector(docId, "subject");
        addTermFreqToMap(categoryVector, subjectTerms);

        docId++;
    }
}
From source file:liredemo.flickr.TestParallelIndexer.java
License:Open Source License
public void testSearchTime() throws IOException { ImageSearcher ceddImageSearcher = new VisualWordsImageSearcher(100, DocumentBuilder.FIELD_NAME_SURF + DocumentBuilder.FIELD_NAME_BOVW); // ImageSearcher ceddImageSearcher = ImageSearcherFactory.createCEDDImageSearcher(100); IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath))); // IndexReader reader = IndexReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath + "-reduced")))); System.out.println("reader.maxDoc() = " + reader.maxDoc()); for (int i = 0; i < 10; i++) { long ms = System.currentTimeMillis(); ceddImageSearcher.search(reader.document(0), reader); System.out.println("s = " + (double) (System.currentTimeMillis() - ms) / 1000d); }/*from w w w.java 2 s .c om*/ }
From source file:liredemo.flickr.TestParallelIndexer.java
License:Open Source License
public void testMirFlickrSearch() throws IOException { float avgPrecision = 0f; float nullHits = 0f; int numDocsAll = 1000; for (int docId = 0; docId < numDocsAll; docId++) { int docNumber = docId; // ImageSearcher imageSearcher = new VisualWordsImageSearcher(6, DocumentBuilder.FIELD_NAME_SURF_LOCAL_FEATURE_HISTOGRAM_VISUAL_WORDS); ImageSearcher imageSearcher = ImageSearcherFactory.createCEDDImageSearcher(10); IndexReader reader = IndexReader.open(FSDirectory.open(new File("./index-mirflickr"))); // IndexReader reader = IndexReader.open(new RAMDirectory(FSDirectory.open(new File(indexPath + "-reduced")))); // System.out.println("reader.maxDoc() = " + reader.maxDoc()); ImageSearchHits hits = imageSearcher.search(reader.document(docNumber), reader); // LsaFilter lsa = new LsaFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD); // lsa.filter(hits, reader.document(docNumber)); HashMap<String, Float> hist = new HashMap<String, Float>(250); // System.out.println("query tags: " + reader.document(docNumber).getValues("tags")[0]); for (int i = 0; i < hits.length(); i++) { Document doc = hits.doc(i); String tags = doc.getValues("tags")[0]; String[] t = tags.split("\\s"); for (int j = 0; j < t.length; j++) { if (t[j].length() > 1 && j > 0) { if (hist.containsKey(t[j])) { // hist.put(t[j], hist.get(t[j]) + 1f / (float) Math.max(j / 2f, 1f)); hist.put(t[j], hist.get(t[j]) + 1f); } else { // hist.put(t[j], 1f / ((float) Math.max(j / 2f, 1f))); hist.put(t[j], 1f); }/*from www. j av a 2 s. 
c o m*/ } } } int countHits = 0; // IDF // for (Iterator<String> iterator = hist.keySet().iterator(); iterator.hasNext(); ) { // String s = iterator.next(); // int docFreq = reader.docFreq(new Term("tags", s)); // hist.put(s, (float) (hist.get(s)*(Math.log(25000f/(float)docFreq)))); // } for (int c = 0; c < 10; c++) { String t = getMaxItem(hist); String s = ""; if (t != null && reader.document(docNumber).getValues("tags") != null && reader.document(docNumber).getValues("tags").length > 0 && reader.document(docNumber).getValues("tags")[0].indexOf(t) > -1) { s = "* "; countHits++; } // System.out.println(s + t + " ("+hist.get(t)+")"); hist.remove(t); } // System.out.println("countHits = " + countHits); avgPrecision += (float) countHits / 10f; if (countHits < 1) nullHits++; } System.out.println("avgPrecision = " + avgPrecision / (float) numDocsAll); System.out.println("nullHits = " + nullHits / (float) numDocsAll); }
From source file:liredemo.LireDemoFrame.java
License:Open Source License
private void resultsTableMouseClicked(java.awt.event.MouseEvent evt) {//GEN-FIRST:event_resultsTableMouseClicked try {//from w ww . j a v a2 s. c om IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText()))); if (evt.getButton() == MouseEvent.BUTTON3) { int imageID = resultsTable.rowAtPoint(evt.getPoint()) * 3 + resultsTable.columnAtPoint(evt.getPoint()); if (imageID >= 0 && imageID < tableModel.getHits().length()) { String file = reader.document(tableModel.getHits().documentID(imageID)) .getField(DocumentBuilder.FIELD_NAME_IDENTIFIER).stringValue(); try { Desktop.getDesktop().open(new File(file)); } catch (IOException ex) { Logger.getLogger(LireDemoFrame.class.getName()).log(Level.SEVERE, null, ex); } } } if (evt.getClickCount() == 2) { searchForDocument(resultsTable.getSelectedRow() * 3 + resultsTable.getSelectedColumn()); } } catch (IOException e) { e.printStackTrace(); } }
From source file:liredemo.LireDemoFrame.java
License:Open Source License
private void searchForDocument(int tableRow) { try {//from w ww . jav a 2 s . c o m IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText()))); searchForDocument(reader.document(tableModel.getHits().documentID(tableRow))); } catch (IOException e) { e.printStackTrace(); } }
From source file:liredemo.LireDemoFrame.java
License:Open Source License
private void rerankFeatureActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_rerankFeatureActionPerformed RerankFilter filter = null;// www . j a v a2 s.c o m filter = new RerankFilter(ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT); if (selectboxRerankFeature.getSelectedIndex() == 1) { // ScalableColor filter = new RerankFilter(ScalableColor.class, DocumentBuilder.FIELD_NAME_SCALABLECOLOR); } else if (selectboxRerankFeature.getSelectedIndex() == 2) { // EdgeHistogram filter = new RerankFilter(EdgeHistogram.class, DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM); } else if (selectboxRerankFeature.getSelectedIndex() == 3) { // ACC filter = new RerankFilter(AutoColorCorrelogram.class, DocumentBuilder.FIELD_NAME_AUTOCOLORCORRELOGRAM); } else if (selectboxRerankFeature.getSelectedIndex() == 4) { // CEDD filter = new RerankFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD); } else if (selectboxRerankFeature.getSelectedIndex() == 5) { // FCTH filter = new RerankFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH); } else if (selectboxRerankFeature.getSelectedIndex() == 6) { // JCD filter = new RerankFilter(JCD.class, DocumentBuilder.FIELD_NAME_JCD); } else if (selectboxRerankFeature.getSelectedIndex() == 7) { // SimpleColorHistogram filter = new RerankFilter(SimpleColorHistogram.class, DocumentBuilder.FIELD_NAME_COLORHISTOGRAM); } else if (selectboxRerankFeature.getSelectedIndex() == 8) { // Tamura filter = new RerankFilter(Tamura.class, DocumentBuilder.FIELD_NAME_TAMURA); } else if (selectboxRerankFeature.getSelectedIndex() == 9) { // Gabor filter = new RerankFilter(Gabor.class, DocumentBuilder.FIELD_NAME_GABOR); } else if (selectboxRerankFeature.getSelectedIndex() == 10) { // JPEG Coeffs filter = new RerankFilter(JpegCoefficientHistogram.class, DocumentBuilder.FIELD_NAME_JPEGCOEFFS); } else if (selectboxRerankFeature.getSelectedIndex() == 11) { // Joint Histogram filter = new RerankFilter(JointHistogram.class, 
DocumentBuilder.FIELD_NAME_JOINT_HISTOGRAM); } else if (selectboxRerankFeature.getSelectedIndex() == 12) { // OpponentHistogram filter = new RerankFilter(OpponentHistogram.class, DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM); } else if (selectboxRerankFeature.getSelectedIndex() == 13) { // LuminanceLayout filter = new RerankFilter(LuminanceLayout.class, DocumentBuilder.FIELD_NAME_LUMINANCE_LAYOUT); } else if (selectboxRerankFeature.getSelectedIndex() >= 14) { // PHOG filter = new RerankFilter(PHOG.class, DocumentBuilder.FIELD_NAME_PHOG); } try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText()))); tableModel.setHits( filter.filter(tableModel.hits, reader, reader.document(tableModel.hits.documentID(0))), null, reader); } catch (IOException e) { e.printStackTrace(); } }
From source file:liredemo.LireDemoFrame.java
License:Open Source License
private void rerankLsaActionPerformed(java.awt.event.ActionEvent evt) {//GEN-FIRST:event_rerankLsaActionPerformed LsaFilter filter = null;// w w w . j av a 2s . co m filter = new LsaFilter(ColorLayout.class, DocumentBuilder.FIELD_NAME_COLORLAYOUT); if (selectboxDocumentBuilder.getSelectedIndex() == 1) { // ScalableColor filter = new LsaFilter(ScalableColor.class, DocumentBuilder.FIELD_NAME_SCALABLECOLOR); } else if (selectboxDocumentBuilder.getSelectedIndex() == 2) { // EdgeHistogram filter = new LsaFilter(EdgeHistogram.class, DocumentBuilder.FIELD_NAME_EDGEHISTOGRAM); } else if (selectboxDocumentBuilder.getSelectedIndex() == 3) { // ACC filter = new LsaFilter(AutoColorCorrelogram.class, DocumentBuilder.FIELD_NAME_AUTOCOLORCORRELOGRAM); } else if (selectboxDocumentBuilder.getSelectedIndex() == 4) { // CEDD filter = new LsaFilter(CEDD.class, DocumentBuilder.FIELD_NAME_CEDD); } else if (selectboxDocumentBuilder.getSelectedIndex() == 5) { // FCTH filter = new LsaFilter(FCTH.class, DocumentBuilder.FIELD_NAME_FCTH); } else if (selectboxDocumentBuilder.getSelectedIndex() == 6) { // JCD filter = new LsaFilter(JCD.class, DocumentBuilder.FIELD_NAME_JCD); } else if (selectboxDocumentBuilder.getSelectedIndex() == 7) { // SimpleColorHistogram filter = new LsaFilter(SimpleColorHistogram.class, DocumentBuilder.FIELD_NAME_COLORHISTOGRAM); } else if (selectboxDocumentBuilder.getSelectedIndex() == 8) { // Tamura filter = new LsaFilter(Tamura.class, DocumentBuilder.FIELD_NAME_TAMURA); } else if (selectboxDocumentBuilder.getSelectedIndex() == 9) { // Gabor filter = new LsaFilter(Gabor.class, DocumentBuilder.FIELD_NAME_GABOR); } else if (selectboxDocumentBuilder.getSelectedIndex() == 10) { // JPEG Coeffs filter = new LsaFilter(JpegCoefficientHistogram.class, DocumentBuilder.FIELD_NAME_JPEGCOEFFS); } else if (selectboxDocumentBuilder.getSelectedIndex() == 12) { // Joint Histogram filter = new LsaFilter(JointHistogram.class, DocumentBuilder.FIELD_NAME_JOINT_HISTOGRAM); } else if 
(selectboxDocumentBuilder.getSelectedIndex() == 13) { // OpponentHistogram filter = new LsaFilter(OpponentHistogram.class, DocumentBuilder.FIELD_NAME_OPPONENT_HISTOGRAM); } else if (selectboxDocumentBuilder.getSelectedIndex() == 14) { // LuminanceLayout filter = new LsaFilter(LuminanceLayout.class, DocumentBuilder.FIELD_NAME_LUMINANCE_LAYOUT); } else if (selectboxDocumentBuilder.getSelectedIndex() >= 15) { // PHOG filter = new LsaFilter(PHOG.class, DocumentBuilder.FIELD_NAME_PHOG); } try { IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(textfieldIndexName.getText()))); tableModel.setHits( filter.filter(tableModel.hits, reader, reader.document(tableModel.hits.documentID(0))), null, reader); } catch (IOException e) { e.printStackTrace(); } }