List of usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
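numDocs() returns the number of live (non-deleted) documents, while maxDoc() also counts deleted documents that have not yet been merged away. Before the examples below, a minimal standalone sketch (assuming Lucene 5+; the index path is a placeholder):

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // Both the directory and the reader are closed by try-with-resources.
        try (FSDirectory dir = FSDirectory.open(Paths.get("/tmp/index"));
                DirectoryReader reader = DirectoryReader.open(dir)) {
            System.out.println("live docs:    " + reader.numDocs());
            System.out.println("max doc:      " + reader.maxDoc());
            System.out.println("deleted docs: " + reader.numDeletedDocs());
        }
    }
}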
From source file:com.doculibre.constellio.services.ImportExportServicesImpl.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/* w ww .jav a2 s . c o m*/ public void convertData(Directory directory, OutputStream output) { try { WritableWorkbook workbook = Workbook.createWorkbook(output); WritableSheet sheet = workbook.createSheet("fields", 0); WritableFont arial10font = new WritableFont(WritableFont.ARIAL, 10); WritableCellFormat arial10format = new WritableCellFormat(arial10font); IndexReader indexReader = DirectoryReader.open(directory); // { // int column = 0; // for (String fieldName : (Collection<String>) indexReader.getFieldNames()) { // Label label = new Label(column, 0, fieldName, arial10format); // sheet.addCell(label); // column++; // } // } int row = 1; for (int i = 0; i < indexReader.numDocs() /* && i != 502 */; i++) { Document document = indexReader.document(i); int column = 0; for (IndexableField field : document.getFields()) { if (row == 1) { Label label = new Label(column, 0, field.name(), arial10format); sheet.addCell(label); } if (field != null && field.fieldType().stored() && field.binaryValue() == null) { String indexedContent = field.stringValue(); indexedContent = convertText(indexedContent); Label label = new Label(column, row, indexedContent, arial10format); sheet.addCell(label); } column++; } row++; // if (i == 502) { // break; // } } indexReader.close(); workbook.write(); workbook.close(); } catch (IOException e) { throw new RuntimeException(e); } catch (RowsExceededException e) { throw new RuntimeException(e); } catch (WriteException e) { throw new RuntimeException(e); } }
From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java
License:Open Source License
private TopDocs performSearch(IndexSearcher searcher, Query query, IndexReader reader,
        Integer maxResultsCount, Sort sort) throws IOException {
    final TopDocs docs;
    int resultsCount = maxResultsCount == null ? reader.numDocs() : maxResultsCount;
    if (sort == null) {
        docs = searcher.search(query, resultsCount);
    } else {
        docs = searcher.search(query, resultsCount, sort);
    }
    return docs;
}
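Here numDocs() acts as a "return everything" cap on the hit count. One caveat: IndexSearcher.search(query, n) rejects n < 1, so on an empty index numDocs() yields 0 and the call throws IllegalArgumentException. A defensive variant of that line (sketch):

int resultsCount = maxResultsCount == null
        ? Math.max(1, reader.numDocs()) // guard against an empty index
        : maxResultsCount;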
From source file:com.esri.gpt.catalog.lucene.stats.Collectable.java
License:Apache License
/**
 * Determines the number of documents considered during stats collection.
 * <br/>If the document filter bitset is not null, the count is based upon its cardinality.
 * <br/>Otherwise the count is based upon the number of docs returned by the reader.
 * @param reader the index reader
 * @param documentFilterBitSet the bitset representing the subset of documents being considered
 * @return the total number of documents
 */
protected long determineNumberOfDocsConsidered(IndexReader reader, OpenBitSet documentFilterBitSet) {
    if (documentFilterBitSet != null) {
        this.setNumberOfDocsConsidered(documentFilterBitSet.cardinality());
    } else {
        this.setNumberOfDocsConsidered(reader.numDocs());
    }
    return this.getNumberOfDocsConsidered();
}
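A hypothetical call site (sketch; OpenBitSet is the Lucene 3.x bitset class this code targets):

OpenBitSet filter = new OpenBitSet(reader.maxDoc());
filter.set(0);
filter.set(5);
long considered = determineNumberOfDocsConsidered(reader, filter); // 2, the cardinality
long all = determineNumberOfDocsConsidered(reader, null);          // reader.numDocs()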
From source file:com.flaptor.hounder.util.Idx.java
License:Apache License
public static void main(String arg[]) throws Exception {
    check(arg.length > 1, null);
    String cmd = arg[0];
    File idx = new File(arg[1]);
    if ("list".equals(cmd)) {
        int num = (arg.length > 2) ? Integer.parseInt(arg[2]) : -1;
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        int docs = reader.numDocs();
        int max = reader.maxDoc();
        System.err.println("Index contains " + docs + " documents plus " + (max - docs) + " deleted.");
        if (num > -1) {
            if (num == 0) num = docs;
            for (int i = 0; i < max && i < num; i++) {
                System.out.println("----------------------------------------");
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i);
                    List flds = doc.getFields();
                    Iterator iter = flds.iterator();
                    while (iter.hasNext()) {
                        Field fld = (Field) iter.next();
                        String attr = (fld.isIndexed() ? ",i" : "")
                                + (fld.isStored() ? ",s" : "")
                                + (fld.isTokenized() ? ",t" : "");
                        System.out.println(fld.name() + attr + ": " + fld.stringValue());
                    }
                }
            }
            reader.close();
            System.out.println();
        }
    } else if ("search".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        ScorelessHitCollector collector = new HashSetScorelessHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        Set<Integer> docIds = collector.getMatchingDocuments();
        System.out.println("\nNumber of hits: " + docIds.size() + "\n");
        for (Integer docId : docIds) {
            Document doc = searcher.doc(docId);
            List flds = doc.getFields();
            Iterator iter = flds.iterator();
            while (iter.hasNext()) {
                Field fld = (Field) iter.next();
                System.out.println(fld.name() + ": " + fld.stringValue());
            }
        }
        searcher.close();
        System.out.println();
    } else if ("delete".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexReader reader = IndexReader.open(idx);
        reader.deleteDocuments(new Term(field, value));
        reader.close();
    } else if ("optimize".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.optimize();
        writer.close();
    } else if ("merge".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        File idx2 = new File(arg[2]);
        check(idx.exists(), "Index dir 1 not found");
        check(idx2.exists(), "Index dir 2 not found");
        IndexReader reader = IndexReader.open(idx2);
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.addIndexes(new IndexReader[] { reader });
        writer.close();
        reader.close();
    } else if ("term-count".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        String field = arg[2];
        int count = 0;
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term term = terms.term();
            if (term.field().equals(field)) count++;
        }
        terms.close();
        reader.close();
        System.out.println("Found " + count + " different values for field " + field);
    } else if ("hit-count".equals(cmd)) {
        check(arg.length > 3, "Not enough arguments");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        CountingHitCollector collector = new CountingHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        System.out.println("\nNumber of hits: " + collector.getDocCount() + "\n");
        searcher.close();
    } else if ("uncompound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
    } else if ("compound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(true);
        writer.optimize();
        writer.close();
    } else if ("terms".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        IndexReader reader = IndexReader.open(idx);
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term t = terms.term();
            if (t.field().equals(field)) {
                System.out.println(t.text());
            }
        }
    }
}
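The tool targets the Lucene 2.x/3.x API (IndexReader.open(File), TermEnum, optimize()). Hypothetical invocations (sketch; the index path is a placeholder):

// Print the first 10 documents with per-field flags (i = indexed, s = stored, t = tokenized).
Idx.main(new String[] { "list", "/path/to/index", "10" });

// Count the distinct values of the "title" field.
Idx.main(new String[] { "term-count", "/path/to/index", "title" });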
From source file:com.foundationdb.lucene.SimpleTest.java
License:Open Source License
private void assertDocumentsAreThere(Directory dir, int amount) throws IOException {
    IndexReader reader = DirectoryReader.open(dir);
    try {
        assertEquals(amount, reader.numDocs());
    } finally {
        reader.close();
    }
}
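Since IndexReader implements Closeable, the same helper can be written with try-with-resources (equivalent sketch, Java 7+):

private void assertDocumentsAreThere(Directory dir, int amount) throws IOException {
    try (IndexReader reader = DirectoryReader.open(dir)) {
        assertEquals(amount, reader.numDocs());
    }
}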
From source file:com.github.parzonka.esa.indexing.IndexInverter.java
License:Apache License
public void createInvertedIndex() throws CorruptIndexException, IOException, SimilarityException {
    deleteQuietly(invertedIndexDir);
    invertedIndexDir.mkdirs();

    final IndexReader reader = IndexReader.open(FSDirectory.open(luceneIndexDir));
    final int maxDocumentDistributionCount = (int) Math.ceil(maxCorpusDistribution * reader.numDocs());
    final TermEnum termEnum = reader.terms();
    final Set<String> terms = new HashSet<String>();

    int totalTerms = 0;
    while (termEnum.next()) {
        final String term = termEnum.term().text();
        final int termDocFreq = termEnum.docFreq();
        if (minDocumentFrequency <= termDocFreq && termDocFreq < maxDocumentDistributionCount) {
            terms.add(term);
        }
        totalTerms++;
    }
    reader.close();
    System.out.println("Using " + terms.size() + " terms out of " + totalTerms);

    System.out.println("Input Lucene index: " + luceneIndexDir);
    final LuceneVectorReader luceneVectorReader = new LuceneVectorReader(luceneIndexDir);
    configureLuceneVectorReader(luceneVectorReader);

    System.out.println("Output inverted index: " + invertedIndexDir);
    final VectorIndexWriter vectorIndexWriter = new VectorIndexWriter(invertedIndexDir,
            luceneVectorReader.getConceptCount());

    final ProgressMeter progressMeter = new ProgressMeter(terms.size());
    for (String term : terms) {
        final Vector vector = luceneVectorReader.getVector(term);
        vectorIndexWriter.put(term, vector);
        progressMeter.next();
        System.out.println("[" + term + "] " + progressMeter);
    }
    vectorIndexWriter.close();
}
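numDocs() anchors the upper document-frequency cut-off here: with maxCorpusDistribution = 0.1 and one million live documents, any term occurring in 100,000 or more documents is discarded as too common. The vocabulary scan uses the Lucene 3.x TermEnum API; a rough per-field equivalent for Lucene 4.x, as a self-contained sketch (class and method names are illustrative):

import java.io.IOException;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;

final class VocabularyScan {
    // Collects the terms of one field whose document frequency lies in [minDf, maxDf).
    static Set<String> collectTerms(IndexReader reader, String field, int minDf, int maxDf)
            throws IOException {
        Set<String> terms = new HashSet<String>();
        Terms vocabulary = MultiFields.getTerms(reader, field);
        if (vocabulary == null) {
            return terms; // the field does not exist in this index
        }
        TermsEnum termsEnum = vocabulary.iterator(null); // Lucene 4.x; 5+ uses iterator()
        BytesRef termBytes;
        while ((termBytes = termsEnum.next()) != null) {
            int termDocFreq = termsEnum.docFreq();
            if (minDf <= termDocFreq && termDocFreq < maxDf) {
                terms.add(termBytes.utf8ToString());
            }
        }
        return terms;
    }
}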
From source file:com.github.rnewson.couchdb.lucene.Search.java
License:Apache License
public static void main(final String[] args) {
    Utils.LOG.info("searcher started.");
    try {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        final Scanner scanner = new Scanner(System.in);
        while (scanner.hasNextLine()) {
            if (reader == null) {
                // Open a reader and searcher if index exists.
                if (IndexReader.indexExists(Config.INDEX_DIR)) {
                    reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
                    searcher = new IndexSearcher(reader);
                }
            }

            final String line = scanner.nextLine();

            // Process search request if index exists.
            if (searcher == null) {
                System.out.println(Utils.error(503, "couchdb-lucene not available."));
                continue;
            }

            final JSONObject obj;
            try {
                obj = JSONObject.fromObject(line);
            } catch (final JSONException e) {
                System.out.println(Utils.error(400, "invalid JSON."));
                continue;
            }

            if (!obj.has("query")) {
                System.out.println(Utils.error(400, "No query found in request."));
                continue;
            }

            final JSONObject query = obj.getJSONObject("query");
            final boolean reopen = !"ok".equals(query.optString("stale", "not-ok"));

            // Refresh reader and searcher if necessary.
            if (reader != null && reopen) {
                final IndexReader newReader = reader.reopen();
                if (reader != newReader) {
                    Utils.LOG.info("Lucene index was updated, reopening searcher.");
                    final IndexReader oldReader = reader;
                    reader = newReader;
                    searcher = new IndexSearcher(reader);
                    oldReader.close();
                }
            }

            try {
                // A query.
                if (query.has("q")) {
                    final JSONArray path = obj.getJSONArray("path");
                    if (path.size() < 3) {
                        System.out.println(Utils.error(400, "No design document in path."));
                        continue;
                    }
                    if (path.size() < 4) {
                        System.out.println(Utils.error(400, "No view name in path."));
                    }
                    if (path.size() > 4) {
                        System.out.println(Utils.error(400, "Extra path info in request."));
                    }
                    assert path.size() == 4;
                    final SearchRequest request = new SearchRequest(obj);
                    final String result = request.execute(searcher);
                    System.out.println(result);
                    continue;
                }
                // info.
                if (query.keySet().isEmpty()) {
                    final JSONObject json = new JSONObject();
                    json.put("current", reader.isCurrent());
                    json.put("disk_size", size(reader.directory()));
                    json.put("doc_count", reader.numDocs());
                    json.put("doc_del_count", reader.numDeletedDocs());
                    final JSONArray fields = new JSONArray();
                    for (final Object field : reader.getFieldNames(FieldOption.INDEXED)) {
                        if (((String) field).startsWith("_"))
                            continue;
                        fields.add(field);
                    }
                    json.put("fields", fields);
                    json.put("last_modified", IndexReader.lastModified(Config.INDEX_DIR));
                    json.put("optimized", reader.isOptimized());

                    final JSONObject info = new JSONObject();
                    info.put("code", 200);
                    info.put("json", json);
                    final JSONObject headers = new JSONObject();
                    headers.put("Content-Type", "text/plain");
                    info.put("headers", headers);
                    System.out.println(info);
                }
            } catch (final Exception e) {
                System.out.println(Utils.error(400, e));
            }

            System.out.println(Utils.error(400, "Bad request."));
        }
        if (reader != null) {
            reader.close();
        }
    } catch (final Exception e) {
        System.out.println(Utils.error(500, e.getMessage()));
    }
    Utils.LOG.info("searcher stopped.");
}
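The info branch reports numDocs() as doc_count next to numDeletedDocs() as doc_del_count. The reader.reopen() refresh pattern is Lucene 2.x/3.x; in Lucene 4+ the equivalent is DirectoryReader.openIfChanged, roughly (sketch; assumes reader is declared as a DirectoryReader):

// openIfChanged returns null when the index is unchanged.
DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
if (newReader != null) {
    Utils.LOG.info("Lucene index was updated, reopening searcher.");
    IndexReader oldReader = reader;
    reader = newReader;
    searcher = new IndexSearcher(reader);
    oldReader.close();
}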
From source file:com.globalsight.ling.lucene.Index.java
License:Apache License
/**
 * Returns the number of documents stored in this index.
 */
public int getDocumentCount() throws IOException {
    synchronized (m_state) {
        if (m_state != STATE_OPENED) {
            throw new IOException("index is not available");
        }
    }

    try {
        m_lock.readLock().acquire();
        try {
            IndexReader reader = LuceneCache.getLuceneCache(m_directory).getIndexReader();
            int result = reader.numDocs();
            return result;
        } finally {
            m_lock.readLock().release();
        }
    } catch (InterruptedException ex) {
        throw new IOException(ex.getMessage());
    }
}
From source file:com.jaeksoft.searchlib.index.IndexStatistics.java
License:Open Source License
protected IndexStatistics(IndexReader indexReader) {
    maxDoc = indexReader.maxDoc();
    numDocs = indexReader.numDocs();
    numDeletedDocs = indexReader.numDeletedDocs();
    hasDeletions = indexReader.hasDeletions();
    isOptimized = indexReader.isOptimized();
}
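The counters relate as maxDoc() == numDocs() + numDeletedDocs(). isOptimized() was removed in Lucene 4; a common stand-in is a single-segment check (sketch):

// Lucene 4+ approximation of "optimized": the reader covers exactly one segment.
boolean singleSegment = indexReader.leaves().size() == 1;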
From source file:com.jamespot.glifpix.index.ResourceDocument.java
License:Open Source License
protected static Set<Long> getCRCs(IndexReader ir) throws IOException {
    logger.info("Loading CRCs");
    int nbDocs = ir.numDocs();
    float loadFactor = .75f;
    int initCapacity = ((int) (nbDocs / loadFactor)) + 1000;
    Set<Long> toRet = new HashSet<Long>(initCapacity, loadFactor);

    int nbCrc = 0;
    TermEnum te = ir.terms(new Term("crc"));
    for (boolean hasNext = true; hasNext; hasNext = te.next()) {
        nbCrc++;
        if (nbCrc % 100000 == 0) {
            logger.info(nbCrc + " CRCs loaded");
        }
        Term t = te.term();
        if (!t.field().equalsIgnoreCase("crc")) {
            break;
        }
        Long crc = Long.parseLong(t.text());
        toRet.add(crc);
    }
    logger.info("Total CRCs loaded :" + nbCrc);
    return toRet;
}
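The enumeration targets the Lucene 3.x TermEnum API, so it has to break once terms from another field appear. In Lucene 4.x the terms dictionary is scoped per field, which removes the need for that check; a rough equivalent of the loop (sketch; iterator(null) is the 4.x signature):

Terms crcTerms = MultiFields.getTerms(ir, "crc");
if (crcTerms != null) {
    TermsEnum te = crcTerms.iterator(null);
    BytesRef term;
    while ((term = te.next()) != null) {
        toRet.add(Long.parseLong(term.utf8ToString()));
    }
}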