Usage examples for org.apache.lucene.index.IndexReader.numDocs()
public abstract int numDocs();
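numDocs() returns the number of documents in the index, not counting deletions; contrast maxDoc(), which also counts deleted slots that have not yet been merged away. A minimal sketch of the call, using the Lucene 4.x-style API that the examples below rely on and a placeholder index path:

import java.io.File;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.store.FSDirectory;

public class NumDocsExample {
    public static void main(String[] args) throws Exception {
        // "/tmp/my-index" is a placeholder path used only for illustration.
        try (DirectoryReader reader = DirectoryReader.open(FSDirectory.open(new File("/tmp/my-index")))) {
            System.out.println("live documents: " + reader.numDocs()); // excludes deletions
            System.out.println("max doc id:     " + reader.maxDoc());  // includes deleted slots
        }
    }
}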
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
private int _countDocs(String col) {
    // FUTURE add support for non-file resources
    int totalDocs;
    IndexReader reader = null;
    try {
        reader = _getReader(col, true);
        totalDocs = reader.numDocs();
    } catch (Exception e) {
        return 0;
    } finally {
        closeEL(reader);
    }
    return totalDocs;
}
From source file:lucee.runtime.search.lucene2.LuceneSearchCollection.java
License:Open Source License
@Override
public int getDocumentCount(String id) {
    try {
        if (!_getIndexDirectory(id, false).exists())
            return 0;
        IndexReader r = null;
        int num = 0;
        try {
            r = _getReader(id, false);
            num = r.numDocs();
        } finally {
            close(r);
        }
        return num;
    } catch (Exception e) {
    }
    return 0;
}
From source file:luceneindexcreator.LuceneIndexCreator.java
public static void main(String[] args) {
    try {
        Comparator<TermStats> comparator = new Comparator<TermStats>() {
            @Override
            public int compare(TermStats t1, TermStats t2) {
                return t1.totalTermFreq < t2.totalTermFreq ? -1 : 1;
            }
        };

        LuceneIndexCreator lw = new LuceneIndexCreator(INDEX_PATH, JSON_FILE_PATH_WEEKLY);
        lw.createIndex();

        // Check the index has been created successfully.
        Directory indexDirectory = FSDirectory.open(new File(INDEX_PATH));
        IndexReader indexReader = DirectoryReader.open(indexDirectory);
        int numDocs = indexReader.numDocs();

        /* Keywords sorted by date: generation of per-date indexes and the
         * associated JSON files of keyword frequencies.
         *
         * ArrayList<String> indexedDates = new ArrayList<String>();
         * for (int i = 0; i < numDocs; i++) {
         *     Document document = indexReader.document(i);
         *     String date = document.get("Date");
         *     if (!contains(indexedDates, date)) {
         *         LuceneIndexCreator lwd = new LuceneIndexCreator(PARENT_INDEX_PATH + date, JSON_FILE_PATH_WEEKLY);
         *         lwd.createSubindexDate(date);
         *         indexedDates.add(date);
         *     }
         *     Directory indexDirectoryDate = FSDirectory.open(new File(PARENT_INDEX_PATH + date));
         *     IndexReader indexReaderDate = DirectoryReader.open(indexDirectoryDate);
         *     HighFreqTerms hTerms = new HighFreqTerms();
         *     JSONArray termResultJSONArray = new JSONArray();
         *     TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderDate, 50, "content", comparator);
         *     for (int j = 0; j < hTermResult.length; j++) {
         *         JSONObject termResultJSON = new JSONObject();
         *         termResultJSON.put("Term", hTermResult[j].termtext.utf8ToString());
         *         termResultJSON.put("Frequency", hTermResult[j].totalTermFreq);
         *         termResultJSONArray.add(termResultJSON);
         *     }
         *     try (FileWriter file = new FileWriter("JSONResults/" + date + ".json")) {
         *         file.write(termResultJSONArray.toJSONString());
         *         System.out.println("Successfully Copied JSON Object to File...");
         *         System.out.println("\nJSON Object: " + termResultJSONArray);
         *     }
         * }
         */

        // Keywords sorted by week: generation of per-week indexes and the
        // associated JSON files of keyword frequencies.
        ArrayList<String> indexedWeeks = new ArrayList<String>();
        // Create a subindex for each week seen in the main index.
        for (int i = 0; i < numDocs; i++) {
            Document document = indexReader.document(i);
            String weekNum = document.get("Week_number");
            if (!contains(indexedWeeks, weekNum)) {
                LuceneIndexCreator lww = new LuceneIndexCreator(PARENT_INDEX_PATH + "week" + weekNum,
                        JSON_FILE_PATH_WEEKLY);
                lww.createSubindexWeek(weekNum);
                indexedWeeks.add(weekNum);
            }
        }

        JSONArray json1 = new JSONArray();
        for (String weekNum : indexedWeeks) {
            Directory indexDirectoryWeek = FSDirectory.open(new File(PARENT_INDEX_PATH + "week" + weekNum));
            IndexReader indexReaderWeek = DirectoryReader.open(indexDirectoryWeek);
            HighFreqTerms hTerms = new HighFreqTerms();
            TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReaderWeek, 100, "content", comparator);
            // Build the JSON object for this week.
            JSONObject json2 = new JSONObject();
            json2.put("Week", weekNum);
            JSONArray json3 = new JSONArray();
            for (int j = 0; j < hTermResult.length; j++) {
                JSONObject json4 = new JSONObject();
                json4.put("Term", hTermResult[j].termtext.utf8ToString());
                json4.put("Frequency", hTermResult[j].totalTermFreq);
                json3.add(json4);
            }
            json2.put("Terms", json3);
            json1.add(json2);
        }

        // Output the weekly JSON.
        try (FileWriter file = new FileWriter("JSONResults/allWeeklyTerms.json")) {
            file.write(json1.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + json1);
        }

        // Get term frequencies over all docs.
        HighFreqTerms hTerms = new HighFreqTerms();
        JSONArray termResultJSONArray = new JSONArray();
        TermStats[] hTermResult = hTerms.getHighFreqTerms(indexReader, 150, "content", comparator);
        for (int i = 0; i < hTermResult.length; i++) {
            JSONObject termResultJSON = new JSONObject();
            termResultJSON.put("Term", hTermResult[i].termtext.utf8ToString());
            termResultJSON.put("Frequency", hTermResult[i].totalTermFreq);
            termResultJSONArray.add(termResultJSON);
        }
        // Output the overall JSON.
        try (FileWriter file = new FileWriter("JSONResults/allTermFreq.json")) {
            file.write(termResultJSONArray.toJSONString());
            System.out.println("Successfully Copied JSON Object to File...");
            System.out.println("\nJSON Object: " + termResultJSONArray);
        }
    } catch (Exception e) {
        e.printStackTrace();
    }
}
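Note that the example above iterates document ids from 0 to numDocs() - 1, which only visits every document while the index has no deletions. When deletions are possible, the safer pattern (used by the TestWang example further down) is to iterate up to maxDoc() and skip ids missing from the live-docs bitset. A short sketch under the same Lucene 4.x assumptions; the class and method names are hypothetical:

import java.io.IOException;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.util.Bits;

public final class LiveDocsIteration {
    /** Visits every non-deleted document; illustrative helper, not part of the project above. */
    static void visitLiveDocs(DirectoryReader reader) throws IOException {
        Bits liveDocs = MultiFields.getLiveDocs(reader); // null when there are no deletions
        for (int i = 0; i < reader.maxDoc(); i++) {
            if (liveDocs != null && !liveDocs.get(i))
                continue; // skip deleted documents
            // process reader.document(i) here
        }
    }
}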
From source file:lucli.LuceneMethods.java
License:Apache License
public void info() throws java.io.IOException {
    IndexReader indexReader = IndexReader.open(indexName);
    getFieldInfo();
    numDocs = indexReader.numDocs();
    message("Index has " + numDocs + " documents ");
    message("All Fields:" + fields.toString());
    message("Indexed Fields:" + indexedFields.toString());
    if (IndexReader.isLocked(indexName)) {
        message("Index is locked");
    }
    //IndexReader.getCurrentVersion(indexName);
    //System.out.println("Version:" + version);
    indexReader.close();
}
From source file:net.conquiris.index.DefaultWriter.java
License:Apache License
/**
 * Default writer.
 * @param log Log context.
 * @param writer Lucene index writer to use.
 * @param overrideCheckpoint Whether to override the checkpoint.
 * @param checkpoint Overridden checkpoint value.
 * @param created Whether the index has been requested to be created.
 */
DefaultWriter(ContextLog log, IndexWriter writer, boolean overrideCheckpoint, @Nullable String checkpoint,
        boolean created) throws IndexException {
    this.log = checkNotNull(log, "The log context must be provided");
    this.writer = checkNotNull(writer, "The index writer must be provided");
    this.properties = new MapMaker().makeMap();
    this.keys = Collections.unmodifiableSet(this.properties.keySet());
    // Read properties
    try {
        final Map<String, String> commitData;
        final int documents;
        if (created) {
            commitData = ImmutableMap.of();
            documents = 0;
        } else {
            final IndexReader reader = IndexReader.open(writer, false);
            boolean threw = true;
            try {
                Map<String, String> data = reader.getIndexCommit().getUserData();
                if (overrideCheckpoint) {
                    final Map<String, String> modified = Maps.newHashMap();
                    if (data != null) {
                        modified.putAll(data);
                    }
                    modified.put(IndexInfo.CHECKPOINT, checkpoint);
                    commitData = modified;
                } else {
                    commitData = data;
                }
                documents = reader.numDocs();
                threw = false;
            } finally {
                Closeables.close(reader, threw);
            }
        }
        this.indexInfo = IndexInfo.fromMap(documents, commitData);
        this.checkpoint = this.indexInfo.getCheckpoint();
        this.targetCheckpoint = this.indexInfo.getTargetCheckpoint();
        this.properties.putAll(this.indexInfo.getProperties());
    } catch (LockObtainFailedException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.LOCKED);
        throw new IndexException(e);
    } catch (CorruptIndexException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.CORRUPT);
        throw new IndexException(e);
    } catch (IOException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.IOERROR);
        throw new IndexException(e);
    } catch (RuntimeException e) {
        indexStatus.compareAndSet(IndexStatus.OK, IndexStatus.ERROR);
        throw e;
    }
}
From source file:net.dataninja.ee.textEngine.MoreLikeThisQuery.java
License:Apache License
/**
 * Condense the same term in multiple fields into a single term with a
 * total score.
 *
 * @param words a map of words keyed on the word (String) with Int objects as the values.
 */
private Map condenseTerms(IndexReader indexReader, Map words) throws IOException {
    HashMap termScoreMap = new HashMap();

    // For reference in score calculations, get the total # of docs in the index.
    int numDocs = indexReader.numDocs();

    // For each term...
    Iterator it = words.keySet().iterator();
    while (it.hasNext()) {
        Term term = (Term) it.next();

        // Filter out words that don't occur enough times in the source doc.
        int tf = ((Int) words.get(term)).x;
        if (minTermFreq > 0 && tf < minTermFreq)
            continue;

        // Filter out words that don't occur in enough docs.
        int docFreq = indexReader.docFreq(term);
        if (minDocFreq > 0 && docFreq < minDocFreq)
            continue;

        // Filter out words that occur in too many docs.
        if (maxDocFreq > 0 && docFreq > maxDocFreq)
            continue;

        // Handle potential index update problem.
        if (docFreq == 0)
            continue;

        // Calculate a score for this term.
        float idf = similarity.idf(docFreq, numDocs);
        float score = tf * idf;

        // Boost if necessary.
        Float found = (Float) boostMap.get(term.field());
        if (found != null)
            score *= found.floatValue();

        // Add the score to our map.
        String word = term.text();
        if (!termScoreMap.containsKey(word))
            termScoreMap.put(word, new Flt());
        Flt cnt = (Flt) termScoreMap.get(word);
        cnt.x += score;
    }
    return termScoreMap;
}
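For reference, the classic DefaultSimilarity that shipped with Lucene in this era derives idf from exactly the two counts gathered above. Assuming that is the similarity configured here (the source does not say), the per-term score expands to roughly:

// DefaultSimilarity-style idf; the actual value depends on the configured Similarity.
float idf = (float) (Math.log(numDocs / (double) (docFreq + 1)) + 1.0);
float score = tf * idf; // raw term frequency weighted by rarity across the index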
From source file:net.jforum.actions.LuceneAdminActions.java
License:Open Source License
/**
 * Shows the main statistics page
 */
public void list() {
    IndexReader indexReader = null;
    ReaderProvider readerProvider = null;
    try {
        SearchFactory searchFactory = Search.createFullTextSession(this.sessionFactory.getCurrentSession())
                .getSearchFactory();
        DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(Post.class)[0];
        readerProvider = searchFactory.getReaderProvider();
        indexReader = readerProvider.openReader(directoryProvider);

        String indexDirectory = directoryProvider.getDirectory().toString();
        indexDirectory = indexDirectory.substring(indexDirectory.indexOf('@') + 1);
        boolean indexExists = IndexReader.indexExists(indexDirectory);
        this.propertyBag.put("indexExists", indexExists);

        if (indexExists) {
            this.propertyBag.put("numberOfDocs", indexReader.numDocs());
            this.propertyBag.put("indexLocation", indexDirectory);
            this.propertyBag.put("totalMessages", this.forumRepository.getTotalMessages());
            this.propertyBag.put("isLocked", IndexReader.isLocked(indexDirectory));
            this.propertyBag.put("lastModified", new Date(IndexReader.lastModified(indexDirectory)));
        }
    } catch (IOException e) {
        throw new ForumException(e);
    } finally {
        if (readerProvider != null && indexReader != null) {
            readerProvider.closeReader(indexReader);
        }
    }
}
From source file:net.jforum.controllers.LuceneAdminController.java
License:Open Source License
/**
 * Shows the main statistics page
 */
public void list() {
    IndexReader indexReader = null;
    ReaderProvider readerProvider = null;
    try {
        SearchFactory searchFactory = Search.createFullTextSession(this.sessionFactory.getCurrentSession())
                .getSearchFactory();
        DirectoryProvider<?> directoryProvider = searchFactory.getDirectoryProviders(Post.class)[0];
        readerProvider = searchFactory.getReaderProvider();
        indexReader = readerProvider.openReader(directoryProvider);

        String indexDirectory = directoryProvider.getDirectory().toString();
        indexDirectory = indexDirectory.substring(indexDirectory.indexOf('@') + 1);
        boolean indexExists = IndexReader.indexExists(indexDirectory);
        this.result.include("indexExists", indexExists);

        if (indexExists) {
            this.result.include("numberOfDocs", indexReader.numDocs());
            this.result.include("indexLocation", indexDirectory);
            this.result.include("totalMessages", this.forumRepository.getTotalMessages());
            this.result.include("isLocked", IndexReader.isLocked(indexDirectory));
            this.result.include("lastModified", new Date(IndexReader.lastModified(indexDirectory)));
        }
    } catch (IOException e) {
        throw new ForumException(e);
    } finally {
        if (readerProvider != null && indexReader != null) {
            readerProvider.closeReader(indexReader);
        }
    }
}
From source file:net.semanticmetadata.lire.benchmarking.TestNister.java
License:Open Source License
public void testDocLengthIDF(String pathName) throws IOException {
    df = new double[1024];
    int[] len = new int[10200];
    avgDocLength = 0;
    double numDocs = 0;
    for (int i = 0; i < df.length; i++)
        df[i] = 0;
    for (int i = 0; i < len.length; i++)
        len[i] = 0;
    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(pathName)));
    for (int i = 0; i < reader.numDocs(); i++) {
        // if (!reader.isDeleted(i)) {
        String s = reader.document(i).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        String f = reader.document(i).getValues("featureSURFHistogram")[0];
        SimpleFeature sf = new SimpleFeature();
        sf.setStringRepresentation(f);
        double[] h = sf.getDoubleHistogram();
        for (int j = 0; j < h.length; j++) {
            if (h[j] > 0.0)
                df[j] += 1; // add to the document frequency
            avgDocLength += h[j];
            len[i] += h[j];
        }
        numDocs += 1;
        // }
    }
    // System.out.println("avgDocLength = " + avgDocLength / numDocs);
    // for (int i = 0; i < df.length; i++) System.out.print(df[i] + ",");
    // for (int i = 0; i < len.length; i++) System.out.print(len[i] + ", ");
}
From source file:net.semanticmetadata.lire.benchmarking.TestWang.java
License:Open Source License
public void tttestGetDistribution() throws IOException {
    BufferedWriter bw = new BufferedWriter(new FileWriter("data.csv"));
    IndexReader reader = IndexReader.open(FSDirectory.open(new File(indexPath)));
    CEDD cedd1 = new CEDD();
    FCTH fcth1 = new FCTH();
    CEDD cedd2 = new CEDD();
    FCTH fcth2 = new FCTH();
    JCD jcd1 = new JCD();
    JCD jcd2 = new JCD();
    String[] cls;

    // Needed to check whether a document has been deleted.
    Bits liveDocs = MultiFields.getLiveDocs(reader);
    int docs = reader.numDocs();
    for (int i = 0; i < docs; i++) {
        if (reader.hasDeletions() && !liveDocs.get(i))
            continue; // if it is deleted, just ignore it.
        Document doc = reader.document(i);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_CEDD);
        if (cls != null && cls.length > 0)
            cedd1.setStringRepresentation(cls[0]);
        cls = doc.getValues(DocumentBuilder.FIELD_NAME_FCTH);
        if (cls != null && cls.length > 0)
            fcth1.setStringRepresentation(cls[0]);
        for (int j = i + 1; j < docs; j++) {
            if (reader.hasDeletions() && !liveDocs.get(j))
                continue; // if it is deleted, just ignore it.
            Document doc2 = reader.document(j);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_CEDD);
            if (cls != null && cls.length > 0)
                cedd2.setStringRepresentation(cls[0]);
            cls = doc2.getValues(DocumentBuilder.FIELD_NAME_FCTH);
            if (cls != null && cls.length > 0)
                fcth2.setStringRepresentation(cls[0]);
            jcd1.init(cedd1, fcth1);
            jcd2.init(cedd2, fcth2);
            bw.write(cedd1.getDistance(cedd2) + ";" + fcth1.getDistance(fcth2) + ";"
                    + jcd1.getDistance(jcd2) + "\n");
        }
        if (i % 100 == 0)
            System.out.println(i + " entries processed ... ");
    }
    reader.close();
    bw.close();
}