List of usage examples for org.apache.lucene.index IndexReader numDocs
public abstract int numDocs();
From source file:com.searchcode.app.service.CodeSearcher.java
License:Open Source License
/**
 * Returns the total number of documents that are present in the index at this time.
 *
 * @return the number of live (non-deleted) documents, or 0 if the index cannot be opened
 */
public int getTotalNumberDocumentsIndexed() {
    int numDocs = 0;
    // try-with-resources guarantees the reader is closed even when numDocs() throws;
    // the original skipped reader.close() on the exception path (resource leak)
    try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(this.INDEXPATH)))) {
        numDocs = reader.numDocs();
    } catch (Exception ex) {
        LOGGER.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage());
    }
    return numDocs;
}
From source file:com.searchcode.app.service.IndexService.java
License:Open Source License
/** * Returns the total number of documents that are present in the index at this time *///from www . ja v a2 s. c o m public int getTotalNumberDocumentsIndexed() { int numDocs = 0; try { IndexReader reader = DirectoryReader.open(FSDirectory.open(this.INDEX_LOCATION)); numDocs = reader.numDocs(); reader.close(); } catch (IOException ex) { this.logger.info(" caught a " + ex.getClass() + "\n with message: " + ex.getMessage()); } return numDocs; }
From source file:com.stimulus.archiva.search.StandardSearch.java
License:Open Source License
public long getTotalMessageCount(Volume volume) throws MessageSearchException { if (volume == null) throw new MessageSearchException("assertion failure: null volume", logger); //logger.debug("get total no emails {indexpath='"+volume.getIndexPath()+"'}"); int count = 0; File indexDir = new File(volume.getIndexPath()); if (!indexDir.exists()) return 0; IndexReader indexReader = null; try {//from w ww . j a v a2s .co m indexReader = IndexReader.open(indexDir); count += indexReader.numDocs(); indexReader.close(); } catch (IOException e) { logger.debug("failed to open index to calculate total email count", e); //throw new MessageSearchException("failed to open index to calculate total email count",e,logger); } return count; }
From source file:com.sun.javaee.blueprints.carstore.search.UpdateIndex.java
License:Berkeley License
public static Document deleteIndex(String indexFile, String sxDocId) throws IOException { // get document to update, so data can be added SearchIndex si = new SearchIndex(); si.query(indexFile, sxDocId, "uid"); Hits hits = si.getHitsNative();/*from w w w. ja va2s.co m*/ // should only have one return if (hits.length() > 1) { // exception, should only be one throw new IllegalStateException("Should only have one document in index with uid=" + sxDocId); } Document doc = (Document) hits.doc(0); if (bDebug) System.out.println("HAVE DOC " + doc); // Read index and delete targeted doc through a term IndexReader reader = IndexReader.open(indexFile); // delete document by term int delcnt = reader.deleteDocuments(new Term("uid", sxDocId)); if (bDebug) { System.out.println("return Number of items deleted:" + delcnt); int deleted = 0; for (int ii = 0; ii < reader.numDocs(); ii++) { if (reader.isDeleted(ii)) { deleted++; } } if (bDebug) System.out.println("Number of deleted items in the whole index:" + deleted); } reader.close(); return doc; }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testIndexReader() throws IOException { IndexReader reader = DirectoryReader.open(directory); assertEquals(ids.length, reader.maxDoc()); // 8 assertEquals(ids.length, reader.numDocs()); // 8 reader.close();/*from w w w .j a va 2 s .co m*/ }
From source file:CopulaResources.TermCooccurence.java
/**
 * Builds a term co-occurrence list per class label from the index.
 * For every distinct value of {@code classFieldName}, all documents of that
 * class are fetched, their {@code textFieldName} contents analyzed, and the
 * resulting term pairs trimmed to the [minFreq, maxFreq] frequency band.
 *
 * @param indexReader    open reader over the corpus
 * @param classFieldName field holding the class label
 * @param textFieldName  stored field holding the document text
 * @param analyzer       analyzer used to tokenize the text
 * @param minFreq        minimum pair frequency kept
 * @param maxFreq        maximum pair frequency kept
 * @return the accumulated co-occurrence list
 * @throws IOException if the index cannot be read
 */
public static TermCooccurence generateCooccurencebyClass(IndexReader indexReader, String classFieldName,
        String textFieldName, Analyzer analyzer, int minFreq, int maxFreq) throws IOException {
    System.out.println(":::Generating Term-Pair List:::");
    TermCooccurence CooccurList = new TermCooccurence();
    Terms classes = MultiFields.getTerms(indexReader, classFieldName);
    if (classes != null) {
        // hoisted out of the loop: the searcher is loop-invariant (original
        // rebuilt it once per class term)
        IndexSearcher indexSearcher = new IndexSearcher(indexReader);
        // IndexSearcher.search requires numHits > 0; guard against an index
        // whose live-document count is zero
        int maxHits = Math.max(1, indexReader.numDocs());
        TermsEnum classesEnum = classes.iterator();
        BytesRef nextClass;
        while ((nextClass = classesEnum.next()) != null) {
            if (nextClass.length > 0) {
                Term term = new Term(classFieldName, nextClass);
                String tpClass = nextClass.utf8ToString();
                BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder();
                booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST));
                TopDocs topDocs = indexSearcher.search(booleanQuery.build(), maxHits);
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName);
                    for (IndexableField singleStorableField : storableFields) {
                        if (singleStorableField != null) {
                            // original round-tripped String -> BytesRef -> String; pass the value directly
                            generateCooccurences(singleStorableField.stringValue(), analyzer, CooccurList, tpClass);
                        }
                    }
                }
                // drop pairs outside the requested frequency band for this class
                CooccurList.trimbyFreq(tpClass, minFreq, maxFreq);
            }
        }
    }
    System.out.println(":::Generation Complete:::");
    return CooccurList;
}
From source file:CopulaResources.TermCooccurence.java
public static void generateCooccurencebyClass(IndexReader indexReader, String classFieldName, String textFieldName, Analyzer analyzer, int minFreq, int maxFreq, Path saveDir) throws IOException { System.out.println(":::Generating Term-Pair List:::"); TermCooccurence CooccurList = new TermCooccurence(); Terms classes = MultiFields.getTerms(indexReader, classFieldName); if (classes != null) { TermsEnum classesEnum = classes.iterator(); BytesRef nextClass;//from ww w . j ava 2s .c o m while ((nextClass = classesEnum.next()) != null) { if (nextClass.length > 0) { Term term = new Term(classFieldName, nextClass); String tpClass = nextClass.utf8ToString(); BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TopDocs topDocs; topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName); for (IndexableField singleStorableField : storableFields) { if (singleStorableField != null) { BytesRef text = new BytesRef(singleStorableField.stringValue()); generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass); } } } CooccurList.trimbyFreq(tpClass, minFreq, maxFreq); } } } CooccurList.savetoFile(saveDir); System.out.println(":::Generation Complete:::"); }
From source file:CopulaResources.TermCooccurence.java
public static void generateNCooccurencebyClass(IndexReader indexReader, String classFieldName, String textFieldName, Analyzer analyzer, String direction, double percent, Path saveDir) throws IOException { System.out.println(":::Generating Term-Pair List:::"); TermCooccurence CooccurList = new TermCooccurence(); Terms classes = MultiFields.getTerms(indexReader, classFieldName); if (classes != null) { TermsEnum classesEnum = classes.iterator(); BytesRef nextClass;//from ww w .java2 s.c o m while ((nextClass = classesEnum.next()) != null) { if (nextClass.length > 0) { Term term = new Term(classFieldName, nextClass); String tpClass = nextClass.utf8ToString(); BooleanQuery.Builder booleanQuery = new BooleanQuery.Builder(); booleanQuery.add(new BooleanClause(new TermQuery(term), BooleanClause.Occur.MUST)); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TopDocs topDocs; topDocs = indexSearcher.search(booleanQuery.build(), indexReader.numDocs()); for (ScoreDoc scoreDoc : topDocs.scoreDocs) { IndexableField[] storableFields = indexSearcher.doc(scoreDoc.doc).getFields(textFieldName); for (IndexableField singleStorableField : storableFields) { if (singleStorableField != null) { BytesRef text = new BytesRef(singleStorableField.stringValue()); generateCooccurences(text.utf8ToString(), analyzer, CooccurList, tpClass); } } } CooccurList.trimbyPercent(tpClass, direction, percent); } } } CooccurList.savetoFile(saveDir); System.out.println(":::Generation Complete:::"); }
From source file:de.linguatools.disco.Compositionality.java
License:Apache License
/** * Find the most similar words in the DISCO word space for an input word * vector. While the word vector can represent a multi-token word (if it was * produced by one of the methods // w w w. ja va 2 s.co m * <code>Compositionality.composeWordVectors()</code>) the most * similar words will only be single-token words from the index.<br/> * <b>Warning</b>: This method is very time consuming and should only be * used with word spaces that have been loaded into memory! * @param wordvector input word vector * @param disco DISCO word space * @param simMeasure * @return List of all words (with their similarity values) whose similarity * with the <code>wordvector</code> is greater than zero, ordered by * similarity value (highest value first). * @throws java.io.IOException */ public ArrayList<ReturnDataCol> similarWords(HashMap<String, Float> wordvector, DISCO disco, SimilarityMeasures simMeasure) throws IOException { // hole einen IndexReader fuer das indexDir IndexReader ir = disco.getIndexReader(); // durchlaufe alle Dokumente ArrayList<ReturnDataCol> result = new ArrayList(); for (int i = 0; i < ir.numDocs(); i++) { Document doc = null; try { doc = ir.document(i); } catch (CorruptIndexException ex) { continue; } catch (IOException ex) { continue; } // Wortvektor zu Wort Nr. i holen String word = doc.get("word"); HashMap<String, Float> wv = getWordvector(word, disco); // hnlichkeit zwischen Wortvektoren berechnen float sim = semanticSimilarity(wordvector, wv, simMeasure); if (sim > 0.0F) { ReturnDataCol r = new ReturnDataCol(word, sim); result.add(r); } } // nach hchstem hnlichkeitswert sortieren Collections.sort(result, new ValueComparator()); return result; }
From source file:de.linguatools.disco.DISCO.java
License:Apache License
/*************************************************************************** * returns the number of Documents (i.e. words) in the index. * @return number of words in index/*from www . j a v a2 s .c o m*/ * @throws java.io.IOException */ public int numberOfWords() throws IOException { // erzeuge einen IndexReader fuer das indexDir IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexName))); // Hole Anzahl Dokumente im Index return (ir.numDocs()); }