Usage examples for org.apache.lucene.index.IndexReader.maxDoc()
public abstract int maxDoc();
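maxDoc() returns one greater than the largest document number used in the index, so valid document numbers run from 0 to maxDoc() - 1, and that range may still include deleted documents. Code that walks the full range, as most of the examples below do, should skip deleted documents unless the index is known to have none. Here is a minimal sketch of that pattern, assuming a Lucene 5/6-era API; the index path and the "title" field name are hypothetical.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Bits;

public class MaxDocExample {
    public static void main(String[] args) throws IOException {
        // hypothetical index location
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            // null when the index contains no deletions
            Bits liveDocs = MultiFields.getLiveDocs(reader);
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (liveDocs != null && !liveDocs.get(i)) {
                    continue; // skip deleted documents
                }
                Document doc = reader.document(i);
                System.out.println(doc.get("title")); // "title" is a hypothetical stored field
            }
        }
    }
}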
From source file: com.github.tteofili.looseen.MinHashClassifier.java
License: Apache License
public MinHashClassifier(IndexReader reader, String textField, String categoryField,
        int min, int hashCount, int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}
From source file: com.github.tteofili.looseen.Test20NewsgroupsClassification.java
License: Apache License
@Test
public void test20Newsgroups() throws Exception {
    String indexProperty = System.getProperty("index");
    if (indexProperty != null) {
        try {
            index = Boolean.valueOf(indexProperty);
        } catch (Exception e) {
            // ignore
        }
    }
    String splitProperty = System.getProperty("split");
    if (splitProperty != null) {
        try {
            split = Boolean.valueOf(splitProperty);
        } catch (Exception e) {
            // ignore
        }
    }
    Path mainIndexPath = Paths.get(INDEX + "/original");
    Directory directory = FSDirectory.open(mainIndexPath);
    Path trainPath = Paths.get(INDEX + "/train");
    Path testPath = Paths.get(INDEX + "/test");
    Path cvPath = Paths.get(INDEX + "/cv");
    FSDirectory cv = null;
    FSDirectory test = null;
    FSDirectory train = null;
    IndexReader testReader = null;
    if (split) {
        cv = FSDirectory.open(cvPath);
        test = FSDirectory.open(testPath);
        train = FSDirectory.open(trainPath);
    }
    if (index) {
        delete(mainIndexPath);
        if (split) {
            delete(trainPath, testPath, cvPath);
        }
    }
    IndexReader reader = null;
    List<Classifier<BytesRef>> classifiers = new LinkedList<>();
    try {
        Analyzer analyzer = new StandardAnalyzer();
        if (index) {
            System.out.format("Indexing 20 Newsgroups...%n");
            long startIndex = System.currentTimeMillis();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            buildIndex(new File(PREFIX + "/20n/20_newsgroups"), indexWriter);
            long endIndex = System.currentTimeMillis();
            System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(),
                    (endIndex - startIndex) / 1000);
            indexWriter.close();
        }
        if (split && !index) {
            reader = DirectoryReader.open(train);
        } else {
            reader = DirectoryReader.open(directory);
        }
        if (index && split) {
            // split the index
            System.out.format("Splitting the index...%n");
            long startSplit = System.currentTimeMillis();
            DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0);
            datasetSplitter.split(reader, train, test, cv, analyzer, false, CATEGORY_FIELD,
                    BODY_FIELD, SUBJECT_FIELD, CATEGORY_FIELD);
            reader.close();
            reader = DirectoryReader.open(train); // using the train index from now on
            long endSplit = System.currentTimeMillis();
            System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000);
        }
        final long startTime = System.currentTimeMillis();
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null,
                1, 0, 0, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null,
                1, 0, 0, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null,
                3, 0, 0, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1EXP(), analyzer, null,
                3, 0, 0, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1LOG(), analyzer, null,
                3, 0, 0, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 15, 1, 100));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 30, 3, 300));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 10, 1, 100));
        classifiers.add(new KNearestFuzzyClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null,
                1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()),
                analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null,
                1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null,
                3, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1EXP(), analyzer, null,
                3, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1LOG(), analyzer, null,
                3, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        int maxdoc;
        if (split) {
            testReader = DirectoryReader.open(test);
            maxdoc = testReader.maxDoc();
        } else {
            maxdoc = reader.maxDoc();
        }
        System.out.format("Starting evaluation on %d docs...%n", maxdoc);
        ExecutorService service = Executors.newCachedThreadPool();
        List<Future<String>> futures = new LinkedList<>();
        for (Classifier<BytesRef> classifier : classifiers) {
            testClassifier(reader, startTime, testReader, service, futures, classifier);
        }
        for (Future<String> f : futures) {
            System.out.println(f.get());
        }
        Thread.sleep(10000);
        service.shutdown();
    } finally {
        if (reader != null) {
            reader.close();
        }
        directory.close();
        if (test != null) {
            test.close();
        }
        if (train != null) {
            train.close();
        }
        if (cv != null) {
            cv.close();
        }
        if (testReader != null) {
            testReader.close();
        }
        for (Classifier c : classifiers) {
            if (c instanceof Closeable) {
                ((Closeable) c).close();
            }
        }
    }
}
From source file: com.github.tteofili.looseen.TestWikipediaClassification.java
License: Apache License
@Test
public void testItalianWikipedia() throws Exception {
    String indexProperty = System.getProperty("index");
    if (indexProperty != null) {
        try {
            index = Boolean.valueOf(indexProperty);
        } catch (Exception e) {
            // ignore
        }
    }
    String splitProperty = System.getProperty("split");
    if (splitProperty != null) {
        try {
            split = Boolean.valueOf(splitProperty);
        } catch (Exception e) {
            // ignore
        }
    }
    Path mainIndexPath = Paths.get(INDEX + "/original");
    Directory directory = FSDirectory.open(mainIndexPath);
    Path trainPath = Paths.get(INDEX + "/train");
    Path testPath = Paths.get(INDEX + "/test");
    Path cvPath = Paths.get(INDEX + "/cv");
    FSDirectory cv = null;
    FSDirectory test = null;
    FSDirectory train = null;
    DirectoryReader testReader = null;
    if (split) {
        cv = FSDirectory.open(cvPath);
        test = FSDirectory.open(testPath);
        train = FSDirectory.open(trainPath);
    }
    if (index) {
        delete(mainIndexPath);
        if (split) {
            delete(trainPath, testPath, cvPath);
        }
    }
    IndexReader reader = null;
    try {
        Collection<String> stopWordsList = Arrays.asList("di", "a", "da", "in", "per", "tra", "fra",
                "il", "lo", "la", "i", "gli", "le");
        CharArraySet stopWords = new CharArraySet(stopWordsList, true);
        Analyzer analyzer = new ItalianAnalyzer(stopWords);
        if (index) {
            System.out.format("Indexing Italian Wikipedia...%n");
            long startIndex = System.currentTimeMillis();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current1.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current2.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current3.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current4.xml"), indexWriter);
            long endIndex = System.currentTimeMillis();
            System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(),
                    (endIndex - startIndex) / 1000);
            indexWriter.close();
        }
        if (split && !index) {
            reader = DirectoryReader.open(train);
        } else {
            reader = DirectoryReader.open(directory);
        }
        if (index && split) {
            // split the index
            System.out.format("Splitting the index...%n");
            long startSplit = System.currentTimeMillis();
            DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0);
            for (LeafReaderContext context : reader.leaves()) {
                datasetSplitter.split(context.reader(), train, test, cv, analyzer, false,
                        CATEGORY_FIELD, TEXT_FIELD, CATEGORY_FIELD);
            }
            reader.close();
            reader = DirectoryReader.open(train); // using the train index from now on
            long endSplit = System.currentTimeMillis();
            System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000);
        }
        final long startTime = System.currentTimeMillis();
        List<Classifier<BytesRef>> classifiers = new LinkedList<>();
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null,
                1, 0, 0, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new BM25Similarity(), analyzer, null,
                1, 0, 0, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null,
                1, 0, 0, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null,
                3, 0, 0, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 10, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 300));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 3, 100));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null,
                3, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null,
                1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null,
                3, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null,
                1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        int maxdoc;
        if (split) {
            testReader = DirectoryReader.open(test);
            maxdoc = testReader.maxDoc();
        } else {
            maxdoc = reader.maxDoc();
        }
        System.out.format("Starting evaluation on %d docs...%n", maxdoc);
        ExecutorService service = Executors.newCachedThreadPool();
        List<Future<String>> futures = new LinkedList<>();
        for (Classifier<BytesRef> classifier : classifiers) {
            final IndexReader finalReader = reader;
            final DirectoryReader finalTestReader = testReader;
            futures.add(service.submit(() -> {
                ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix;
                if (split) {
                    confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalTestReader, classifier,
                            CATEGORY_FIELD, TEXT_FIELD, 60000 * 30);
                } else {
                    confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalReader, classifier,
                            CATEGORY_FIELD, TEXT_FIELD, 60000 * 30);
                }
                final long endTime = System.currentTimeMillis();
                final int elapse = (int) (endTime - startTime) / 1000;
                return " * " + classifier + " \n * accuracy = " + confusionMatrix.getAccuracy()
                        + "\n * precision = " + confusionMatrix.getPrecision()
                        + "\n * recall = " + confusionMatrix.getRecall()
                        + "\n * f1-measure = " + confusionMatrix.getF1Measure()
                        + "\n * avgClassificationTime = " + confusionMatrix.getAvgClassificationTime()
                        + "\n * time = " + elapse + " (sec)\n ";
            }));
        }
        for (Future<String> f : futures) {
            System.out.println(f.get());
        }
        Thread.sleep(10000);
        service.shutdown();
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
            if (directory != null) {
                directory.close();
            }
            if (test != null) {
                test.close();
            }
            if (train != null) {
                train.close();
            }
            if (cv != null) {
                cv.close();
            }
            if (testReader != null) {
                testReader.close();
            }
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}
From source file: com.globalsight.ling.lucene.analysis.ChainedFilter.java
License: Apache License
/**
 * Delegates to each filter in the chain.
 *
 * @param reader IndexReader
 * @param logic Logical operation
 * @return BitSet
 */
private BitSet bits(IndexReader reader, int logic) throws IOException {
    BitSet result;
    int i = 0;
    // First AND operation takes place against a completely false
    // bitset and will always return zero results. Thanks to
    // Daniel Armbrust for pointing this out and suggesting workaround:
    // seed the result with the first filter's bits instead.
    if (logic == AND) {
        result = (BitSet) chain[i].bits(reader).clone();
        ++i;
    } else {
        result = new BitSet(reader.maxDoc());
    }
    for (; i < chain.length; i++) {
        doChain(result, reader, logic, chain[i]);
    }
    return result;
}
From source file: com.globalsight.ling.lucene.analysis.ChainedFilter.java
License: Apache License
/**
 * Delegates to each filter in the chain.
 *
 * @param reader IndexReader
 * @param logic Logical operation
 * @return BitSet
 */
private BitSet bits(IndexReader reader, int[] logic) throws IOException {
    if (logic.length != chain.length) {
        throw new IllegalArgumentException("Invalid number of elements in logic array");
    }
    BitSet result;
    int i = 0;
    // First AND operation takes place against a completely false
    // bitset and will always return zero results. Thanks to
    // Daniel Armbrust for pointing this out and suggesting workaround:
    // seed the result with the first filter's bits instead.
    if (logic[0] == AND) {
        result = (BitSet) chain[i].bits(reader).clone();
        ++i;
    } else {
        result = new BitSet(reader.maxDoc());
    }
    for (; i < chain.length; i++) {
        doChain(result, reader, logic[i], chain[i]);
    }
    return result;
}
From source file: com.greplin.lucene.filter.PhraseFilter.java
License: Apache License
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    List<IndexReader> subReaders = IndexReaders.gatherSubReaders(reader);
    PhraseFilterMatchList[] results = new PhraseFilterMatchList[subReaders.size()];
    int matchCount = 0;
    int readerNumber = 0;

    for (IndexReader subReader : subReaders) {
        SortedSet<TermWithFrequency> termsOrderedByFrequency = Sets.newTreeSet();
        for (int i = 0; i < this.terms.length; i++) {
            Term t = this.terms[i];
            termsOrderedByFrequency.add(new TermWithFrequency(t, subReader.docFreq(t), i));
        }

        PhraseFilterMatchList matches = null;
        TermPositions termPositions = subReader.termPositions();
        try {
            for (TermWithFrequency term : termsOrderedByFrequency) {
                if (term.docFreq == 0) {
                    break;
                }
                termPositions.seek(term.term);

                if (matches == null) {
                    // If this is the first term, collect all matches that intersect
                    // with the provided initial document set.
                    Intersection intersection = this.intersectionProvider.get(reader);
                    matches = new PhraseFilterMatchList(term.docFreq);
                    while (intersection.advanceToNextIntersection(termPositions)) {
                        int freq = termPositions.freq();
                        PhraseFilterIntList list = new PhraseFilterIntList(freq);
                        for (int i = 0; i < freq; i++) {
                            list.add(termPositions.nextPosition() - term.offset);
                        }
                        matches.add(termPositions.doc(), list);
                    }
                } else {
                    // Otherwise, intersect with the existing matches.
                    matches.intersect(termPositions, term.offset);
                }

                if (matches.getCount() == 0) {
                    break;
                }
            }
        } finally {
            termPositions.close();
        }

        if (matches != null) {
            results[readerNumber] = matches;
            matchCount += matches.getCount();
        }
        readerNumber++;
    }

    final int bitsPerIntPowerLogTwo = 5; // 2^5 = 32
    if (matchCount > reader.maxDoc() >> bitsPerIntPowerLogTwo) {
        FixedBitSet result = new FixedBitSet(reader.maxDoc());
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    result.set(docIds[i] + readerOffset);
                }
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return result;
    } else if (matchCount == 0) {
        return DocIdSets.EMPTY;
    } else {
        int[] result = new int[matchCount];
        int base = 0;
        int readerOffset = 0;
        for (int readerIndex = 0; readerIndex < results.length; readerIndex++) {
            PhraseFilterMatchList matches = results[readerIndex];
            if (matches != null) {
                int count = matches.getCount();
                int[] docIds = matches.getDocIds();
                for (int i = 0; i < count; i++) {
                    result[base + i] = docIds[i] + readerOffset;
                }
                base += count;
            }
            readerOffset += subReaders.get(readerIndex).maxDoc();
        }
        return new SortedIntArrayDocIdSet(result);
    }
}
From source file: com.greplin.lucene.filter.TermsFilter.java
License: Apache License
@Override
public DocIdSet getDocIdSet(final IndexReader reader) throws IOException {
    FixedBitSet result = new FixedBitSet(reader.maxDoc());
    TermDocs td = reader.termDocs();
    try {
        for (Term term : this.terms) {
            td.seek(term);
            while (td.next()) {
                result.set(td.doc());
            }
        }
    } finally {
        td.close();
    }
    return result;
}
From source file: com.ideabase.repository.core.index.ExtendedRangeFilter.java
License: Open Source License
@Override
public BitSet bits(IndexReader reader) throws IOException {
    final BitSet bits = new BitSet(reader.maxDoc());
    final TermEnum enumerator = (mIncludeLower
            ? reader.terms(new Term(mFieldName, mLowerTerm))
            : reader.terms(new Term(mFieldName, "")));
    try {
        if (enumerator.term() == null) {
            return bits;
        }
        boolean checkLower = mIncludeLower;
        final TermDocs termDocs = reader.termDocs();
        try {
            do {
                final Term currentTerm = enumerator.term();
                final boolean sameField = (currentTerm != null && currentTerm.field().equals(mFieldName));
                if (sameField) {
                    Long valueNumber = 0L;
                    String valueText = currentTerm.text();
                    final boolean noLowerBound = !checkLower || mLowerTerm == null;
                    final int compareResult;
                    if (mNumberField) {
                        valueNumber = Long.valueOf(valueText);
                        compareResult = valueNumber.compareTo(mLowerTermNumber);
                    } else {
                        compareResult = valueText.compareTo(mLowerTerm);
                    }
                    if (noLowerBound || compareResult > 0) {
                        checkLower = false;
                        if (mUpperTerm != null) {
                            final int compare;
                            if (mNumberField) {
                                compare = mUpperTermNumber.compareTo(valueNumber);
                            } else {
                                compare = mUpperTerm.compareTo(valueText);
                            }
                            /*
                             * if beyond the upper term, or is exclusive and
                             * this is equal to the upper term, break out
                             */
                            if ((compare < 0) || (!mIncludeUpper && compare == 0)) {
                                break;
                            }
                        }
                        /* we have a good term, find the docs */
                        termDocs.seek(enumerator.term());
                        while (termDocs.next()) {
                            bits.set(termDocs.doc());
                        }
                    }
                } else {
                    break;
                }
            } while (enumerator.next());
        } finally {
            termDocs.close();
        }
    } finally {
        enumerator.close();
    }
    return bits;
}
From source file: com.ikon.module.db.stuff.IndexHelper.java
License: Open Source License
public void checkIndexOnStartup() {
    //log.info("Observed event {1} from Thread {0}", Thread.currentThread().getName(), App.INIT_SUCCESS);

    // See if we need to rebuild the index during startup ...
    FullTextEntityManager ftEm = Search.getFullTextEntityManager(entityManager);
    SearchFactory searchFactory = ftEm.getSearchFactory();
    ReaderProvider readerProvider = searchFactory.getReaderProvider();
    IndexReader reader = readerProvider
            .openReader(searchFactory.getDirectoryProviders(NodeDocumentVersion.class)[0]);
    int maxDoc = 0;

    try {
        maxDoc = reader.maxDoc();
    } finally {
        readerProvider.closeReader(reader);
    }

    if (maxDoc == 0) {
        log.warn("No objects indexed ... rebuilding Lucene search index from database ...");
        long _exit = 0L;
        long _entr = System.currentTimeMillis();

        try {
            int docs = doRebuildIndex();
            _exit = System.currentTimeMillis();
            log.info("Took " + (_exit - _entr) + " (ms) to re-build the index containing "
                    + docs + " documents.");
        } catch (Exception exc) {
            if (exc instanceof RuntimeException) {
                throw (RuntimeException) exc;
            } else {
                throw new RuntimeException(exc);
            }
        }

        // build the spell checker index off of the HS index.
        buildSpellCheckerIndex(searchFactory);
    }
}
From source file: com.ikon.servlet.admin.ListIndexesServlet.java
License: Open Source License
/**
 * List Lucene indexes
 */
@SuppressWarnings("unchecked")
private void showLuceneDocument(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    boolean showTerms = WebUtils.getBoolean(request, "showTerms");
    int id = WebUtils.getInt(request, "id", 0);
    FullTextSession ftSession = null;
    ReaderProvider rProv = null;
    Session session = null;
    IndexReader idx = null;
    List<Map<String, String>> fields = new ArrayList<Map<String, String>>();

    try {
        session = HibernateUtil.getSessionFactory().openSession();
        ftSession = Search.getFullTextSession(session);
        SearchFactory sFactory = ftSession.getSearchFactory();
        rProv = sFactory.getReaderProvider();
        DirectoryProvider<Directory>[] dirProv = sFactory.getDirectoryProviders(NodeDocument.class);
        idx = rProv.openReader(dirProv[0]);

        // Print Lucene documents
        if (!idx.isDeleted(id)) {
            Document doc = idx.document(id);
            String hibClass = null;

            for (Fieldable fld : doc.getFields()) {
                Map<String, String> field = new HashMap<String, String>();
                field.put("name", fld.name());
                field.put("value", fld.stringValue());
                fields.add(field);

                if (fld.name().equals("_hibernate_class")) {
                    hibClass = fld.stringValue();
                }
            }

            /**
             * 1) Get all the terms using indexReader.terms()
             * 2) Process the term only if it belongs to the target field.
             * 3) Get all the docs using indexReader.termDocs(term);
             * 4) So, we have the term-doc pairs at this point.
             */
            if (showTerms && NodeDocument.class.getCanonicalName().equals(hibClass)) {
                List<String> terms = new ArrayList<String>();

                for (TermEnum te = idx.terms(); te.next();) {
                    Term t = te.term();

                    if ("text".equals(t.field())) {
                        for (TermDocs tds = idx.termDocs(t); tds.next();) {
                            if (id == tds.doc()) {
                                terms.add(t.text());
                            }
                        }
                    }
                }

                Map<String, String> field = new HashMap<String, String>();
                field.put("name", "terms");
                field.put("value", terms.toString());
                fields.add(field);
            }
        }

        ServletContext sc = getServletContext();
        sc.setAttribute("fields", fields);
        sc.setAttribute("id", id);
        sc.setAttribute("max", idx.maxDoc() - 1);
        sc.setAttribute("prev", id > 0);
        sc.setAttribute("next", id < idx.maxDoc() - 1);
        sc.setAttribute("showTerms", showTerms);
        sc.getRequestDispatcher("/admin/list_indexes.jsp").forward(request, response);
    } finally {
        if (rProv != null && idx != null) {
            rProv.closeReader(idx);
        }

        HibernateUtil.close(ftSession);
        HibernateUtil.close(session);
    }
}