List of usage examples for org.apache.lucene.index.IndexReader#close
@Override public final synchronized void close() throws IOException
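Before the examples: IndexReader implements Closeable, so the simplest correct close pattern is try-with-resources. A minimal sketch (Lucene 5+ signatures; the index path is a placeholder):

    import java.io.IOException;
    import java.nio.file.Paths;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.store.FSDirectory;

    // Minimal sketch: try-with-resources guarantees close() even when the
    // search path throws; calling close() again on a closed reader is a no-op.
    void withReader() throws IOException {
        try (IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/path/to/index")))) {
            // ... run searches against the reader ...
        } // reader.close() runs here
    }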
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<String[]> getIndexNames() {
    IndexSearcher mainIndexSearcher = null;
    IndexReader ir = null;
    try {
        ir = IndexReader.open(FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX),
                new SimpleFSLockFactory(indexDir + File.separator + MAIN_INDEX)));
        mainIndexSearcher = new IndexSearcher(ir);
        ArrayList<String[]> indexNames = new ArrayList<String[]>();
        Query q = new WildcardQuery(new Term("indexName", "*"));
        TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
        mainIndexSearcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = mainIndexSearcher.doc(hit.doc);
            String indexName = doc.get("indexName");
            String indexDisplayName = doc.get("displayName");
            indexNames.add(new String[] { indexName, indexDisplayName });
        }
        return indexNames;
    } catch (Exception ex) {
        ex.printStackTrace();
        return null;
    } finally {
        try {
            if (ir != null) {
                ir.close();
                ir = null;
            }
            if (mainIndexSearcher != null) {
                mainIndexSearcher.close();
                mainIndexSearcher = null;
            }
        } catch (IOException e) {
            logger.info("Error: Unable to close index.");
            e.printStackTrace();
            System.exit(0);
        }
    }
}
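A note on the example above: without the null guards, the finally block would throw a NullPointerException whenever the reader failed to open, and it still exits the JVM when a close fails. A hypothetical helper (not part of the original project) that closes everything safely:

    import java.io.Closeable;
    import java.io.IOException;

    // Hypothetical helper: closes each non-null resource, logging failures
    // instead of exiting the JVM, so one bad close cannot skip the remaining
    // resources or kill the application.
    static void closeQuietly(Closeable... resources) {
        for (Closeable c : resources) {
            if (c == null) {
                continue;
            }
            try {
                c.close();
            } catch (IOException e) {
                e.printStackTrace(); // log and keep closing the rest
            }
        }
    }

In Lucene 3.x both IndexReader and IndexSearcher are Closeable, so the whole finally block shrinks to closeQuietly(ir, mainIndexSearcher).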
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public ArrayList<CardSnapshot> getBookmarks() {
    ArrayList<CardSnapshot> cardSnaps = new ArrayList<CardSnapshot>();
    try {
        IndexReader ir = IndexReader.open(FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX),
                new SimpleFSLockFactory(indexDir + File.separator + MAIN_INDEX)));
        IndexSearcher mainIndexSearcher = new IndexSearcher(ir);
        Query q = new WildcardQuery(new Term("qcId", "*"));
        TopScoreDocCollector collector = TopScoreDocCollector.create(10000, false);
        mainIndexSearcher.search(q, collector);
        ScoreDoc[] hits = collector.topDocs().scoreDocs;
        for (ScoreDoc hit : hits) {
            Document doc = mainIndexSearcher.doc(hit.doc);
            IndexReader reader = IndexReader.open(
                    FSDirectory.open(new File(indexDir + File.separator + doc.get("qcIndexName")),
                            new SimpleFSLockFactory(indexDir + File.separator + doc.get("qcIndexName"))));
            IndexSearcher searcher = new IndexSearcher(reader);
            q = new TermQuery(new Term("id", doc.get("qcId")));
            collector = TopScoreDocCollector.create(10000, false);
            searcher.search(q, collector);
            ScoreDoc[] hits2 = collector.topDocs().scoreDocs;
            doc = searcher.doc(hits2[0].doc);
            cardSnaps.add(new CardSnapshot("", doc));
            reader.close();
            searcher.close();
            reader = null;
            searcher = null;
        }
        ir.close();
        mainIndexSearcher.close();
        ir = null;
        mainIndexSearcher = null;
    } catch (Exception ex) {
        ex.printStackTrace();
    }
    return cardSnaps;
}
From source file:com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor.java
License:Apache License
/** Release the reader that was opened for read-only operations. */
private synchronized void releaseReadingReader(IndexReader reader) {
    // do nothing if no reader was passed to the method or the reader was already released
    if (reader == null || readingReaderUseCount == 0) {
        return;
    }
    // check if reader is one of the old reading readers
    if (reader != cachedReadingReader && oldReadingReaders.get(reader) != null) {
        Integer oldReaderUseCount = oldReadingReaders.get(reader) - 1;
        if (oldReaderUseCount > 0) {
            oldReadingReaders.put(reader, oldReaderUseCount);
        } else {
            oldReadingReaders.remove(reader);
            // close unused old reader
            try {
                reader.close();
            } catch (IOException e) {
                LOGGER.error("error closing reading Reader", e);
            }
        }
    } else if (reader != cachedReadingReader) {
        throw new IllegalArgumentException("reading reader not opened by this index accessor");
    } else {
        readingReaderUseCount--;
    }
    notifyAll();
}
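The use-count bookkeeping above mirrors the reference counting that IndexReader already ships with: incRef() registers another owner and decRef() performs the actual close once the last owner releases. A minimal sketch using the built-in API:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;

    // Sketch: IndexReader's built-in reference counting. A reader starts with
    // refCount == 1; decRef() closes it once the count reaches zero.
    IndexReader acquire(IndexReader reader) {
        reader.incRef(); // the caller now shares ownership
        return reader;
    }

    void release(IndexReader reader) throws IOException {
        reader.decRef(); // the last release triggers the actual close
    }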
From source file:com.gentics.cr.lucene.indexaccessor.DefaultIndexAccessor.java
License:Apache License
/**
 * Reopens the cached reading Reader. This method assumes it is invoked in a
 * synchronized context.
 */
private void reopenReadingReader() {
    if (cachedReadingReader == null) {
        return;
    }
    LOGGER.debug("reopening cached reading reader");
    IndexReader oldReader = cachedReadingReader;
    if (readingReaderUseCount > 0) {
        // if the reading reader is currently in use, save it with use count in Map
        oldReadingReaders.put(oldReader, readingReaderUseCount);
    }
    try {
        cachedReadingReader = cachedReadingReader.reopen();
        if (oldReader != cachedReadingReader) {
            // if the old reader was not used close it
            if (readingReaderUseCount == 0) {
                oldReader.close();
            } else {
                // set use count to 0 for the new reader
                readingReaderUseCount = 0;
            }
        }
    } catch (IOException e) {
        LOGGER.error("error reopening reading Reader", e);
    }
}
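reopen() is the Lucene 3.x API; from Lucene 4.0 the equivalent is DirectoryReader.openIfChanged, which returns null when the index is unchanged. A sketch under that assumption:

    import java.io.IOException;
    import org.apache.lucene.index.DirectoryReader;

    // Sketch of the Lucene 4+ reopen idiom: openIfChanged returns null when the
    // index is unchanged, otherwise a new reader; the old reader still has to be
    // closed by whoever owns it.
    DirectoryReader refresh(DirectoryReader current) throws IOException {
        DirectoryReader newer = DirectoryReader.openIfChanged(current);
        if (newer == null) {
            return current; // nothing changed, keep the old reader
        }
        current.close(); // safe only once no other thread still uses it
        return newer;
    }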
From source file:com.github.mosuka.apache.lucene.example.cmd.SearchCommand.java
License:Apache License
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;
    Directory indexDir = null;
    IndexReader reader = null;
    try {
        String index = (String) attrs.get("index");
        String queryStr = (String) attrs.get("query");
        indexDir = FSDirectory.open(new File(index).toPath());
        QueryParser queryParser = new QueryParser("text", new JapaneseAnalyzer());
        Query query = queryParser.parse(queryStr);
        reader = DirectoryReader.open(indexDir);
        IndexSearcher searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(query, 10);
        List<Map<String, Object>> documentList = new LinkedList<Map<String, Object>>();
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document document = searcher.doc(scoreDoc.doc);
            Map<String, Object> documentMap = new LinkedHashMap<String, Object>();
            for (IndexableField f : document.getFields()) {
                documentMap.put(f.name(), f.stringValue());
            }
            documentMap.put("score", scoreDoc.score);
            documentList.add(documentMap);
        }
        responseMap.put("status", 0);
        responseMap.put("message", "OK");
        responseMap.put("totalHits", topDocs.totalHits);
        responseMap.put("maxScore", topDocs.getMaxScore());
        responseMap.put("result", documentList);
    } catch (IOException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } catch (ParseException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }
    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}
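The two finally blocks above close the reader and the directory independently. With Java 7 try-with-resources, both can be declared in one statement and are closed in reverse declaration order. A sketch using the same Lucene 5+ APIs as the example:

    import java.io.IOException;
    import java.nio.file.Paths;
    import org.apache.lucene.index.DirectoryReader;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.FSDirectory;

    // Sketch: resources declared together are closed in reverse order
    // (reader first, then directory), mirroring the manual finally blocks.
    void search(String index) throws IOException {
        try (Directory dir = FSDirectory.open(Paths.get(index));
                IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            // ... searcher.search(...) and response building go here ...
        }
    }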
From source file:com.github.parzonka.esa.indexing.IndexInverter.java
License:Apache License
public void createInvertedIndex() throws CorruptIndexException, IOException, SimilarityException {
    deleteQuietly(invertedIndexDir);
    invertedIndexDir.mkdirs();

    final IndexReader reader = IndexReader.open(FSDirectory.open(luceneIndexDir));
    final int maxDocumentDistributionCount = (int) Math.ceil(maxCorpusDistribution * reader.numDocs());
    final TermEnum termEnum = reader.terms();
    final Set<String> terms = new HashSet<String>();
    int totalTerms = 0;
    while (termEnum.next()) {
        final String term = termEnum.term().text();
        final int termDocFreq = termEnum.docFreq();
        if (minDocumentFrequency <= termDocFreq && termDocFreq < maxDocumentDistributionCount) {
            terms.add(term);
        }
        totalTerms++;
    }
    reader.close();
    System.out.println("Using " + terms.size() + " terms out of " + totalTerms);
    System.out.println("Input Lucene index: " + luceneIndexDir);
    final LuceneVectorReader luceneVectorReader = new LuceneVectorReader(luceneIndexDir);
    configureLuceneVectorReader(luceneVectorReader);
    System.out.println("Output inverted index: " + invertedIndexDir);
    final VectorIndexWriter vectorIndexWriter = new VectorIndexWriter(invertedIndexDir,
            luceneVectorReader.getConceptCount());
    final ProgressMeter progressMeter = new ProgressMeter(terms.size());
    for (String term : terms) {
        final Vector vector = luceneVectorReader.getVector(term);
        vectorIndexWriter.put(term, vector);
        progressMeter.next();
        System.out.println("[" + term + "] " + progressMeter);
    }
    vectorIndexWriter.close();
}
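reader.terms()/TermEnum is the Lucene 3.x enumeration API. Assuming Lucene 8.x, the same document-frequency scan runs per field through MultiTerms and TermsEnum (4.x through 7.x use MultiFields.getTerms instead). A sketch:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.index.MultiTerms;
    import org.apache.lucene.index.Terms;
    import org.apache.lucene.index.TermsEnum;
    import org.apache.lucene.util.BytesRef;

    // Sketch: count the terms of one field whose document frequency falls in
    // [minDf, maxDf), the same filter the example applies with TermEnum.
    static int countTerms(IndexReader reader, String field, int minDf, int maxDf) throws IOException {
        Terms terms = MultiTerms.getTerms(reader, field); // merged view over all segments
        if (terms == null) {
            return 0; // field absent from the index
        }
        int kept = 0;
        TermsEnum termsEnum = terms.iterator();
        BytesRef term;
        while ((term = termsEnum.next()) != null) {
            int df = termsEnum.docFreq();
            if (minDf <= df && df < maxDf) {
                kept++;
            }
        }
        return kept;
    }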
From source file:com.github.rnewson.couchdb.lucene.Search.java
License:Apache License
public static void main(final String[] args) {
    Utils.LOG.info("searcher started.");
    try {
        IndexReader reader = null;
        IndexSearcher searcher = null;
        final Scanner scanner = new Scanner(System.in);
        while (scanner.hasNextLine()) {
            if (reader == null) {
                // Open a reader and searcher if index exists.
                if (IndexReader.indexExists(Config.INDEX_DIR)) {
                    reader = IndexReader.open(NIOFSDirectory.getDirectory(Config.INDEX_DIR), true);
                    searcher = new IndexSearcher(reader);
                }
            }
            final String line = scanner.nextLine();

            // Process search request if index exists.
            if (searcher == null) {
                System.out.println(Utils.error(503, "couchdb-lucene not available."));
                continue;
            }

            final JSONObject obj;
            try {
                obj = JSONObject.fromObject(line);
            } catch (final JSONException e) {
                System.out.println(Utils.error(400, "invalid JSON."));
                continue;
            }
            if (!obj.has("query")) {
                System.out.println(Utils.error(400, "No query found in request."));
                continue;
            }
            final JSONObject query = obj.getJSONObject("query");
            final boolean reopen = !"ok".equals(query.optString("stale", "not-ok"));

            // Refresh reader and searcher if necessary.
            if (reader != null && reopen) {
                final IndexReader newReader = reader.reopen();
                if (reader != newReader) {
                    Utils.LOG.info("Lucene index was updated, reopening searcher.");
                    final IndexReader oldReader = reader;
                    reader = newReader;
                    searcher = new IndexSearcher(reader);
                    oldReader.close();
                }
            }

            try {
                // A query.
                if (query.has("q")) {
                    final JSONArray path = obj.getJSONArray("path");
                    if (path.size() < 3) {
                        System.out.println(Utils.error(400, "No design document in path."));
                        continue;
                    }
                    if (path.size() < 4) {
                        System.out.println(Utils.error(400, "No view name in path."));
                    }
                    if (path.size() > 4) {
                        System.out.println(Utils.error(400, "Extra path info in request."));
                    }
                    assert path.size() == 4;
                    final SearchRequest request = new SearchRequest(obj);
                    final String result = request.execute(searcher);
                    System.out.println(result);
                    continue;
                }
                // info.
                if (query.keySet().isEmpty()) {
                    final JSONObject json = new JSONObject();
                    json.put("current", reader.isCurrent());
                    json.put("disk_size", size(reader.directory()));
                    json.put("doc_count", reader.numDocs());
                    json.put("doc_del_count", reader.numDeletedDocs());
                    final JSONArray fields = new JSONArray();
                    for (final Object field : reader.getFieldNames(FieldOption.INDEXED)) {
                        if (((String) field).startsWith("_"))
                            continue;
                        fields.add(field);
                    }
                    json.put("fields", fields);
                    json.put("last_modified", IndexReader.lastModified(Config.INDEX_DIR));
                    json.put("optimized", reader.isOptimized());

                    final JSONObject info = new JSONObject();
                    info.put("code", 200);
                    info.put("json", json);
                    final JSONObject headers = new JSONObject();
                    headers.put("Content-Type", "text/plain");
                    info.put("headers", headers);
                    System.out.println(info);
                }
            } catch (final Exception e) {
                System.out.println(Utils.error(400, e));
            }
            System.out.println(Utils.error(400, "Bad request."));
        }
        if (reader != null) {
            reader.close();
        }
    } catch (final Exception e) {
        System.out.println(Utils.error(500, e.getMessage()));
    }
    Utils.LOG.info("searcher stopped.");
}
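The manual reopen-swap-close sequence above is exactly what SearcherManager (Lucene 4+) packages up in a thread-safe way, reference-counting searchers across concurrent requests. A minimal sketch, assuming a Directory named dir:

    import java.io.IOException;
    import org.apache.lucene.search.IndexSearcher;
    import org.apache.lucene.search.SearcherManager;
    import org.apache.lucene.store.Directory;

    // Sketch: SearcherManager handles the reopen/swap/close-old-reader dance,
    // keeping readers alive until every in-flight search has released them.
    void searchLoop(Directory dir) throws IOException {
        SearcherManager manager = new SearcherManager(dir, null); // null = default SearcherFactory
        try {
            manager.maybeRefresh(); // reopen if the index changed
            IndexSearcher searcher = manager.acquire(); // ref-counted searcher
            try {
                // ... run queries against searcher ...
            } finally {
                manager.release(searcher); // never close() an acquired searcher
            }
        } finally {
            manager.close(); // closes the underlying reader
        }
    }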
From source file:com.github.tenorviol.gitsearch.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void search(CommandLine cl) throws Exception {
    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    String queryString = "";
    int hitsPerPage = 25;

    Iterator it = cl.commandArgs.iterator();
    while (it.hasNext()) {
        queryString += " " + it.next();
    }

    IndexReader reader = IndexReader.open(FSDirectory.open(new File(cl.indexPath)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
    QueryParser parser = new QueryParser(Version.LUCENE_31, field, analyzer);

    String line = queryString.trim();
    Query query = parser.parse(line);
    System.out.println("Searching for: " + query.toString(field));

    if (repeat > 0) {
        // repeat & time as benchmark
        Date start = new Date();
        for (int i = 0; i < repeat; i++) {
            searcher.search(query, null, 100);
        }
        Date end = new Date();
        System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
    }

    doPagingSearch(searcher, query, hitsPerPage);

    searcher.close();
    reader.close();
}
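A caveat on searcher.close(): it exists only through Lucene 3.x, and, as an assumption based on the 3.x javadoc, it closes the underlying reader only when the searcher opened that reader itself from a Directory; a searcher wrapped around an existing reader, as above, leaves the reader for the caller to close. From Lucene 4.0 IndexSearcher is no longer Closeable and only the reader is closed. A sketch of the 3.x ownership rule:

    import java.io.IOException;
    import org.apache.lucene.index.IndexReader;
    import org.apache.lucene.search.IndexSearcher;

    // Sketch of the Lucene 3.x ownership rule (assumption, see note above).
    void closeBoth(IndexReader reader) throws IOException {
        IndexSearcher searcher = new IndexSearcher(reader); // does not take ownership
        try {
            // ... searches ...
        } finally {
            searcher.close(); // frees searcher state only
            reader.close();   // the caller still owns and must close the reader
        }
    }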
From source file:com.github.tteofili.looseen.Test20NewsgroupsClassification.java
License:Apache License
@Test
public void test20Newsgroups() throws Exception {
    String indexProperty = System.getProperty("index");
    if (indexProperty != null) {
        try {
            index = Boolean.valueOf(indexProperty);
        } catch (Exception e) {
            // ignore
        }
    }
    String splitProperty = System.getProperty("split");
    if (splitProperty != null) {
        try {
            split = Boolean.valueOf(splitProperty);
        } catch (Exception e) {
            // ignore
        }
    }

    Path mainIndexPath = Paths.get(INDEX + "/original");
    Directory directory = FSDirectory.open(mainIndexPath);
    Path trainPath = Paths.get(INDEX + "/train");
    Path testPath = Paths.get(INDEX + "/test");
    Path cvPath = Paths.get(INDEX + "/cv");
    FSDirectory cv = null;
    FSDirectory test = null;
    FSDirectory train = null;
    IndexReader testReader = null;
    if (split) {
        cv = FSDirectory.open(cvPath);
        test = FSDirectory.open(testPath);
        train = FSDirectory.open(trainPath);
    }
    if (index) {
        delete(mainIndexPath);
        if (split) {
            delete(trainPath, testPath, cvPath);
        }
    }

    IndexReader reader = null;
    List<Classifier<BytesRef>> classifiers = new LinkedList<>();
    try {
        Analyzer analyzer = new StandardAnalyzer();
        if (index) {
            System.out.format("Indexing 20 Newsgroups...%n");
            long startIndex = System.currentTimeMillis();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            buildIndex(new File(PREFIX + "/20n/20_newsgroups"), indexWriter);
            long endIndex = System.currentTimeMillis();
            System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(),
                    (endIndex - startIndex) / 1000);
            indexWriter.close();
        }
        if (split && !index) {
            reader = DirectoryReader.open(train);
        } else {
            reader = DirectoryReader.open(directory);
        }
        if (index && split) {
            // split the index
            System.out.format("Splitting the index...%n");
            long startSplit = System.currentTimeMillis();
            DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0);
            datasetSplitter.split(reader, train, test, cv, analyzer, false, CATEGORY_FIELD, BODY_FIELD,
                    SUBJECT_FIELD, CATEGORY_FIELD);
            reader.close();
            reader = DirectoryReader.open(train); // using the train index from now on
            long endSplit = System.currentTimeMillis();
            System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000);
        }

        final long startTime = System.currentTimeMillis();
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1EXP(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1LOG(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer,
                null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 3, 1, 1,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 15, 1, 100));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 30, 3, 300));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 10, 1, 100));
        classifiers.add(new KNearestFuzzyClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer,
                null, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()),
                analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 1,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 3,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1EXP(), analyzer, null, 3,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1LOG(), analyzer, null, 3,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));

        int maxdoc;
        if (split) {
            testReader = DirectoryReader.open(test);
            maxdoc = testReader.maxDoc();
        } else {
            maxdoc = reader.maxDoc();
        }
        System.out.format("Starting evaluation on %d docs...%n", maxdoc);

        ExecutorService service = Executors.newCachedThreadPool();
        List<Future<String>> futures = new LinkedList<>();
        for (Classifier<BytesRef> classifier : classifiers) {
            testClassifier(reader, startTime, testReader, service, futures, classifier);
        }
        for (Future<String> f : futures) {
            System.out.println(f.get());
        }
        Thread.sleep(10000);
        service.shutdown();
    } finally {
        if (reader != null) {
            reader.close();
        }
        directory.close();
        if (test != null) {
            test.close();
        }
        if (train != null) {
            train.close();
        }
        if (cv != null) {
            cv.close();
        }
        if (testReader != null) {
            testReader.close();
        }
        for (Classifier c : classifiers) {
            if (c instanceof Closeable) {
                ((Closeable) c).close();
            }
        }
    }
}
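The long cascade of null checks in the finally block is what org.apache.lucene.util.IOUtils.close(Closeable...) exists for: it skips null arguments, attempts every close even if an earlier one fails, and rethrows the first exception. A sketch:

    import java.io.Closeable;
    import java.io.IOException;
    import org.apache.lucene.util.IOUtils;

    // Sketch: IOUtils.close ignores null arguments and still attempts to close
    // every remaining resource when one close fails, rethrowing the first error.
    void closeAll(Closeable reader, Closeable directory, Closeable test,
                  Closeable train, Closeable cv, Closeable testReader) throws IOException {
        IOUtils.close(reader, directory, test, train, cv, testReader);
    }

There is also IOUtils.closeWhileHandlingException for contexts (such as a finally block) where propagating the close failure would mask the original exception.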
From source file:com.github.tteofili.looseen.TestWikipediaClassification.java
License:Apache License
@Test
public void testItalianWikipedia() throws Exception {
    String indexProperty = System.getProperty("index");
    if (indexProperty != null) {
        try {
            index = Boolean.valueOf(indexProperty);
        } catch (Exception e) {
            // ignore
        }
    }
    String splitProperty = System.getProperty("split");
    if (splitProperty != null) {
        try {
            split = Boolean.valueOf(splitProperty);
        } catch (Exception e) {
            // ignore
        }
    }

    Path mainIndexPath = Paths.get(INDEX + "/original");
    Directory directory = FSDirectory.open(mainIndexPath);
    Path trainPath = Paths.get(INDEX + "/train");
    Path testPath = Paths.get(INDEX + "/test");
    Path cvPath = Paths.get(INDEX + "/cv");
    FSDirectory cv = null;
    FSDirectory test = null;
    FSDirectory train = null;
    DirectoryReader testReader = null;
    if (split) {
        cv = FSDirectory.open(cvPath);
        test = FSDirectory.open(testPath);
        train = FSDirectory.open(trainPath);
    }
    if (index) {
        delete(mainIndexPath);
        if (split) {
            delete(trainPath, testPath, cvPath);
        }
    }

    IndexReader reader = null;
    try {
        Collection<String> stopWordsList = Arrays.asList("di", "a", "da", "in", "per", "tra", "fra", "il",
                "lo", "la", "i", "gli", "le");
        CharArraySet stopWords = new CharArraySet(stopWordsList, true);
        Analyzer analyzer = new ItalianAnalyzer(stopWords);
        if (index) {
            System.out.format("Indexing Italian Wikipedia...%n");
            long startIndex = System.currentTimeMillis();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current1.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current2.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current3.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current4.xml"), indexWriter);
            long endIndex = System.currentTimeMillis();
            System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(),
                    (endIndex - startIndex) / 1000);
            indexWriter.close();
        }
        if (split && !index) {
            reader = DirectoryReader.open(train);
        } else {
            reader = DirectoryReader.open(directory);
        }
        if (index && split) {
            // split the index
            System.out.format("Splitting the index...%n");
            long startSplit = System.currentTimeMillis();
            DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0);
            for (LeafReaderContext context : reader.leaves()) {
                datasetSplitter.split(context.reader(), train, test, cv, analyzer, false, CATEGORY_FIELD,
                        TEXT_FIELD, CATEGORY_FIELD);
            }
            reader.close();
            reader = DirectoryReader.open(train); // using the train index from now on
            long endSplit = System.currentTimeMillis();
            System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000);
        }

        final long startTime = System.currentTimeMillis();
        List<Classifier<BytesRef>> classifiers = new LinkedList<>();
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new BM25Similarity(), analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer,
                null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 1, 1,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 10, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 300));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 3, 100));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 3,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 1,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null, 3,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null, 1,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));

        int maxdoc;
        if (split) {
            testReader = DirectoryReader.open(test);
            maxdoc = testReader.maxDoc();
        } else {
            maxdoc = reader.maxDoc();
        }
        System.out.format("Starting evaluation on %d docs...%n", maxdoc);

        ExecutorService service = Executors.newCachedThreadPool();
        List<Future<String>> futures = new LinkedList<>();
        for (Classifier<BytesRef> classifier : classifiers) {
            final IndexReader finalReader = reader;
            final DirectoryReader finalTestReader = testReader;
            futures.add(service.submit(() -> {
                ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix;
                if (split) {
                    confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalTestReader, classifier,
                            CATEGORY_FIELD, TEXT_FIELD, 60000 * 30);
                } else {
                    confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalReader, classifier,
                            CATEGORY_FIELD, TEXT_FIELD, 60000 * 30);
                }
                final long endTime = System.currentTimeMillis();
                final int elapse = (int) (endTime - startTime) / 1000;
                return " * " + classifier + " \n * accuracy = " + confusionMatrix.getAccuracy()
                        + "\n * precision = " + confusionMatrix.getPrecision()
                        + "\n * recall = " + confusionMatrix.getRecall()
                        + "\n * f1-measure = " + confusionMatrix.getF1Measure()
                        + "\n * avgClassificationTime = " + confusionMatrix.getAvgClassificationTime()
                        + "\n * time = " + elapse + " (sec)\n ";
            }));
        }
        for (Future<String> f : futures) {
            System.out.println(f.get());
        }
        Thread.sleep(10000);
        service.shutdown();
    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
            if (directory != null) {
                directory.close();
            }
            if (test != null) {
                test.close();
            }
            if (train != null) {
                train.close();
            }
            if (cv != null) {
                cv.close();
            }
            if (testReader != null) {
                testReader.close();
            }
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}