List of usage examples for org.apache.lucene.index IndexWriter close
@Override public void close() throws IOException
From source file:com.github.lucene.store.jdbc.AbstractJdbcDirectoryITest.java
License:Apache License
protected void addDocuments(final Directory directory, final OpenMode openMode, final boolean useCompoundFile, final Collection<String> docs) throws IOException { final IndexWriterConfig config = new IndexWriterConfig(analyzer); config.setOpenMode(OpenMode.CREATE); config.setUseCompoundFile(useCompoundFile); final DirectoryTemplate template = new DirectoryTemplate(directory); template.execute(new DirectoryTemplate.DirectoryCallbackWithoutResult() { @Override// w w w.ja v a2s. c om public void doInDirectoryWithoutResult(final Directory dir) throws IOException { final IndexWriter writer = new IndexWriter(dir, config); for (final Object element : docs) { final Document doc = new Document(); final String word = (String) element; // FIXME: review // doc.add(new Field("keyword", word, Field.Store.YES, // Field.Index.UN_TOKENIZED)); // doc.add(new Field("unindexed", word, Field.Store.YES, // Field.Index.NO)); // doc.add(new Field("unstored", word, Field.Store.NO, // Field.Index.TOKENIZED)); // doc.add(new Field("text", word, Field.Store.YES, // Field.Index.TOKENIZED)); doc.add(new StringField("keyword", word, Field.Store.YES)); doc.add(new StringField("unindexed", word, Field.Store.YES)); doc.add(new StringField("unstored", word, Field.Store.NO)); doc.add(new StringField("text", word, Field.Store.YES)); writer.addDocument(doc); } // FIXME: review // writer.optimize(); writer.close(); } }); }
From source file:com.github.mosuka.apache.lucene.example.cmd.AddCommand.java
License:Apache License
@Override public void execute(Map<String, Object> attrs) { Map<String, Object> responseMap = new LinkedHashMap<String, Object>(); String responseJSON = null;//from www. j a v a2 s. c om Directory indexDir = null; IndexWriter writer = null; try { String index = (String) attrs.get("index"); String uniqueId = (String) attrs.get("unique_id"); String text = (String) attrs.get("text"); indexDir = FSDirectory.open(new File(index).toPath()); Document document = LuceneExampleUtil.createDocument(uniqueId, text); IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper()); config.setOpenMode(OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(indexDir, config); writer.addDocument(document); writer.commit(); responseMap.put("status", 0); responseMap.put("message", "OK"); } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } finally { try { if (writer != null) { writer.close(); } } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } try { if (indexDir != null) { indexDir.close(); } } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } } try { ObjectMapper mapper = new ObjectMapper(); responseJSON = mapper.writeValueAsString(responseMap); } catch (IOException e) { responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage()); } System.out.println(responseJSON); }
From source file:com.github.mosuka.apache.lucene.example.cmd.DeleteCommand.java
License:Apache License
@Override public void execute(Map<String, Object> attrs) { Map<String, Object> responseMap = new LinkedHashMap<String, Object>(); String responseJSON = null;// w ww. j a v a 2s . c o m Directory indexDir = null; IndexWriter writer = null; try { String index = (String) attrs.get("index"); String uniqueId = (String) attrs.get("unique_id"); indexDir = FSDirectory.open(new File(index).toPath()); IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper()); config.setOpenMode(OpenMode.CREATE_OR_APPEND); writer = new IndexWriter(indexDir, config); writer.deleteDocuments(new Term("id", uniqueId)); writer.commit(); responseMap.put("status", 0); responseMap.put("message", "OK"); } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } finally { try { if (writer != null) { writer.close(); } } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } try { if (indexDir != null) { indexDir.close(); } } catch (IOException e) { responseMap.put("status", 1); responseMap.put("message", e.getMessage()); } } try { ObjectMapper mapper = new ObjectMapper(); responseJSON = mapper.writeValueAsString(responseMap); } catch (IOException e) { responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage()); } System.out.println(responseJSON); }
From source file:com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java
License:Apache License
/**
 * Replaces the document identified by its {@code id} term and prints a JSON status object to
 * standard output.
 *
 * <p>Reads {@code "index"} (index directory path), {@code "unique_id"} and {@code "text"} from
 * {@code attrs}, builds the document with {@code LuceneExampleUtil.createDocument}, opens the index
 * in {@code CREATE_OR_APPEND} mode, calls {@code updateDocument} keyed on the document's own
 * {@code id} value and commits. The response carries {@code status} 0 and {@code message} "OK" on
 * success, or {@code status} 1 and the exception message when an {@link IOException} occurs while
 * updating or while closing the writer/directory.
 *
 * @param attrs command attributes; the three keys above are cast to {@code String}
 */
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;

    String index = (String) attrs.get("index");
    String uniqueId = (String) attrs.get("unique_id");
    String text = (String) attrs.get("text");

    // Nested try-with-resources: the writer is closed before the directory, and a failure while
    // closing is attached as a suppressed exception instead of overwriting the root-cause message.
    try (Directory indexDir = FSDirectory.open(new File(index).toPath())) {
        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(indexDir, config)) {
            writer.updateDocument(new Term("id", document.get("id")), document);
            writer.commit();
        }

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        // Status 1 for every failure: this path previously reported -1 while the close-failure and
        // serialization-failure paths of this same method reported 1.
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        // Hand-built fallback: escape backslashes and quotes so the output is still valid JSON.
        String message = String.valueOf(e.getMessage()).replace("\\", "\\\\").replace("\"", "\\\"");
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", message);
    }
    System.out.println(responseJSON);
}
From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java
License:Open Source License
@Test public void testArabicRootIndex() throws IOException, ParseException, URISyntaxException { Directory index = new RAMDirectory(); ArabicRootExtractorAnalyzer analyzer = new ArabicRootExtractorAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); final AtomicInteger id = new AtomicInteger(0); IndexWriter w = new IndexWriter(index, config); URL url = ArabicRootExtractorStemmer.class.getClassLoader() .getResource("com/github/msarhan/lucene/fateha.txt"); if (url == null) { fail("Not able to load data file!"); }//from ww w . j a v a 2 s .co m Files.lines(new File(url.toURI()).toPath()) .forEach(line -> addDoc(w, line, String.valueOf(id.incrementAndGet()))); w.close(); String querystr = ""; Query q = new QueryParser("title", analyzer).parse(querystr); int hitsPerPage = 10; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(q, hitsPerPage); //print(searcher, docs); assertEquals(2, docs.scoreDocs.length); }
From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java
License:Open Source License
@Test public void testInlineStemmer() throws IOException, ParseException { //Initialize the index Directory index = new RAMDirectory(); Analyzer analyzer = new ArabicRootExtractorAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(index, config); Document doc = new Document(); doc.add(new StringField("number", "1", Field.Store.YES)); doc.add(new TextField("title", "?? ? ? ??", Field.Store.YES));/*w w w . ja v a2s . c om*/ writer.addDocument(doc); doc = new Document(); doc.add(new StringField("number", "2", Field.Store.YES)); doc.add(new TextField("title", "? ?? ? ?", Field.Store.YES)); writer.addDocument(doc); doc = new Document(); doc.add(new StringField("number", "3", Field.Store.YES)); doc.add(new TextField("title", "? ??", Field.Store.YES)); writer.addDocument(doc); writer.close(); //~ //Query the index String queryStr = ""; Query query = new QueryParser("title", analyzer).parse(queryStr); int hitsPerPage = 5; IndexReader reader = DirectoryReader.open(index); IndexSearcher searcher = new IndexSearcher(reader); TopDocs docs = searcher.search(query, hitsPerPage, Sort.INDEXORDER); ScoreDoc[] hits = docs.scoreDocs; //~ //Print results /* System.out.println("Found " + hits.length + " hits:"); for (ScoreDoc hit : hits) { int docId = hit.doc; Document d = searcher.doc(docId); System.out.printf("\t(%s): %s\n", d.get("number"), d.get("title")); } */ //~ }
From source file:com.github.tenorviol.gitsearch.IndexFiles.java
License:Apache License
/** Index all text files under a directory. */ public static void index(CommandLine cl) { boolean create = true; // TODO: multi files final File docDir = new File(cl.commandArgs.get(0)); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1);/*w w w . j av a 2 s . co m*/ } Date start = new Date(); try { System.out.println("Indexing to directory '" + cl.indexPath + "'..."); Directory dir = FSDirectory.open(new File(cl.indexPath)); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount, int hashSize) { this.min = min; this.hashCount = hashCount; this.hashSize = hashSize; try {//from w w w. ja v a 2s .c o m Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize); IndexWriterConfig config = new IndexWriterConfig(analyzer); directory = new RAMDirectory(); IndexWriter writer = new IndexWriter(directory, config); for (int i = 0; i < reader.maxDoc(); i++) { Document document = new Document(); Document d = reader.document(i); String textValue = d.getField(textField).stringValue(); String categoryValue = d.getField(categoryField).stringValue(); document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO)); document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES)); writer.addDocument(document); } writer.commit(); writer.close(); } catch (IOException e) { throw new RuntimeException(e); } BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE); }
From source file:com.github.tteofili.looseen.Test20NewsgroupsClassification.java
License:Apache License
@Test public void test20Newsgroups() throws Exception { String indexProperty = System.getProperty("index"); if (indexProperty != null) { try {// w w w. j a va 2 s .c o m index = Boolean.valueOf(indexProperty); } catch (Exception e) { // ignore } } String splitProperty = System.getProperty("split"); if (splitProperty != null) { try { split = Boolean.valueOf(splitProperty); } catch (Exception e) { // ignore } } Path mainIndexPath = Paths.get(INDEX + "/original"); Directory directory = FSDirectory.open(mainIndexPath); Path trainPath = Paths.get(INDEX + "/train"); Path testPath = Paths.get(INDEX + "/test"); Path cvPath = Paths.get(INDEX + "/cv"); FSDirectory cv = null; FSDirectory test = null; FSDirectory train = null; IndexReader testReader = null; if (split) { cv = FSDirectory.open(cvPath); test = FSDirectory.open(testPath); train = FSDirectory.open(trainPath); } if (index) { delete(mainIndexPath); if (split) { delete(trainPath, testPath, cvPath); } } IndexReader reader = null; List<Classifier<BytesRef>> classifiers = new LinkedList<>(); try { Analyzer analyzer = new StandardAnalyzer(); if (index) { System.out.format("Indexing 20 Newsgroups...%n"); long startIndex = System.currentTimeMillis(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer)); buildIndex(new File(PREFIX + "/20n/20_newsgroups"), indexWriter); long endIndex = System.currentTimeMillis(); System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(), (endIndex - startIndex) / 1000); indexWriter.close(); } if (split && !index) { reader = DirectoryReader.open(train); } else { reader = DirectoryReader.open(directory); } if (index && split) { // split the index System.out.format("Splitting the index...%n"); long startSplit = System.currentTimeMillis(); DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0); datasetSplitter.split(reader, train, test, cv, analyzer, false, CATEGORY_FIELD, BODY_FIELD, SUBJECT_FIELD, CATEGORY_FIELD); reader.close(); reader = 
DirectoryReader.open(train); // using the train index from now on long endSplit = System.currentTimeMillis(); System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000); } final long startTime = System.currentTimeMillis(); classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, 0, 0, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 0, 0, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1EXP(), analyzer, null, 3, 0, 0, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1LOG(), analyzer, null, 3, 0, 0, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new IBSimilarity(new DistributionLL(), new LambdaTTF(), new 
NormalizationH1()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 15, 1, 100)); classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 30, 3, 300)); classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 10, 1, 100)); classifiers.add(new KNearestFuzzyClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()), analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD)); classifiers .add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD)); classifiers .add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1EXP(), analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1LOG(), analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD)); classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD)); int maxdoc; if (split) { testReader = DirectoryReader.open(test); maxdoc = testReader.maxDoc(); } else { maxdoc = reader.maxDoc(); } System.out.format("Starting evaluation on %d docs...%n", maxdoc); ExecutorService service = Executors.newCachedThreadPool(); List<Future<String>> futures = new LinkedList<>(); for 
(Classifier<BytesRef> classifier : classifiers) { testClassifier(reader, startTime, testReader, service, futures, classifier); } for (Future<String> f : futures) { System.out.println(f.get()); } Thread.sleep(10000); service.shutdown(); } finally { if (reader != null) { reader.close(); } directory.close(); if (test != null) { test.close(); } if (train != null) { train.close(); } if (cv != null) { cv.close(); } if (testReader != null) { testReader.close(); } for (Classifier c : classifiers) { if (c instanceof Closeable) { ((Closeable) c).close(); } } } }
From source file:com.github.tteofili.looseen.TestWikipediaClassification.java
License:Apache License
@Test public void testItalianWikipedia() throws Exception { String indexProperty = System.getProperty("index"); if (indexProperty != null) { try {/*w w w.ja v a2s. c o m*/ index = Boolean.valueOf(indexProperty); } catch (Exception e) { // ignore } } String splitProperty = System.getProperty("split"); if (splitProperty != null) { try { split = Boolean.valueOf(splitProperty); } catch (Exception e) { // ignore } } Path mainIndexPath = Paths.get(INDEX + "/original"); Directory directory = FSDirectory.open(mainIndexPath); Path trainPath = Paths.get(INDEX + "/train"); Path testPath = Paths.get(INDEX + "/test"); Path cvPath = Paths.get(INDEX + "/cv"); FSDirectory cv = null; FSDirectory test = null; FSDirectory train = null; DirectoryReader testReader = null; if (split) { cv = FSDirectory.open(cvPath); test = FSDirectory.open(testPath); train = FSDirectory.open(trainPath); } if (index) { delete(mainIndexPath); if (split) { delete(trainPath, testPath, cvPath); } } IndexReader reader = null; try { Collection<String> stopWordsList = Arrays.asList("di", "a", "da", "in", "per", "tra", "fra", "il", "lo", "la", "i", "gli", "le"); CharArraySet stopWords = new CharArraySet(stopWordsList, true); Analyzer analyzer = new ItalianAnalyzer(stopWords); if (index) { System.out.format("Indexing Italian Wikipedia...%n"); long startIndex = System.currentTimeMillis(); IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer)); importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current1.xml"), indexWriter); importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current2.xml"), indexWriter); importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current3.xml"), indexWriter); importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current4.xml"), indexWriter); long endIndex = System.currentTimeMillis(); System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(), (endIndex - startIndex) / 1000); 
indexWriter.close(); } if (split && !index) { reader = DirectoryReader.open(train); } else { reader = DirectoryReader.open(directory); } if (index && split) { // split the index System.out.format("Splitting the index...%n"); long startSplit = System.currentTimeMillis(); DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0); for (LeafReaderContext context : reader.leaves()) { datasetSplitter.split(context.reader(), train, test, cv, analyzer, false, CATEGORY_FIELD, TEXT_FIELD, CATEGORY_FIELD); } reader.close(); reader = DirectoryReader.open(train); // using the train index from now on long endSplit = System.currentTimeMillis(); System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000); } final long startTime = System.currentTimeMillis(); List<Classifier<BytesRef>> classifiers = new LinkedList<>(); classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, 0, 0, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new BM25Similarity(), analyzer, null, 1, 0, 0, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 0, 0, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new 
KNearestNeighborClassifier(reader, new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestNeighborClassifier(reader, new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()), analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 1, 100)); classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 10, 1, 100)); classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 1, 100)); classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 100)); classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 300)); classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 3, 100)); classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null, 3, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null, 1, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD)); classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD)); int maxdoc; if (split) { testReader = DirectoryReader.open(test); maxdoc = testReader.maxDoc(); } else { maxdoc = 
reader.maxDoc(); } System.out.format("Starting evaluation on %d docs...%n", maxdoc); ExecutorService service = Executors.newCachedThreadPool(); List<Future<String>> futures = new LinkedList<>(); for (Classifier<BytesRef> classifier : classifiers) { final IndexReader finalReader = reader; final DirectoryReader finalTestReader = testReader; futures.add(service.submit(() -> { ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix; if (split) { confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalTestReader, classifier, CATEGORY_FIELD, TEXT_FIELD, 60000 * 30); } else { confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalReader, classifier, CATEGORY_FIELD, TEXT_FIELD, 60000 * 30); } final long endTime = System.currentTimeMillis(); final int elapse = (int) (endTime - startTime) / 1000; return " * " + classifier + " \n * accuracy = " + confusionMatrix.getAccuracy() + "\n * precision = " + confusionMatrix.getPrecision() + "\n * recall = " + confusionMatrix.getRecall() + "\n * f1-measure = " + confusionMatrix.getF1Measure() + "\n * avgClassificationTime = " + confusionMatrix.getAvgClassificationTime() + "\n * time = " + elapse + " (sec)\n "; })); } for (Future<String> f : futures) { System.out.println(f.get()); } Thread.sleep(10000); service.shutdown(); } finally { try { if (reader != null) { reader.close(); } if (directory != null) { directory.close(); } if (test != null) { test.close(); } if (train != null) { train.close(); } if (cv != null) { cv.close(); } if (testReader != null) { testReader.close(); } } catch (Throwable e) { e.printStackTrace(); } } }