Example usage for org.apache.lucene.index IndexWriter close

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter close.

Prototype

@Override
public void close() throws IOException

Source Link

Document

Closes all open resources and releases the write lock.

Usage

From source file:com.github.lucene.store.jdbc.AbstractJdbcDirectoryITest.java

License:Apache License

protected void addDocuments(final Directory directory, final OpenMode openMode, final boolean useCompoundFile,
        final Collection<String> docs) throws IOException {
    final IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setOpenMode(OpenMode.CREATE);
    config.setUseCompoundFile(useCompoundFile);

    final DirectoryTemplate template = new DirectoryTemplate(directory);
    template.execute(new DirectoryTemplate.DirectoryCallbackWithoutResult() {
        @Override// w w w.ja  v  a2s. c om
        public void doInDirectoryWithoutResult(final Directory dir) throws IOException {
            final IndexWriter writer = new IndexWriter(dir, config);
            for (final Object element : docs) {
                final Document doc = new Document();
                final String word = (String) element;
                // FIXME: review
                // doc.add(new Field("keyword", word, Field.Store.YES,
                // Field.Index.UN_TOKENIZED));
                // doc.add(new Field("unindexed", word, Field.Store.YES,
                // Field.Index.NO));
                // doc.add(new Field("unstored", word, Field.Store.NO,
                // Field.Index.TOKENIZED));
                // doc.add(new Field("text", word, Field.Store.YES,
                // Field.Index.TOKENIZED));
                doc.add(new StringField("keyword", word, Field.Store.YES));
                doc.add(new StringField("unindexed", word, Field.Store.YES));
                doc.add(new StringField("unstored", word, Field.Store.NO));
                doc.add(new StringField("text", word, Field.Store.YES));
                writer.addDocument(doc);
            }

            // FIXME: review
            // writer.optimize();
            writer.close();
        }
    });
}

From source file:com.github.mosuka.apache.lucene.example.cmd.AddCommand.java

License:Apache License

@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();

    String responseJSON = null;//from www.  j  a v  a2 s.  c  om
    Directory indexDir = null;
    IndexWriter writer = null;

    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.addDocument(document);
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}

From source file:com.github.mosuka.apache.lucene.example.cmd.DeleteCommand.java

License:Apache License

@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();

    String responseJSON = null;// w  ww.  j a  v  a  2s .  c o m
    Directory indexDir = null;
    IndexWriter writer = null;

    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");

        indexDir = FSDirectory.open(new File(index).toPath());

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.deleteDocuments(new Term("id", uniqueId));
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", 1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}

From source file:com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java

License:Apache License

@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();

    String responseJSON = null;/* w ww .j a  v  a 2  s.com*/
    Directory indexDir = null;
    IndexWriter writer = null;

    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.updateDocument(new Term("id", document.get("id")), document);
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", -1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }

    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}

From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java

License:Open Source License

@Test
public void testArabicRootIndex() throws IOException, ParseException, URISyntaxException {
    Directory index = new RAMDirectory();
    ArabicRootExtractorAnalyzer analyzer = new ArabicRootExtractorAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);

    final AtomicInteger id = new AtomicInteger(0);
    IndexWriter w = new IndexWriter(index, config);
    URL url = ArabicRootExtractorStemmer.class.getClassLoader()
            .getResource("com/github/msarhan/lucene/fateha.txt");

    if (url == null) {
        fail("Not able to load data file!");
    }//from ww w . j a  v  a  2 s  .co m

    Files.lines(new File(url.toURI()).toPath())
            .forEach(line -> addDoc(w, line, String.valueOf(id.incrementAndGet())));
    w.close();

    String querystr = "";
    Query q = new QueryParser("title", analyzer).parse(querystr);

    int hitsPerPage = 10;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(q, hitsPerPage);

    //print(searcher, docs);

    assertEquals(2, docs.scoreDocs.length);
}

From source file:com.github.msarhan.lucene.ArabicRootExtractorAnalyzerTests.java

License:Open Source License

@Test
public void testInlineStemmer() throws IOException, ParseException {

    //Initialize the index
    Directory index = new RAMDirectory();
    Analyzer analyzer = new ArabicRootExtractorAnalyzer();
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(index, config);

    Document doc = new Document();
    doc.add(new StringField("number", "1", Field.Store.YES));
    doc.add(new TextField("title", "?? ? ? ??",
            Field.Store.YES));/*w  w w  . ja  v a2s .  c  om*/
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("number", "2", Field.Store.YES));
    doc.add(new TextField("title", "? ?? ? ?",
            Field.Store.YES));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new StringField("number", "3", Field.Store.YES));
    doc.add(new TextField("title", "? ??", Field.Store.YES));
    writer.addDocument(doc);
    writer.close();
    //~

    //Query the index
    String queryStr = "";
    Query query = new QueryParser("title", analyzer).parse(queryStr);

    int hitsPerPage = 5;
    IndexReader reader = DirectoryReader.open(index);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs docs = searcher.search(query, hitsPerPage, Sort.INDEXORDER);

    ScoreDoc[] hits = docs.scoreDocs;
    //~

    //Print results
    /*
    System.out.println("Found " + hits.length + " hits:");
    for (ScoreDoc hit : hits) {
       int docId = hit.doc;
       Document d = searcher.doc(docId);
       System.out.printf("\t(%s): %s\n", d.get("number"), d.get("title"));
    }
    */
    //~

}

From source file:com.github.tenorviol.gitsearch.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void index(CommandLine cl) {
    boolean create = true;

    // TODO: multi files
    final File docDir = new File(cl.commandArgs.get(0));
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);/*w w  w  .  j av  a  2 s  .  co m*/
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + cl.indexPath + "'...");

        Directory dir = FSDirectory.open(new File(cl.indexPath));
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_31);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_31, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:com.github.tteofili.looseen.MinHashClassifier.java

License:Apache License

public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {//from  w  w w.  ja  v  a  2s  .c o  m
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}

From source file:com.github.tteofili.looseen.Test20NewsgroupsClassification.java

License:Apache License

@Test
public void test20Newsgroups() throws Exception {

    String indexProperty = System.getProperty("index");
    if (indexProperty != null) {
        try {//  w  w w.  j a  va  2 s .c o  m
            index = Boolean.valueOf(indexProperty);
        } catch (Exception e) {
            // ignore
        }
    }

    String splitProperty = System.getProperty("split");
    if (splitProperty != null) {
        try {
            split = Boolean.valueOf(splitProperty);
        } catch (Exception e) {
            // ignore
        }
    }

    Path mainIndexPath = Paths.get(INDEX + "/original");
    Directory directory = FSDirectory.open(mainIndexPath);
    Path trainPath = Paths.get(INDEX + "/train");
    Path testPath = Paths.get(INDEX + "/test");
    Path cvPath = Paths.get(INDEX + "/cv");
    FSDirectory cv = null;
    FSDirectory test = null;
    FSDirectory train = null;
    IndexReader testReader = null;
    if (split) {
        cv = FSDirectory.open(cvPath);
        test = FSDirectory.open(testPath);
        train = FSDirectory.open(trainPath);
    }

    if (index) {
        delete(mainIndexPath);
        if (split) {
            delete(trainPath, testPath, cvPath);
        }
    }

    IndexReader reader = null;
    List<Classifier<BytesRef>> classifiers = new LinkedList<>();
    try {
        Analyzer analyzer = new StandardAnalyzer();
        if (index) {

            System.out.format("Indexing 20 Newsgroups...%n");

            long startIndex = System.currentTimeMillis();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));

            buildIndex(new File(PREFIX + "/20n/20_newsgroups"), indexWriter);

            long endIndex = System.currentTimeMillis();
            System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(),
                    (endIndex - startIndex) / 1000);

            indexWriter.close();

        }

        if (split && !index) {
            reader = DirectoryReader.open(train);
        } else {
            reader = DirectoryReader.open(directory);
        }

        if (index && split) {
            // split the index
            System.out.format("Splitting the index...%n");

            long startSplit = System.currentTimeMillis();
            DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0);
            datasetSplitter.split(reader, train, test, cv, analyzer, false, CATEGORY_FIELD, BODY_FIELD,
                    SUBJECT_FIELD, CATEGORY_FIELD);
            reader.close();
            reader = DirectoryReader.open(train); // using the train index from now on
            long endSplit = System.currentTimeMillis();
            System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000);
        }

        final long startTime = System.currentTimeMillis();

        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0, CATEGORY_FIELD,
                BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1EXP(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new AxiomaticF1LOG(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null, 3,
                1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer,
                null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 3, 1, 1, CATEGORY_FIELD,
                BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 15, 1, 100));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 30, 3, 300));
        classifiers.add(new MinHashClassifier(reader, BODY_FIELD, CATEGORY_FIELD, 10, 1, 100));
        classifiers.add(new KNearestFuzzyClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer, null,
                1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()), analyzer, null,
                1, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 1,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 3,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers
                .add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 1, CATEGORY_FIELD, BODY_FIELD));
        classifiers
                .add(new KNearestFuzzyClassifier(reader, null, analyzer, null, 3, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1EXP(), analyzer, null, 3,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new AxiomaticF1LOG(), analyzer, null, 3,
                CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));
        classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, BODY_FIELD));

        int maxdoc;

        if (split) {
            testReader = DirectoryReader.open(test);
            maxdoc = testReader.maxDoc();
        } else {
            maxdoc = reader.maxDoc();
        }

        System.out.format("Starting evaluation on %d docs...%n", maxdoc);

        ExecutorService service = Executors.newCachedThreadPool();
        List<Future<String>> futures = new LinkedList<>();
        for (Classifier<BytesRef> classifier : classifiers) {
            testClassifier(reader, startTime, testReader, service, futures, classifier);
        }
        for (Future<String> f : futures) {
            System.out.println(f.get());
        }

        Thread.sleep(10000);
        service.shutdown();

    } finally {
        if (reader != null) {
            reader.close();
        }
        directory.close();
        if (test != null) {
            test.close();
        }
        if (train != null) {
            train.close();
        }
        if (cv != null) {
            cv.close();
        }
        if (testReader != null) {
            testReader.close();
        }

        for (Classifier c : classifiers) {
            if (c instanceof Closeable) {
                ((Closeable) c).close();
            }
        }
    }
}

From source file:com.github.tteofili.looseen.TestWikipediaClassification.java

License:Apache License

@Test
public void testItalianWikipedia() throws Exception {

    String indexProperty = System.getProperty("index");
    if (indexProperty != null) {
        try {/*w  w  w.ja v a2s.  c o m*/
            index = Boolean.valueOf(indexProperty);
        } catch (Exception e) {
            // ignore
        }
    }

    String splitProperty = System.getProperty("split");
    if (splitProperty != null) {
        try {
            split = Boolean.valueOf(splitProperty);
        } catch (Exception e) {
            // ignore
        }
    }

    Path mainIndexPath = Paths.get(INDEX + "/original");
    Directory directory = FSDirectory.open(mainIndexPath);
    Path trainPath = Paths.get(INDEX + "/train");
    Path testPath = Paths.get(INDEX + "/test");
    Path cvPath = Paths.get(INDEX + "/cv");
    FSDirectory cv = null;
    FSDirectory test = null;
    FSDirectory train = null;
    DirectoryReader testReader = null;
    if (split) {
        cv = FSDirectory.open(cvPath);
        test = FSDirectory.open(testPath);
        train = FSDirectory.open(trainPath);
    }

    if (index) {
        delete(mainIndexPath);
        if (split) {
            delete(trainPath, testPath, cvPath);
        }
    }

    IndexReader reader = null;
    try {
        Collection<String> stopWordsList = Arrays.asList("di", "a", "da", "in", "per", "tra", "fra", "il", "lo",
                "la", "i", "gli", "le");
        CharArraySet stopWords = new CharArraySet(stopWordsList, true);
        Analyzer analyzer = new ItalianAnalyzer(stopWords);
        if (index) {

            System.out.format("Indexing Italian Wikipedia...%n");

            long startIndex = System.currentTimeMillis();
            IndexWriter indexWriter = new IndexWriter(directory, new IndexWriterConfig(analyzer));

            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current1.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current2.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current3.xml"), indexWriter);
            importWikipedia(new File(PREFIX + "/itwiki/itwiki-20150405-pages-meta-current4.xml"), indexWriter);

            long endIndex = System.currentTimeMillis();
            System.out.format("Indexed %d pages in %ds %n", indexWriter.maxDoc(),
                    (endIndex - startIndex) / 1000);

            indexWriter.close();

        }

        if (split && !index) {
            reader = DirectoryReader.open(train);
        } else {
            reader = DirectoryReader.open(directory);
        }

        if (index && split) {
            // split the index
            System.out.format("Splitting the index...%n");

            long startSplit = System.currentTimeMillis();
            DatasetSplitter datasetSplitter = new DatasetSplitter(0.1, 0);
            for (LeafReaderContext context : reader.leaves()) {
                datasetSplitter.split(context.reader(), train, test, cv, analyzer, false, CATEGORY_FIELD,
                        TEXT_FIELD, CATEGORY_FIELD);
            }
            reader.close();
            reader = DirectoryReader.open(train); // using the train index from now on
            long endSplit = System.currentTimeMillis();
            System.out.format("Splitting done in %ds %n", (endSplit - startSplit) / 1000);
        }

        final long startTime = System.currentTimeMillis();

        List<Classifier<BytesRef>> classifiers = new LinkedList<>();
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new BM25Similarity(), analyzer, null, 1, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, null, analyzer, null, 1, 0, 0, CATEGORY_FIELD,
                TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMDirichletSimilarity(), analyzer, null, 3,
                1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new LMJelinekMercerSimilarity(0.3f), analyzer,
                null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 0, 0,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader, new ClassicSimilarity(), analyzer, null, 3, 1, 1,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelG(), new AfterEffectB(), new NormalizationH1()), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new DFRSimilarity(new BasicModelP(), new AfterEffectL(), new NormalizationH3()), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionSPL(), new LambdaDF(), new Normalization.NoNormalization()),
                analyzer, null, 3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestNeighborClassifier(reader,
                new IBSimilarity(new DistributionLL(), new LambdaTTF(), new NormalizationH1()), analyzer, null,
                3, 1, 1, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 10, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 1, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 100));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 15, 3, 300));
        classifiers.add(new MinHashClassifier(reader, TEXT_FIELD, CATEGORY_FIELD, 5, 3, 100));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 3,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new ClassicSimilarity(), analyzer, null, 1,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null, 3,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new KNearestFuzzyClassifier(reader, new BM25Similarity(), analyzer, null, 1,
                CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new BM25NBClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new CachingNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));
        classifiers.add(new SimpleNaiveBayesClassifier(reader, analyzer, null, CATEGORY_FIELD, TEXT_FIELD));

        int maxdoc;

        if (split) {
            testReader = DirectoryReader.open(test);
            maxdoc = testReader.maxDoc();
        } else {
            maxdoc = reader.maxDoc();
        }

        System.out.format("Starting evaluation on %d docs...%n", maxdoc);

        ExecutorService service = Executors.newCachedThreadPool();
        List<Future<String>> futures = new LinkedList<>();
        for (Classifier<BytesRef> classifier : classifiers) {

            final IndexReader finalReader = reader;
            final DirectoryReader finalTestReader = testReader;
            futures.add(service.submit(() -> {
                ConfusionMatrixGenerator.ConfusionMatrix confusionMatrix;
                if (split) {
                    confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalTestReader, classifier,
                            CATEGORY_FIELD, TEXT_FIELD, 60000 * 30);
                } else {
                    confusionMatrix = ConfusionMatrixGenerator.getConfusionMatrix(finalReader, classifier,
                            CATEGORY_FIELD, TEXT_FIELD, 60000 * 30);
                }

                final long endTime = System.currentTimeMillis();
                final int elapse = (int) (endTime - startTime) / 1000;

                return " * " + classifier + " \n    * accuracy = " + confusionMatrix.getAccuracy()
                        + "\n    * precision = " + confusionMatrix.getPrecision() + "\n    * recall = "
                        + confusionMatrix.getRecall() + "\n    * f1-measure = " + confusionMatrix.getF1Measure()
                        + "\n    * avgClassificationTime = " + confusionMatrix.getAvgClassificationTime()
                        + "\n    * time = " + elapse + " (sec)\n ";
            }));

        }
        for (Future<String> f : futures) {
            System.out.println(f.get());
        }

        Thread.sleep(10000);
        service.shutdown();

    } finally {
        try {
            if (reader != null) {
                reader.close();
            }
            if (directory != null) {
                directory.close();
            }
            if (test != null) {
                test.close();
            }
            if (train != null) {
                train.close();
            }
            if (cv != null) {
                cv.close();
            }
            if (testReader != null) {
                testReader.close();
            }
        } catch (Throwable e) {
            e.printStackTrace();
        }
    }
}