Usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file:com.intuit.tank.search.lucene.LuceneService.java
License:Open Source License
/** * Indexes a list of documents./* w w w . j a v a2 s. c o m*/ * * @param docs */ public void indexDocuments(List<Document> docs) { IndexWriter writer = getWriter(); for (Document document : docs) { try { writer.addDocument(document); } catch (Exception e) { e.printStackTrace(); closeWriter(writer); throw new RuntimeException(e); } } closeWriter(writer); }
From source file:com.ivannotes.searchbee.SearchBee.java
License:Apache License
public final void doIndex(DataFetcher<T> df) throws CorruptIndexException, IOException { df.reset();// ww w . ja va 2 s . co m IndexWriter idxWriter = getIndexWriter(); int contiuousException = 0; try { while (df.hasMore()) { try { List<T> data = df.fetchData(); for (T bean : data) { Document doc = buildDocument(bean); idxWriter.addDocument(doc); } idxWriter.commit(); contiuousException = 0; } catch (Exception e) { contiuousException++; logger.error("build index error", e); if (contiuousException > 100) { logger.error("build index exceed max continuous exception count(100), exit build."); break; } } } } finally { if (null != idxWriter) { idxWriter.close(); } } }
From source file:com.jaeksoft.searchlib.index.WriterLocal.java
License:Open Source License
/**
 * Adds a single document to the index, opening a writer for the call and
 * closing it afterwards.
 *
 * @param document the document to index
 * @throws IOException        on index I/O failure
 * @throws SearchLibException on library-level failure while opening the writer
 * @deprecated single-document writes reopen the writer each time; prefer batched APIs
 */
@Deprecated
public void addDocument(Document document) throws IOException, SearchLibException {
    IndexWriter writer = null;
    try {
        writer = open();
        writer.addDocument(document);
    } finally {
        // close(null) is tolerated when open() itself threw.
        close(writer);
    }
}
From source file:com.jaeksoft.searchlib.index.WriterLucene.java
License:Open Source License
/**
 * Adds a single document to the index while holding the read lock.
 *
 * @param document the document to index
 * @throws IOException        on index I/O failure
 * @throws SearchLibException on library-level failure while opening the writer
 * @deprecated single-document writes reopen the writer each time
 */
@Deprecated
public void addDocument(Document document) throws IOException, SearchLibException {
    IndexWriter indexWriter = null;
    lock.rl.lock();
    try {
        indexWriter = open();
        indexWriter.addDocument(document);
        // Happy path: close here, then null the reference so the close() in
        // the finally block becomes a no-op. On an exception before this point
        // the finally block performs the real close instead. This ordering is
        // deliberate — do not merge the two close calls.
        close(indexWriter);
        indexWriter = null;
    } finally {
        lock.rl.unlock();
        close(indexWriter);
    }
}
From source file:com.javapr.plaintextindex.search.Index.java
License:Apache License
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException { // nur lesbare Dateien verwenden if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); }//from w ww. j a va 2 s .com } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { return; } try { //Word Dokumente mit Tika parsen ContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName()); Parser parser = new AutoDetectParser(); parser.parse(fis, contenthandler, metadata, new ParseContext()); // Lucene Dokumenten-Objekt erstellen und geparsten Tika-Inhalt speichern Document doc = new Document(); Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); Field filename = new StringField("filename", file.getName(), Field.Store.YES); doc.add(filename); doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); doc.add(new TextField("contents", contenthandler.toString(), Field.Store.NO)); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //neuer Index, wenn neues Dokument System.out.println("adding " + file); writer.addDocument(doc); } else { long size = file.length() / 1024; list.add(file + ", " + size + "kb"); //Index updaten, wenn lteres Index-Dokument schon vorhanden System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.jivesoftware.forum.database.DbSearchManager.java
License:Open Source License
/** * Indexes an indivual message. The writer is assumed to be open when * passed in and will remain open after the method is done executing. *///from w w w . j a v a 2s . co m protected final void addMessageToIndex(long messageID, long userID, long threadID, long forumID, String subject, String body, java.util.Date creationDate, IndexWriter writer) throws IOException { if (writer == null) { return; } Document doc = new Document(); doc.add(Field.Keyword("messageID", Long.toString(messageID))); doc.add(new Field("userID", Long.toString(userID), false, true, false)); doc.add(new Field("threadID", Long.toString(threadID), false, true, false)); doc.add(new Field("forumID", Long.toString(forumID), false, true, false)); doc.add(Field.UnStored("subject", subject)); doc.add(Field.UnStored("body", body)); doc.add(new Field("creationDate", DateField.dateToString(creationDate), false, true, false)); writer.addDocument(doc); }
From source file:com.justinleegrant.myluceneplayground.SimpleFacetsExample.java
License:Apache License
/** Build the example index. */ private void index() throws IOException { IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(Version.LATEST, new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE)); // Writes facet ords to a separate directory from the main index DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir); Document doc = new Document(); doc.add(new FacetField("Author", "Bob")); doc.add(new FacetField("Publish Date", "2010", "10", "15")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2010", "10", "20")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Lisa")); doc.add(new FacetField("Publish Date", "2012", "1", "1")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Susan")); doc.add(new FacetField("Publish Date", "2012", "1", "7")); indexWriter.addDocument(config.build(taxoWriter, doc)); doc = new Document(); doc.add(new FacetField("Author", "Frank")); doc.add(new FacetField("Publish Date", "1999", "5", "5")); indexWriter.addDocument(config.build(taxoWriter, doc)); indexWriter.close();//from ww w . jav a 2 s.c om taxoWriter.close(); }
From source file:com.khepry.frackhem.entities.Blendeds.java
License:Apache License
public void indexViaLucene(String textFilePath, String textColSeparator, String casEdfIdFieldName, Map<String, Toxicity> toxicities) throws IOException { String message;/*from w w w .j av a 2s . com*/ message = "Start Indexing Blendeds via Lucene..."; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } File textFile = new File(textFilePath); if (textFile.exists()) { File indexFolder = new File(indexFolderPath); if (!indexFolder.exists()) { indexFolder.mkdir(); } else { deleteFolder(indexFolder); if (!indexFolder.exists()) { indexFolder.mkdir(); } } File taxonomyFolder = new File(taxonomyFolderPath); if (!taxonomyFolder.exists()) { taxonomyFolder.mkdir(); } else { deleteFolder(taxonomyFolder); if (!taxonomyFolder.exists()) { taxonomyFolder.mkdir(); } } if (indexFolder.exists() && taxonomyFolder.exists()) { List<String> colHeaders = new ArrayList<>(); Map<String, Integer> colIndexes = new LinkedHashMap<>(); Map<String, String> mapIndexFields = new LinkedHashMap<>(); Map<String, String> mapStatsFields = new LinkedHashMap<>(); String[] pieces; String[] tuples; pieces = indexFields.split(","); for (String indexField : pieces) { mapIndexFields.put(indexField, indexField); } pieces = statsFields.split(","); for (String statField : pieces) { tuples = statField.split(":"); mapStatsFields.put(tuples[0], tuples.length > 1 ? 
tuples[1] : tuples[0]); } SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer); IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE); FacetFields facetFields = new FacetFields(taxonomyWriter); List<CategoryPath> taxonomyCategories = new ArrayList<>(); String line; Integer rcdCount = 0; StringBuilder sb = new StringBuilder(); BufferedReader br = new BufferedReader(new FileReader(textFile)); while ((line = br.readLine()) != null) { rcdCount++; pieces = line.split(textColSeparator); if (rcdCount == 1) { int i = 0; for (String colHeader : pieces) { colHeaders.add(colHeader.trim()); colIndexes.put(colHeader, i); } } else { if (pieces.length == colHeaders.size()) { sb.setLength(0); Document document = new Document(); for (int i = 0; i < pieces.length; i++) { Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES); document.add(field); if (mapIndexFields.containsKey(colHeaders.get(i))) { if (!pieces[i].trim().equals("")) { sb.append(pieces[i].trim()); sb.append(" "); } } } // append toxicity information to the document String toxCasEdfId = document.get(casEdfIdFieldName).trim(); Toxicity toxicity = new Toxicity(); if (toxicities.containsKey(toxCasEdfId)) { toxicity = toxicities.get(toxCasEdfId); document.add(new TextField("toxChemicalName", toxicity.getToxChemicalName().trim(), Store.YES)); sb.append(toxicity.getToxChemicalName().trim()); sb.append(" "); document.add(new TextField("toxRecognized", toxicity.getToxRecognized().trim(), Store.YES)); sb.append(toxicity.getToxRecognized().trim()); sb.append(" "); document.add(new TextField("toxSuspected", toxicity.getToxSuspected().trim(), 
Store.YES)); sb.append(toxicity.getToxSuspected().trim()); sb.append(" "); } else { document.add(new TextField("toxChemicalName", "", Store.YES)); document.add(new TextField("toxRecognized", "", Store.YES)); document.add(new TextField("toxSuspected", "", Store.YES)); } Field field = new TextField("text", sb.toString().trim(), Store.NO); document.add(field); String toxChemical = toxicity.getToxChemicalName().trim(); // categorize recognized toxicities String toxRecognized = toxicity.getToxRecognized().trim(); if (!toxRecognized.equals("")) { taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId)); taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical", toxChemical.replace("/", "|"))); for (String value : toxRecognized.replace(" ", ",").split(",")) { if (!value.trim().equals("")) { taxonomyCategories .add(new CategoryPath("toxRecognized", "Toxicity", value)); } } } // categorize suspected toxicities String toxSuspected = toxicity.getToxSuspected().trim(); if (!toxSuspected.equals("")) { taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId)); taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical", toxChemical.replace("/", "|"))); for (String value : toxSuspected.replace(" ", ",").split(",")) { if (!value.trim().equals("")) { taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value)); } } } // build up "stats" taxonomy categories for (String statsKey : mapStatsFields.keySet()) { if (mapIndexFields.containsKey(statsKey)) { String fieldValue = mapIndexFields.get(statsKey); if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) { taxonomyCategories.add(new CategoryPath("Blendeds", statsKey, fieldValue)); } } } if (taxonomyCategories.size() > 0) { facetFields.addFields(document, taxonomyCategories); // System.out.println("Taxonomies added: " + // taxonomyCategories.size()); } indexWriter.addDocument(document); if (progressInterval > 0 && rcdCount % progressInterval == 
0) { message = "Records indexed: " + rcdCount; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } } taxonomyCategories.clear(); } } } br.close(); message = "Records indexed: " + rcdCount; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } sb.setLength(0); sb.trimToSize(); indexWriter.commit(); indexWriter.forceMerge(1); indexWriter.close(); taxonomyWriter.commit(); taxonomyWriter.close(); analyzer.close(); indexDirectory.close(); taxonomyDirectory.close(); } else { message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder + " does not exist!"; if (outputToSystemErr) { System.err.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } } message = "Ended Indexing Blendeds via Lucene!"; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } } }
From source file:com.khepry.frackhem.entities.Chemicals.java
License:Apache License
public void indexViaLucene(String textFilePath, String textColSeparator, String casEdfIdFieldName, Map<String, Toxicity> toxicities) throws IOException { String message;/* w w w . j a v a 2s . co m*/ message = "Start Indexing Chemicals via Lucene..."; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } File textFile = new File(textFilePath); if (textFile.exists()) { File indexFolder = new File(indexFolderPath); if (!indexFolder.exists()) { indexFolder.mkdir(); } else { deleteFolder(indexFolder); if (!indexFolder.exists()) { indexFolder.mkdir(); } } File taxonomyFolder = new File(taxonomyFolderPath); if (!taxonomyFolder.exists()) { taxonomyFolder.mkdir(); } else { deleteFolder(taxonomyFolder); if (!taxonomyFolder.exists()) { taxonomyFolder.mkdir(); } } if (indexFolder.exists() && taxonomyFolder.exists()) { List<String> colHeaders = new ArrayList<>(); Map<String, Integer> colIndexes = new LinkedHashMap<>(); Map<String, String> mapIndexFields = new LinkedHashMap<>(); Map<String, String> mapStatsFields = new LinkedHashMap<>(); String[] pieces; String[] tuples; pieces = indexFields.split(","); for (String indexField : pieces) { mapIndexFields.put(indexField, indexField); } pieces = statsFields.split(","); for (String statField : pieces) { tuples = statField.split(":"); mapStatsFields.put(tuples[0], tuples.length > 1 ? 
tuples[1] : tuples[0]); } SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder); Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer); IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig); SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE); FacetFields facetFields = new FacetFields(taxonomyWriter); List<CategoryPath> taxonomyCategories = new ArrayList<>(); String line; Integer rcdCount = 0; StringBuilder sb = new StringBuilder(); BufferedReader br = new BufferedReader(new FileReader(textFile)); while ((line = br.readLine()) != null) { rcdCount++; pieces = line.split(textColSeparator); if (rcdCount == 1) { int i = 0; for (String colHeader : pieces) { colHeaders.add(colHeader.trim()); colIndexes.put(colHeader, i); } } else { if (pieces.length == colHeaders.size()) { sb.setLength(0); Document document = new Document(); for (int i = 0; i < pieces.length; i++) { Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES); document.add(field); if (mapIndexFields.containsKey(colHeaders.get(i))) { if (!pieces[i].trim().equals("")) { sb.append(pieces[i].trim()); sb.append(" "); } } } // append toxicity information to the document String toxCasEdfId = document.get(casEdfIdFieldName).trim(); Toxicity toxicity = new Toxicity(); if (toxicities.containsKey(toxCasEdfId)) { toxicity = toxicities.get(toxCasEdfId); document.add(new TextField("toxChemicalName", toxicity.getToxChemicalName().trim(), Store.YES)); sb.append(toxicity.getToxChemicalName().trim()); sb.append(" "); document.add(new TextField("toxRecognized", toxicity.getToxRecognized().trim(), Store.YES)); sb.append(toxicity.getToxRecognized().trim()); sb.append(" "); document.add(new TextField("toxSuspected", toxicity.getToxSuspected().trim(), 
Store.YES)); sb.append(toxicity.getToxSuspected().trim()); sb.append(" "); } else { document.add(new TextField("toxChemicalName", "", Store.YES)); document.add(new TextField("toxRecognized", "", Store.YES)); document.add(new TextField("toxSuspected", "", Store.YES)); } Field field = new TextField("text", sb.toString().trim(), Store.NO); document.add(field); String toxChemical = toxicity.getToxChemicalName().trim(); // categorize recognized toxicities String toxRecognized = toxicity.getToxRecognized().trim(); if (!toxRecognized.equals("")) { taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId)); taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical", toxChemical.replace("/", "|"))); for (String value : toxRecognized.replace(" ", ",").split(",")) { if (!value.trim().equals("")) { taxonomyCategories .add(new CategoryPath("toxRecognized", "Toxicity", value)); } } } // categorize suspected toxicities String toxSuspected = toxicity.getToxSuspected().trim(); if (!toxSuspected.equals("")) { taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId)); taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical", toxChemical.replace("/", "|"))); for (String value : toxSuspected.replace(" ", ",").split(",")) { if (!value.trim().equals("")) { taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value)); } } } // build up "stats" taxonomy categories for (String statsKey : mapStatsFields.keySet()) { if (mapIndexFields.containsKey(statsKey)) { String fieldValue = mapIndexFields.get(statsKey); if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) { taxonomyCategories.add(new CategoryPath("Chemicals", statsKey, fieldValue)); } } } if (taxonomyCategories.size() > 0) { facetFields.addFields(document, taxonomyCategories); // System.out.println("Taxonomies added: " + // taxonomyCategories.size()); } indexWriter.addDocument(document); if (progressInterval > 0 && rcdCount % progressInterval 
== 0) { message = "Records indexed: " + rcdCount; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } } taxonomyCategories.clear(); } } } br.close(); message = "Records indexed: " + rcdCount; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } sb.setLength(0); sb.trimToSize(); indexWriter.commit(); indexWriter.forceMerge(1); indexWriter.close(); taxonomyWriter.commit(); taxonomyWriter.close(); analyzer.close(); indexDirectory.close(); taxonomyDirectory.close(); } else { message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder + " does not exist!"; if (outputToSystemErr) { System.err.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } } message = "Ended Indexing Chemicals via Lucene!"; if (outputToSystemOut) { System.out.println(message); } if (outputToMsgQueue) { progressMessageQueue.send(new MessageInput(message)); } } }
From source file:com.khepry.frackhem.entities.Reports.java
License:Apache License
/**
 * Indexes the reports text file into a Lucene index plus a facet taxonomy,
 * aggregating rows by a level-break on the configured level fields.
 *
 * <p>The first line is a column header row. Data rows are accumulated until
 * the values of the level fields change ("level break"); at each break — and
 * once more at end-of-file — one aggregated document is written containing the
 * break-group's field values plus the toxicity terms collected from the
 * group's rows. The statement order inside the loop is load-bearing: the
 * break check runs BEFORE the current row's values are folded into the
 * accumulators, so the flushed document describes the previous group.
 *
 * @param textPath         path to the column-separated input file
 * @param textColSeparator column separator regex passed to String.split
 * @param toxicities       toxicity records keyed by CAS/EDF id
 * @param parseFields      toxicity field names to aggregate per group;
 *                         NOTE(review): the body dereferences
 *                         mapToxValues.get("toxRecognized") and
 *                         mapToxValues.get("toxSuspected") directly, so callers
 *                         must include both names or this NPEs — confirm.
 * @throws IOException on read or index-write failure
 */
public void indexViaLucene(String textPath, String textColSeparator, Map<String, Toxicity> toxicities,
        String... parseFields) throws IOException {
    String message;
    message = "Start Indexing Reports via Lucene...";
    if (outputToSystemOut) {
        System.out.println(message);
    }
    if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
    }
    File textFile = new File(textPath);
    if (textFile.exists()) {
        File indexFolder = new File(indexFolderPath);
        if (!indexFolder.exists()) {
            indexFolder.mkdir();
        }
        File taxonomyFolder = new File(taxonomyFolderPath);
        if (!taxonomyFolder.exists()) {
            taxonomyFolder.mkdir();
        }
        if (indexFolder.exists() && taxonomyFolder.exists()) {
            // Wipe and recreate both folders for a from-scratch build.
            deleteFolder(indexFolder);
            if (!indexFolder.exists()) {
                indexFolder.mkdir();
            }
            deleteFolder(taxonomyFolder);
            if (!taxonomyFolder.exists()) {
                taxonomyFolder.mkdir();
            }
            // mapBreakFields holds the previous group's level values;
            // mapLevelFields holds the current row's — inequality = level break.
            Map<String, String> mapBreakFields = new LinkedHashMap<>();
            Map<String, String> mapIndexFields = new LinkedHashMap<>();
            Map<String, String> mapLevelFields = new LinkedHashMap<>();
            Map<String, String> mapStatsFields = new LinkedHashMap<>();
            Map<String, Integer> mapColIndexes = new LinkedHashMap<>();
            String[] pieces;
            String[] tuples;
            pieces = indexFields.split(",");
            for (String indexField : pieces) {
                mapIndexFields.put(indexField, "");
            }
            pieces = levelFields.split(",");
            for (String levelField : pieces) {
                mapBreakFields.put(levelField, "");
                mapLevelFields.put(levelField, "");
            }
            // statsFields entries are "name" or "name:alias".
            pieces = statsFields.split(",");
            for (String statField : pieces) {
                tuples = statField.split(":");
                mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
            }
            // One sorted value-set per toxicity parse field, cleared at each break.
            Map<String, Map<String, String>> mapToxValues = new LinkedHashMap<>();
            for (String parseField : parseFields) {
                mapToxValues.put(parseField, new TreeMap<String, String>());
            }
            SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
            SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
            IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
            TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
            FacetFields facetFields = new FacetFields(taxonomyWriter);
            List<CategoryPath> taxonomyCategories = new ArrayList<>();
            String line;
            StringBuilder sbIndex = new StringBuilder();
            // NOTE(review): sbLevel is written to/cleared but never read — dead local.
            StringBuilder sbLevel = new StringBuilder();
            Integer outCount = 0;
            Integer rcdCount = 0;
            Boolean firstDataRecordHandled = false;
            BufferedReader br = new BufferedReader(new FileReader(textFile));
            while ((line = br.readLine()) != null) {
                rcdCount++;
                pieces = line.split(textColSeparator);
                if (rcdCount == 1) {
                    // Header row: record each column's position.
                    int i = 0;
                    for (String colHeader : pieces) {
                        mapColIndexes.put(colHeader.trim(), i);
                        i++;
                    }
                } else {
                    // Refresh the current row's level-field values.
                    for (String key : mapLevelFields.keySet()) {
                        if (mapColIndexes.containsKey(key)) {
                            String value = pieces[mapColIndexes.get(key)].trim();
                            // build up level-break values
                            if (mapLevelFields.containsKey(key)) {
                                mapLevelFields.put(key, value);
                            }
                        }
                    }
                    // Seed the break values from the first data record so the
                    // first real row never triggers a spurious break.
                    if (!firstDataRecordHandled) {
                        mapBreakFields.putAll(mapLevelFields);
                        firstDataRecordHandled = true;
                    }
                    // if there is a "level break": flush the previous group.
                    if (!mapLevelFields.equals(mapBreakFields)) {
                        Document tgtDocument = new Document();
                        for (Map.Entry<String, String> entry : mapBreakFields.entrySet()) {
                            Field field = new TextField(entry.getKey(), entry.getValue(), Store.YES);
                            tgtDocument.add(field);
                        }
                        for (Map.Entry<String, Map<String, String>> toxEntry : mapToxValues.entrySet()) {
                            String fieldName = toxEntry.getKey();
                            String fieldValue = GenericUtilities.joinString(toxEntry.getValue().values(), " ");
                            sbIndex.append(fieldValue);
                            sbIndex.append(" ");
                            tgtDocument.add(new TextField(fieldName, fieldValue, Store.YES));
                            // build up "Toxicity" taxonomy categories
                            for (String value : fieldValue.replace(" ", ",").split(",")) {
                                if (!value.trim().equals("")) {
                                    taxonomyCategories.add(new CategoryPath(fieldName, "Toxicity", value));
                                }
                            }
                            // build up "stats" taxonomy categories
                            for (String statsKey : mapStatsFields.keySet()) {
                                if (mapLevelFields.containsKey(statsKey)) {
                                    String levelValue = mapLevelFields.get(statsKey);
                                    if (!statsKey.trim().equals("") && !levelValue.trim().equals("")) {
                                        taxonomyCategories
                                                .add(new CategoryPath("Reports", statsKey, levelValue));
                                    }
                                }
                            }
                        }
                        tgtDocument.add(new TextField("text", sbIndex.toString().trim(), Store.NO));
                        if (taxonomyCategories.size() > 0) {
                            facetFields.addFields(tgtDocument, taxonomyCategories);
                        }
                        indexWriter.addDocument(tgtDocument);
                        outCount++;
                        // Reset the per-group accumulators and advance the break values.
                        sbIndex.setLength(0);
                        for (String key : mapToxValues.keySet()) {
                            mapToxValues.get(key).clear();
                        }
                        taxonomyCategories.clear();
                        mapBreakFields.putAll(mapLevelFields);
                    }
                    // build up text index values (current row, AFTER any flush)
                    for (String key : mapLevelFields.keySet()) {
                        if (mapColIndexes.containsKey(key)) {
                            String value = pieces[mapColIndexes.get(key)].trim();
                            if (!value.equals("")) {
                                // build up 'text' field index value
                                if (mapIndexFields.containsKey(key)) {
                                    sbIndex.append(value);
                                    sbIndex.append(" ");
                                }
                            }
                        }
                    }
                    // build up toxicity values for later level-break use
                    if (mapColIndexes.containsKey(casEdfIdFieldName)) {
                        Toxicity toxicity = toxicities.get(pieces[mapColIndexes.get(casEdfIdFieldName)].trim());
                        if (toxicity != null) {
                            // build up recognized toxicity values (deduped via map keys)
                            String[] toxRValues = toxicity.getToxRecognized().split(",");
                            for (String toxValue : toxRValues) {
                                if (!toxValue.equals("")) {
                                    if (!mapToxValues.get("toxRecognized").containsKey(toxValue)) {
                                        mapToxValues.get("toxRecognized").put(toxValue, toxValue);
                                    }
                                }
                            }
                            // build up suspected toxicity values
                            String[] toxSValues = toxicity.getToxSuspected().split(",");
                            for (String toxValue : toxSValues) {
                                if (!toxValue.equals("")) {
                                    if (!mapToxValues.get("toxSuspected").containsKey(toxValue)) {
                                        mapToxValues.get("toxSuspected").put(toxValue, toxValue);
                                    }
                                }
                            }
                        }
                    }
                    if (progressInterval > 0 && rcdCount % progressInterval == 0) {
                        message = "Records indexed: " + rcdCount;
                        if (outputToSystemOut) {
                            System.out.println(message);
                        }
                        if (outputToMsgQueue) {
                            progressMessageQueue.send(new MessageInput(message));
                        }
                    }
                }
            }
            br.close();
            // handle end-of-file processing: flush the final group (duplicates
            // the in-loop flush above; keep the two in sync when editing).
            Document tgtDocument = new Document();
            for (Map.Entry<String, String> entry : mapBreakFields.entrySet()) {
                Field field = new TextField(entry.getKey(), entry.getValue(), Store.YES);
                tgtDocument.add(field);
            }
            for (Map.Entry<String, Map<String, String>> toxEntry : mapToxValues.entrySet()) {
                String fieldName = toxEntry.getKey();
                String fieldValue = GenericUtilities.joinString(toxEntry.getValue().values(), " ");
                sbIndex.append(fieldValue);
                sbIndex.append(" ");
                tgtDocument.add(new TextField(fieldName, fieldValue, Store.YES));
                // build up "Toxicity" taxonomy categories
                for (String value : fieldValue.replace(" ", ",").split(",")) {
                    if (!value.trim().equals("")) {
                        taxonomyCategories.add(new CategoryPath(fieldName, "Toxicity", value));
                    }
                }
                // build up "stats" taxonomy categories
                for (String statsKey : mapStatsFields.keySet()) {
                    if (mapLevelFields.containsKey(statsKey)) {
                        String levelValue = mapLevelFields.get(statsKey);
                        if (!statsKey.trim().equals("") && !levelValue.trim().equals("")) {
                            taxonomyCategories.add(new CategoryPath("Reports", statsKey, levelValue));
                        }
                    }
                }
            }
            tgtDocument.add(new TextField("text", sbIndex.toString().trim(), Store.NO));
            if (taxonomyCategories.size() > 0) {
                facetFields.addFields(tgtDocument, taxonomyCategories);
            }
            indexWriter.addDocument(tgtDocument);
            outCount++;
            message = "Records processed: " + rcdCount;
            if (outputToSystemOut) {
                System.out.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
            message = "Records indexed: " + outCount;
            if (outputToSystemOut) {
                System.out.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
            sbIndex.setLength(0);
            sbIndex.trimToSize();
            sbLevel.setLength(0);
            sbLevel.trimToSize();
            mapToxValues.clear();
            indexWriter.commit();
            indexWriter.forceMerge(1);
            indexWriter.close();
            analyzer.close();
            indexDirectory.close();
            taxonomyWriter.commit();
            taxonomyWriter.close();
            taxonomyDirectory.close();
        } else {
            message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
                    + " does not exist!";
            if (outputToSystemErr) {
                System.err.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
        }
        message = "Ended Indexing Reports via Lucene!";
        if (outputToSystemOut) {
            System.out.println(message);
        }
        if (outputToMsgQueue) {
            progressMessageQueue.send(new MessageInput(message));
        }
    }
}