Example usage for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter DirectoryTaxonomyWriter

List of usage examples for org.apache.lucene.facet.taxonomy.directory DirectoryTaxonomyWriter DirectoryTaxonomyWriter

Introduction

On this page you can find example usages of the org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter constructor DirectoryTaxonomyWriter(Directory, OpenMode).

Prototype

public DirectoryTaxonomyWriter(Directory directory, OpenMode openMode) throws IOException 

Source Link

Document

Creates a new instance with a default cache, as defined by DirectoryTaxonomyWriter#defaultTaxonomyWriterCache().

Usage

From source file:com.chimpler.example.FacetLuceneIndexer.java

License:Apache License

/**
 * Re-indexes the books listed in a JSON file together with their "author" and
 * "book_category" facets, then searches the titles for "story" through
 * near-real-time readers and prints the hits and the facet counts.
 *
 * <p>The index directory, taxonomy directory and JSON file locations are
 * hard-coded below. Both the index and the taxonomy are opened in
 * {@code APPEND} mode, so they must already exist.
 *
 * @param args ignored
 * @throws Exception if reading the JSON file, indexing, committing or searching fails
 */
public static void main(String args[]) throws Exception {
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)),
            OpenMode.APPEND);

    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    // The stream is opened here, so it is closed here once its content has been read.
    String content;
    FileInputStream jsonStream = new FileInputStream(jsonFileName);
    try {
        content = IOUtils.toString(jsonStream);
    } finally {
        jsonStream.close();
    }
    JSONArray bookArray = new JSONArray(content);

    // Field instances are reused across documents; only their values change per book.
    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    // Start from an empty index: every run re-adds all books.
    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);

    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();

        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();

        StringBuilder authorsString = new StringBuilder();
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString.append(", ");
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString.append(author);
        }
        // The category string is appended to the dimension name and split on '/'.
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString.toString());
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);

        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);

        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory,
                authors);
    }

    // Two-phase commit of the taxonomy. On failure the writer is rolled back and the
    // error is propagated: continuing would only fail later on the closed writer.
    try {
        taxonomyWriter.prepareCommit();
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
        throw e;
    }

    String query = "story";

    // Near-real-time readers on the uncommitted index. Each open() call returns its
    // own reader instance, so both must be closed.
    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    // Pick up the taxonomy changes committed above, if any.
    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    List<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));

    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));
    System.out.println("Found:");

    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf("    - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf("        - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }
    taxonomyReader.close();
    indexReader.close();
    indexReader2.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();

}

From source file:com.fuerve.villageelder.actions.results.SearchResultItemTest.java

License:Apache License

/**
 * Builds a two-document index and its facet taxonomy for use by the tests.
 *
 * <p>Each document carries an author, a revision (as both a long and a string)
 * and a message, plus an "Author" facet category. Facet fields are attached to
 * a document <em>before</em> it is handed to the index writer; fields added
 * after {@code addDocument} would not be indexed.
 *
 * @param indexDirectory directory that receives the document index (opened in CREATE mode)
 * @param taxonomyDirectory directory that receives the facet taxonomy (opened in CREATE mode)
 * @throws IOException if writing or committing either index fails
 */
private void buildDummyIndex(final Directory indexDirectory, final Directory taxonomyDirectory)
        throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
    List<CategoryPath> categories = new ArrayList<CategoryPath>();
    FacetFields facetFields = new FacetFields(tw);

    // First document: author "foo", revision 50.
    Document doc = new Document();
    doc.add(new StringField("Author", "foo", Store.YES));
    categories.add(new CategoryPath("Author", "foo"));
    doc.add(new LongField("RevisionNumber", 50L, Store.YES));
    doc.add(new StringField("Revision", "50", Store.YES));
    doc.add(new TextField("Message", "stuff", Store.YES));
    facetFields.addFields(doc, categories);
    iw.addDocument(doc);

    // Second document: author "bar", revision 5000.
    doc = new Document();
    categories.clear();
    doc.add(new StringField("Author", "bar", Store.YES));
    categories.add(new CategoryPath("Author", "bar"));
    doc.add(new LongField("RevisionNumber", 5000L, Store.YES));
    doc.add(new StringField("Revision", "5000", Store.YES));
    doc.add(new TextField("Message", "stuff", Store.YES));
    facetFields.addFields(doc, categories);
    iw.addDocument(doc);

    tw.commit();
    tw.close();
    iw.commit();
    iw.close();
}

From source file:com.fuerve.villageelder.indexing.IndexManager.java

License:Apache License

/**
 * Gets the writers for the regular and taxonomy indices ready to go.
 * @throws IOException A fatal exception occurred while trying to
 * construct the index writers./*from w ww  . ja va  2 s. co m*/
 */
private void initializeWriters() throws IOException {
    if (luceneVersion == null || analyzer == null) {
        throw new IllegalArgumentException("The Lucene version and the index analyzer were unspecified "
                + "when attempting to create the index writers");
    }
    IndexWriterConfig iwc = new IndexWriterConfig(luceneVersion, analyzer);
    iwc.setOpenMode(openMode);

    indexWriter = new IndexWriter(indexDirectory, iwc);
    taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, openMode);
}

From source file:com.fuerve.villageelder.search.SearcherTest.java

License:Apache License

/**
 * Test method for {@link com.fuerve.villageelder.search.Searcher#initializeSearch()}.
 *
 * <p>Opens empty in-memory index and taxonomy directories, initializes a
 * searcher over them and inspects its declared fields through reflection.
 */
@SuppressWarnings("unused")
@Test
public final void testInitializeSearch() throws Exception {
    // Resolve each declared field of Searcher and open it for reflective access.
    // Fields that are never asserted on are still looked up, so a missing field fails the test.
    final Field indexDirHandle = Searcher.class.getDeclaredField("indexDirectory");
    indexDirHandle.setAccessible(true);
    final Field taxonomyDirHandle = Searcher.class.getDeclaredField("taxonomyDirectory");
    taxonomyDirHandle.setAccessible(true);
    final Field indexDirNameHandle = Searcher.class.getDeclaredField("indexDirectoryName");
    indexDirNameHandle.setAccessible(true);
    final Field taxonomyDirNameHandle = Searcher.class.getDeclaredField("taxonomyDirectoryName");
    taxonomyDirNameHandle.setAccessible(true);
    final Field stringDirsHandle = Searcher.class.getDeclaredField("stringDirectories");
    stringDirsHandle.setAccessible(true);
    final Field initializedHandle = Searcher.class.getDeclaredField("initialized");
    initializedHandle.setAccessible(true);
    final Field searchHandle = Searcher.class.getDeclaredField("search");
    searchHandle.setAccessible(true);
    final Field indexReaderHandle = Searcher.class.getDeclaredField("indexReader");
    indexReaderHandle.setAccessible(true);
    final Field indexSearcherHandle = Searcher.class.getDeclaredField("indexSearcher");
    indexSearcherHandle.setAccessible(true);
    final Field taxonomyReaderHandle = Searcher.class.getDeclaredField("taxonomyReader");
    taxonomyReaderHandle.setAccessible(true);

    // Setup: empty in-memory indices, committed once so that readers can be opened on them.
    final Directory indexDir = new RAMDirectory();
    final Directory taxonomyDir = new RAMDirectory();

    final IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer()));
    final TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, OpenMode.CREATE);

    indexWriter.commit();
    taxonomyWriter.commit();

    final Searcher searcher = new Searcher(indexDir, taxonomyDir);
    searcher.initializeSearch();

    // Snapshot the searcher's state after initialization.
    final Directory actualIndexDir = (Directory) indexDirHandle.get(searcher);
    final Directory actualTaxonomyDir = (Directory) taxonomyDirHandle.get(searcher);
    final String actualIndexDirName = (String) indexDirNameHandle.get(searcher);
    final String actualTaxonomyDirName = (String) taxonomyDirNameHandle.get(searcher);
    final boolean actualStringDirs = stringDirsHandle.getBoolean(searcher);
    final boolean actualInitialized = initializedHandle.getBoolean(searcher);
    final Search actualSearch = (Search) searchHandle.get(searcher);
    final IndexReader actualIndexReader = (IndexReader) indexReaderHandle.get(searcher);
    final IndexSearcher actualIndexSearcher = (IndexSearcher) indexSearcherHandle.get(searcher);
    final TaxonomyReader actualTaxonomyReader = (TaxonomyReader) taxonomyReaderHandle.get(searcher);

    // Test: the searcher reports itself initialized and holds its readers and index searcher.
    assertEquals(true, actualInitialized);
    assertNotNull(actualIndexReader);
    assertNotNull(actualIndexSearcher);
    assertNotNull(actualTaxonomyReader);

    // Finish
    taxonomyWriter.close();
    indexWriter.close();
}

From source file:com.fuerve.villageelder.search.SearchTest.java

License:Apache License

/**
 * Test method for {@link com.fuerve.villageelder.search.Search#getFacetsCollector(org.apache.lucene.index.DirectoryReader, org.apache.lucene.facet.taxonomy.TaxonomyReader)}.
 *
 * <p>Builds a search with one facet, opens readers over empty in-memory
 * indices and checks the simple class name of the collector that is returned.
 * The readers, writers and directories are released whether or not the
 * assertion passes.
 */
@Test
@SuppressWarnings({ "unchecked", "unused" })
public final void testGetFacetsCollector() throws Exception {
    // Constants
    Field defaultSortField = Search.class.getDeclaredField("DEFAULT_SORT");
    Field defaultFacetsField = Search.class.getDeclaredField("DEFAULT_FACETS");
    Field defaultFacetStringsField = Search.class.getDeclaredField("DEFAULT_FACET_STRINGS");
    Field defaultAnalyzerField = Search.class.getDeclaredField("DEFAULT_ANALYZER");
    Field defaultHitsField = Search.class.getDeclaredField("DEFAULT_HITS");

    defaultSortField.setAccessible(true);
    defaultFacetsField.setAccessible(true);
    defaultFacetStringsField.setAccessible(true);
    defaultAnalyzerField.setAccessible(true);
    defaultHitsField.setAccessible(true);

    // Read with a null receiver, i.e. as static fields.
    final Sort defaultSort = (Sort) defaultSortField.get(null);
    final List<FacetRequest> defaultFacets = (List<FacetRequest>) defaultFacetsField.get(null);
    final Map<String, Integer> defaultFacetStrings = (Map<String, Integer>) defaultFacetStringsField.get(null);
    final Analyzer defaultAnalyzer = (Analyzer) defaultAnalyzerField.get(null);
    final int defaultHits = defaultHitsField.getInt(null);

    // Private members
    Field queryField = Search.class.getDeclaredField("query");
    Field sortField = Search.class.getDeclaredField("sort");
    Field facetsField = Search.class.getDeclaredField("facets");

    queryField.setAccessible(true);
    sortField.setAccessible(true);
    facetsField.setAccessible(true);

    // Test setup
    QueryParser parser = getQueryParser();
    Query queryExpected = parser.parse("test:foo");
    List<FacetRequest> facetsExpected = new ArrayList<FacetRequest>();
    Sort sortExpected = Sort.RELEVANCE;

    Search target = new Search(queryExpected, facetsExpected, sortExpected);
    target.addFacet("test", 100);

    // Gather fields
    Query queryActual = (Query) queryField.get(target);
    Sort sortActual = (Sort) sortField.get(target);
    List<FacetRequest> facetsActual = (List<FacetRequest>) facetsField.get(target);

    // Set up some dummy indices.
    Directory indexDirectory = new RAMDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(Lucene.LUCENE_VERSION, Lucene.getPerFieldAnalyzer());
    IndexWriter iw = new IndexWriter(indexDirectory, iwc);
    Directory taxonomyDirectory = new RAMDirectory();
    TaxonomyWriter tw = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);

    // Commit once so that readers can be opened on the (empty) indices.
    iw.commit();
    tw.commit();

    // Keep references to the readers so they can be closed afterwards.
    DirectoryReader indexReader = DirectoryReader.open(indexDirectory);
    DirectoryTaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(taxonomyDirectory);
    try {
        // Test
        FacetsCollector actual = target.getFacetsCollector(indexReader, taxonomyReader);

        assertEquals("DocsOnlyCollector", actual.getClass().getSimpleName());
    } finally {
        taxonomyReader.close();
        indexReader.close();
        iw.close();
        tw.close();
        taxonomyDirectory.close();
        indexDirectory.close();
    }
}

From source file:com.khepry.frackhem.entities.Blendeds.java

License:Apache License

/**
 * Rebuilds the Lucene index and facet taxonomy for "Blendeds" from a delimited text file.
 *
 * <p>The index and taxonomy folders are emptied (or created) first. The first
 * line of the text file supplies the column headers; every later line whose
 * column count matches the header becomes one document with:
 * <ul>
 * <li>one stored text field per column,</li>
 * <li>the stored fields {@code toxChemicalName}, {@code toxRecognized} and
 * {@code toxSuspected}, taken from {@code toxicities} or empty when the row's
 * CAS/EDF id is not in the map,</li>
 * <li>an unstored {@code text} field concatenating the configured index
 * fields and the toxicity values, and</li>
 * <li>facet categories for recognized and suspected toxicities.</li>
 * </ul>
 * Lines with a different column count are skipped. Progress messages go to
 * standard output and/or the message queue, depending on the instance flags.
 * Nothing is indexed when the text file does not exist.
 *
 * <p>The reader, writers, analyzer and directories are released even when
 * indexing fails part-way.
 *
 * @param textFilePath path of the delimited text file to index
 * @param textColSeparator column separator; passed to {@link String#split(String)},
 *        so it is interpreted as a regular expression
 * @param casEdfIdFieldName name of the column holding the CAS/EDF id used to look up toxicities
 * @param toxicities toxicity records keyed by CAS/EDF id
 * @throws IOException if reading the text file or writing either index fails
 */
public void indexViaLucene(String textFilePath, String textColSeparator, String casEdfIdFieldName,
        Map<String, Toxicity> toxicities) throws IOException {

    String message;

    message = "Start Indexing Blendeds via Lucene...";
    if (outputToSystemOut) {
        System.out.println(message);
    }
    if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
    }

    File textFile = new File(textFilePath);
    if (textFile.exists()) {

        // Start from an empty index folder: create it, or wipe and re-create it.
        File indexFolder = new File(indexFolderPath);
        if (!indexFolder.exists()) {
            indexFolder.mkdir();
        } else {
            deleteFolder(indexFolder);
            if (!indexFolder.exists()) {
                indexFolder.mkdir();
            }
        }

        // Same for the taxonomy folder.
        File taxonomyFolder = new File(taxonomyFolderPath);
        if (!taxonomyFolder.exists()) {
            taxonomyFolder.mkdir();
        } else {
            deleteFolder(taxonomyFolder);
            if (!taxonomyFolder.exists()) {
                taxonomyFolder.mkdir();
            }
        }

        if (indexFolder.exists() && taxonomyFolder.exists()) {

            List<String> colHeaders = new ArrayList<>();
            Map<String, String> mapIndexFields = new LinkedHashMap<>();
            Map<String, String> mapStatsFields = new LinkedHashMap<>();

            // Columns whose values feed the combined "text" field.
            for (String indexField : indexFields.split(",")) {
                mapIndexFields.put(indexField, indexField);
            }

            // "name" or "name:label" entries; the label defaults to the name.
            for (String statField : statsFields.split(",")) {
                String[] tuples = statField.split(":");
                mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
            }

            int rcdCount = 0;
            // Resources are closed in reverse order of declaration, on success and on failure.
            try (SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
                    SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
                    IndexWriter indexWriter = new IndexWriter(indexDirectory,
                            new IndexWriterConfig(Version.LUCENE_44, analyzer));
                    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory,
                            OpenMode.CREATE);
                    BufferedReader br = new BufferedReader(new FileReader(textFile))) {

                FacetFields facetFields = new FacetFields(taxonomyWriter);
                List<CategoryPath> taxonomyCategories = new ArrayList<>();
                StringBuilder sb = new StringBuilder();

                String line;
                while ((line = br.readLine()) != null) {
                    rcdCount++;
                    String[] pieces = line.split(textColSeparator);
                    if (rcdCount == 1) {
                        // Header line: remember the trimmed column names.
                        for (String colHeader : pieces) {
                            colHeaders.add(colHeader.trim());
                        }
                    } else {
                        if (pieces.length == colHeaders.size()) {
                            sb.setLength(0);
                            Document document = new Document();
                            for (int i = 0; i < pieces.length; i++) {
                                Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES);
                                document.add(field);
                                if (mapIndexFields.containsKey(colHeaders.get(i))) {
                                    if (!pieces[i].trim().equals("")) {
                                        sb.append(pieces[i].trim());
                                        sb.append(" ");
                                    }
                                }
                            }
                            // append toxicity information to the document
                            String toxCasEdfId = document.get(casEdfIdFieldName).trim();
                            Toxicity toxicity = new Toxicity();
                            if (toxicities.containsKey(toxCasEdfId)) {
                                toxicity = toxicities.get(toxCasEdfId);
                                document.add(new TextField("toxChemicalName", toxicity.getToxChemicalName().trim(),
                                        Store.YES));
                                sb.append(toxicity.getToxChemicalName().trim());
                                sb.append(" ");
                                document.add(new TextField("toxRecognized", toxicity.getToxRecognized().trim(),
                                        Store.YES));
                                sb.append(toxicity.getToxRecognized().trim());
                                sb.append(" ");
                                document.add(new TextField("toxSuspected", toxicity.getToxSuspected().trim(),
                                        Store.YES));
                                sb.append(toxicity.getToxSuspected().trim());
                                sb.append(" ");
                            } else {
                                document.add(new TextField("toxChemicalName", "", Store.YES));
                                document.add(new TextField("toxRecognized", "", Store.YES));
                                document.add(new TextField("toxSuspected", "", Store.YES));
                            }
                            Field field = new TextField("text", sb.toString().trim(), Store.NO);
                            document.add(field);

                            String toxChemical = toxicity.getToxChemicalName().trim();

                            // categorize recognized toxicities
                            String toxRecognized = toxicity.getToxRecognized().trim();
                            if (!toxRecognized.equals("")) {
                                taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId));
                                // '/' is replaced in the chemical name before it is used as a path component.
                                taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical",
                                        toxChemical.replace("/", "|")));
                                // Values may be separated by spaces and/or commas.
                                for (String value : toxRecognized.replace(" ", ",").split(",")) {
                                    if (!value.trim().equals("")) {
                                        taxonomyCategories
                                                .add(new CategoryPath("toxRecognized", "Toxicity", value));
                                    }
                                }
                            }

                            // categorize suspected toxicities
                            String toxSuspected = toxicity.getToxSuspected().trim();
                            if (!toxSuspected.equals("")) {
                                taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId));
                                taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical",
                                        toxChemical.replace("/", "|")));
                                for (String value : toxSuspected.replace(" ", ",").split(",")) {
                                    if (!value.trim().equals("")) {
                                        taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value));
                                    }
                                }
                            }

                            // build up "stats" taxonomy categories
                            // NOTE(review): fieldValue comes from mapIndexFields, whose values are the
                            // field names themselves, so the category is (Blendeds, name, name) rather
                            // than the row's value. This looks unintended; confirm before changing.
                            for (String statsKey : mapStatsFields.keySet()) {
                                if (mapIndexFields.containsKey(statsKey)) {
                                    String fieldValue = mapIndexFields.get(statsKey);
                                    if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                                        taxonomyCategories.add(new CategoryPath("Blendeds", statsKey, fieldValue));
                                    }
                                }
                            }

                            // Facet fields must be attached before the document is written.
                            if (taxonomyCategories.size() > 0) {
                                facetFields.addFields(document, taxonomyCategories);
                            }

                            indexWriter.addDocument(document);
                            if (progressInterval > 0 && rcdCount % progressInterval == 0) {
                                message = "Records indexed: " + rcdCount;
                                if (outputToSystemOut) {
                                    System.out.println(message);
                                }
                                if (outputToMsgQueue) {
                                    progressMessageQueue.send(new MessageInput(message));
                                }
                            }

                            taxonomyCategories.clear();
                        }
                    }
                }

                // The count includes the header line and any skipped lines.
                message = "Records indexed: " + rcdCount;
                if (outputToSystemOut) {
                    System.out.println(message);
                }
                if (outputToMsgQueue) {
                    progressMessageQueue.send(new MessageInput(message));
                }

                indexWriter.commit();
                indexWriter.forceMerge(1);

                taxonomyWriter.commit();
            }
        } else {
            message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
                    + " does not exist!";
            if (outputToSystemErr) {
                System.err.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
        }
        message = "Ended Indexing Blendeds via Lucene!";
        if (outputToSystemOut) {
            System.out.println(message);
        }
        if (outputToMsgQueue) {
            progressMessageQueue.send(new MessageInput(message));
        }
    }
}

From source file:com.khepry.frackhem.entities.Chemicals.java

License:Apache License

public void indexViaLucene(String textFilePath, String textColSeparator, String casEdfIdFieldName,
        Map<String, Toxicity> toxicities) throws IOException {

    String message;/*  w ww  .j a  v  a 2s .c  o  m*/

    message = "Start Indexing Chemicals via Lucene...";
    if (outputToSystemOut) {
        System.out.println(message);
    }
    if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
    }

    File textFile = new File(textFilePath);
    if (textFile.exists()) {

        File indexFolder = new File(indexFolderPath);
        if (!indexFolder.exists()) {
            indexFolder.mkdir();
        } else {
            deleteFolder(indexFolder);
            if (!indexFolder.exists()) {
                indexFolder.mkdir();
            }
        }

        File taxonomyFolder = new File(taxonomyFolderPath);
        if (!taxonomyFolder.exists()) {
            taxonomyFolder.mkdir();
        } else {
            deleteFolder(taxonomyFolder);
            if (!taxonomyFolder.exists()) {
                taxonomyFolder.mkdir();
            }
        }

        if (indexFolder.exists() && taxonomyFolder.exists()) {

            List<String> colHeaders = new ArrayList<>();
            Map<String, Integer> colIndexes = new LinkedHashMap<>();
            Map<String, String> mapIndexFields = new LinkedHashMap<>();
            Map<String, String> mapStatsFields = new LinkedHashMap<>();

            String[] pieces;
            String[] tuples;

            pieces = indexFields.split(",");
            for (String indexField : pieces) {
                mapIndexFields.put(indexField, indexField);
            }

            pieces = statsFields.split(",");
            for (String statField : pieces) {
                tuples = statField.split(":");
                mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
            }

            SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
            Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
            IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
            IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);

            SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
            TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
            FacetFields facetFields = new FacetFields(taxonomyWriter);

            List<CategoryPath> taxonomyCategories = new ArrayList<>();

            String line;
            Integer rcdCount = 0;
            StringBuilder sb = new StringBuilder();
            BufferedReader br = new BufferedReader(new FileReader(textFile));
            while ((line = br.readLine()) != null) {
                rcdCount++;
                pieces = line.split(textColSeparator);
                if (rcdCount == 1) {
                    int i = 0;
                    for (String colHeader : pieces) {
                        colHeaders.add(colHeader.trim());
                        colIndexes.put(colHeader, i);
                    }
                } else {
                    if (pieces.length == colHeaders.size()) {
                        sb.setLength(0);
                        Document document = new Document();
                        for (int i = 0; i < pieces.length; i++) {
                            Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES);
                            document.add(field);
                            if (mapIndexFields.containsKey(colHeaders.get(i))) {
                                if (!pieces[i].trim().equals("")) {
                                    sb.append(pieces[i].trim());
                                    sb.append(" ");
                                }
                            }
                        }
                        // append toxicity information to the document
                        String toxCasEdfId = document.get(casEdfIdFieldName).trim();
                        Toxicity toxicity = new Toxicity();
                        if (toxicities.containsKey(toxCasEdfId)) {
                            toxicity = toxicities.get(toxCasEdfId);
                            document.add(new TextField("toxChemicalName", toxicity.getToxChemicalName().trim(),
                                    Store.YES));
                            sb.append(toxicity.getToxChemicalName().trim());
                            sb.append(" ");
                            document.add(new TextField("toxRecognized", toxicity.getToxRecognized().trim(),
                                    Store.YES));
                            sb.append(toxicity.getToxRecognized().trim());
                            sb.append(" ");
                            document.add(new TextField("toxSuspected", toxicity.getToxSuspected().trim(),
                                    Store.YES));
                            sb.append(toxicity.getToxSuspected().trim());
                            sb.append(" ");
                        } else {
                            document.add(new TextField("toxChemicalName", "", Store.YES));
                            document.add(new TextField("toxRecognized", "", Store.YES));
                            document.add(new TextField("toxSuspected", "", Store.YES));
                        }
                        Field field = new TextField("text", sb.toString().trim(), Store.NO);
                        document.add(field);

                        String toxChemical = toxicity.getToxChemicalName().trim();

                        // categorize recognized toxicities
                        String toxRecognized = toxicity.getToxRecognized().trim();
                        if (!toxRecognized.equals("")) {
                            taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId));
                            taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical",
                                    toxChemical.replace("/", "|")));
                            for (String value : toxRecognized.replace(" ", ",").split(",")) {
                                if (!value.trim().equals("")) {
                                    taxonomyCategories
                                            .add(new CategoryPath("toxRecognized", "Toxicity", value));
                                }
                            }
                        }

                        // categorize suspected toxicities
                        String toxSuspected = toxicity.getToxSuspected().trim();
                        if (!toxSuspected.equals("")) {
                            taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId));
                            taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical",
                                    toxChemical.replace("/", "|")));
                            for (String value : toxSuspected.replace(" ", ",").split(",")) {
                                if (!value.trim().equals("")) {
                                    taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value));
                                }
                            }
                        }

                        // build up "stats" taxonomy categories
                        for (String statsKey : mapStatsFields.keySet()) {
                            if (mapIndexFields.containsKey(statsKey)) {
                                String fieldValue = mapIndexFields.get(statsKey);
                                if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                                    taxonomyCategories.add(new CategoryPath("Chemicals", statsKey, fieldValue));
                                }
                            }
                        }

                        if (taxonomyCategories.size() > 0) {
                            facetFields.addFields(document, taxonomyCategories);
                            // System.out.println("Taxonomies added: " +
                            // taxonomyCategories.size());
                        }

                        indexWriter.addDocument(document);
                        if (progressInterval > 0 && rcdCount % progressInterval == 0) {
                            message = "Records indexed: " + rcdCount;
                            if (outputToSystemOut) {
                                System.out.println(message);
                            }
                            if (outputToMsgQueue) {
                                progressMessageQueue.send(new MessageInput(message));
                            }
                        }

                        taxonomyCategories.clear();
                    }
                }
            }
            br.close();
            message = "Records indexed: " + rcdCount;
            if (outputToSystemOut) {
                System.out.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }

            sb.setLength(0);
            sb.trimToSize();

            indexWriter.commit();
            indexWriter.forceMerge(1);
            indexWriter.close();

            taxonomyWriter.commit();
            taxonomyWriter.close();

            analyzer.close();

            indexDirectory.close();
            taxonomyDirectory.close();
        } else {
            message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
                    + " does not exist!";
            if (outputToSystemErr) {
                System.err.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
        }
        message = "Ended Indexing Chemicals via Lucene!";
        if (outputToSystemOut) {
            System.out.println(message);
        }
        if (outputToMsgQueue) {
            progressMessageQueue.send(new MessageInput(message));
        }
    }
}

From source file:com.khepry.frackhem.entities.Reports.java

License:Apache License

/**
 * Indexes a delimited report text file into the Lucene index and facet taxonomy folders.
 *
 * <p>The first line of the file is treated as the header row. Consecutive data rows whose
 * level-field values are identical form one group; a single document is written per group
 * (on every "level break" and once more at end of file). Each document stores the group's
 * level values, the toxicity values accumulated for the group, a non-stored {@code text}
 * field, and the facet categories derived from them.
 *
 * <p>If {@code textPath} does not exist only the start message is reported and nothing is
 * indexed. All Lucene resources and the reader are closed even when indexing fails; closing
 * an {@code IndexWriter} also commits, so a failed run can leave a partial index in the
 * (freshly wiped) folders.
 *
 * @param textPath         path of the delimited text file to index
 * @param textColSeparator regular expression passed to {@link String#split(String)} for each line
 * @param toxicities       toxicity lookup, keyed by the trimmed value of the
 *                         {@code casEdfIdFieldName} column
 * @param parseFields      names of the toxicity fields to accumulate per group; only
 *                         {@code "toxRecognized"} and {@code "toxSuspected"} are ever populated
 * @throws IOException if reading the text file or writing the index/taxonomy fails
 */
public void indexViaLucene(String textPath, String textColSeparator, Map<String, Toxicity> toxicities,
        String... parseFields) throws IOException {

    String message = "Start Indexing Reports via Lucene...";
    if (outputToSystemOut) {
        System.out.println(message);
    }
    if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
    }

    File textFile = new File(textPath);
    if (textFile.exists()) {

        File indexFolder = new File(indexFolderPath);
        if (!indexFolder.exists()) {
            indexFolder.mkdir();
        }

        File taxonomyFolder = new File(taxonomyFolderPath);
        if (!taxonomyFolder.exists()) {
            taxonomyFolder.mkdir();
        }

        if (indexFolder.exists() && taxonomyFolder.exists()) {

            // Clear out any previous index; re-create the folder in case deleteFolder removed it.
            deleteFolder(indexFolder);
            if (!indexFolder.exists()) {
                indexFolder.mkdir();
            }

            deleteFolder(taxonomyFolder);
            if (!taxonomyFolder.exists()) {
                taxonomyFolder.mkdir();
            }

            // mapBreakFields: level values of the group currently being accumulated
            // mapLevelFields: level values of the row currently being read
            Map<String, String> mapBreakFields = new LinkedHashMap<>();
            Map<String, String> mapIndexFields = new LinkedHashMap<>();
            Map<String, String> mapLevelFields = new LinkedHashMap<>();
            Map<String, String> mapStatsFields = new LinkedHashMap<>();
            Map<String, Integer> mapColIndexes = new LinkedHashMap<>();

            for (String indexField : indexFields.split(",")) {
                mapIndexFields.put(indexField, "");
            }

            for (String levelField : levelFields.split(",")) {
                mapBreakFields.put(levelField, "");
                mapLevelFields.put(levelField, "");
            }

            for (String statField : statsFields.split(",")) {
                String[] tuples = statField.split(":");
                mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
            }

            // one sorted value set per toxicity field, reset after every group
            Map<String, Map<String, String>> mapToxValues = new LinkedHashMap<>();
            for (String parseField : parseFields) {
                mapToxValues.put(parseField, new TreeMap<String, String>());
            }

            // Resources close in reverse order: reader, writers, analyzer, then directories.
            try (SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
                    SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
                    IndexWriter indexWriter = new IndexWriter(indexDirectory,
                            new IndexWriterConfig(Version.LUCENE_44, analyzer));
                    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory,
                            OpenMode.CREATE);
                    BufferedReader br = new BufferedReader(new FileReader(textFile))) {

                FacetFields facetFields = new FacetFields(taxonomyWriter);
                StringBuilder sbIndex = new StringBuilder();

                int outCount = 0;
                int rcdCount = 0;
                boolean firstDataRecordHandled = false;

                String line;
                while ((line = br.readLine()) != null) {
                    rcdCount++;
                    String[] pieces = line.split(textColSeparator);
                    if (rcdCount == 1) {
                        // header row: remember the position of every column
                        for (int i = 0; i < pieces.length; i++) {
                            mapColIndexes.put(pieces[i].trim(), i);
                        }
                    } else {
                        // capture this row's level values
                        for (Map.Entry<String, String> levelEntry : mapLevelFields.entrySet()) {
                            Integer colIndex = mapColIndexes.get(levelEntry.getKey());
                            if (colIndex != null) {
                                levelEntry.setValue(trimmedColumn(pieces, colIndex));
                            }
                        }
                        if (!firstDataRecordHandled) {
                            mapBreakFields.putAll(mapLevelFields);
                            firstDataRecordHandled = true;
                        }
                        // if there is a "level break", flush the group that just ended
                        if (!mapLevelFields.equals(mapBreakFields)) {
                            addReportGroupDocument(indexWriter, facetFields, mapBreakFields, mapToxValues,
                                    mapStatsFields, sbIndex);
                            outCount++;
                            mapBreakFields.putAll(mapLevelFields);
                        }
                        // build up 'text' field index value from the level columns that are indexed
                        for (Map.Entry<String, String> levelEntry : mapLevelFields.entrySet()) {
                            String key = levelEntry.getKey();
                            if (mapColIndexes.containsKey(key)) {
                                String value = levelEntry.getValue();
                                if (!value.equals("") && mapIndexFields.containsKey(key)) {
                                    sbIndex.append(value);
                                    sbIndex.append(" ");
                                }
                            }
                        }
                        // build up toxicity values for later level-break use
                        Integer casColIndex = mapColIndexes.get(casEdfIdFieldName);
                        if (casColIndex != null) {
                            Toxicity toxicity = toxicities.get(trimmedColumn(pieces, casColIndex));
                            if (toxicity != null) {
                                collectToxValues(mapToxValues.get("toxRecognized"),
                                        toxicity.getToxRecognized());
                                collectToxValues(mapToxValues.get("toxSuspected"),
                                        toxicity.getToxSuspected());
                            }
                        }
                        if (progressInterval > 0 && rcdCount % progressInterval == 0) {
                            message = "Records indexed: " + rcdCount;
                            if (outputToSystemOut) {
                                System.out.println(message);
                            }
                            if (outputToMsgQueue) {
                                progressMessageQueue.send(new MessageInput(message));
                            }
                        }
                    }
                }

                // handle end-of-file processing: flush the last group, if any data row was read
                if (firstDataRecordHandled) {
                    addReportGroupDocument(indexWriter, facetFields, mapBreakFields, mapToxValues,
                            mapStatsFields, sbIndex);
                    outCount++;
                }

                message = "Records processed: " + rcdCount;
                if (outputToSystemOut) {
                    System.out.println(message);
                }
                if (outputToMsgQueue) {
                    progressMessageQueue.send(new MessageInput(message));
                }
                message = "Records indexed: " + outCount;
                if (outputToSystemOut) {
                    System.out.println(message);
                }
                if (outputToMsgQueue) {
                    progressMessageQueue.send(new MessageInput(message));
                }

                indexWriter.commit();
                indexWriter.forceMerge(1);
                taxonomyWriter.commit();
            }
        } else {
            message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
                    + " does not exist!";
            if (outputToSystemErr) {
                System.err.println(message);
            }
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
        }
        message = "Ended Indexing Reports via Lucene!";
        if (outputToSystemOut) {
            System.out.println(message);
        }
        if (outputToMsgQueue) {
            progressMessageQueue.send(new MessageInput(message));
        }
    }
}

/**
 * Writes one document for a finished group of report rows and resets the per-group accumulators.
 *
 * @param indexWriter    writer the document is added to
 * @param facetFields    facet helper used to attach the taxonomy categories to the document
 * @param groupFields    level-field values of the group being written
 * @param mapToxValues   toxicity values accumulated for the group, per toxicity field; cleared on return
 * @param mapStatsFields stats field names (keys) for which a "Reports" category is added
 * @param sbIndex        accumulated 'text' field content for the group; emptied on return
 * @throws IOException if the document cannot be added
 */
private static void addReportGroupDocument(IndexWriter indexWriter, FacetFields facetFields,
        Map<String, String> groupFields, Map<String, Map<String, String>> mapToxValues,
        Map<String, String> mapStatsFields, StringBuilder sbIndex) throws IOException {

    Document tgtDocument = new Document();
    List<CategoryPath> taxonomyCategories = new ArrayList<>();

    for (Map.Entry<String, String> entry : groupFields.entrySet()) {
        tgtDocument.add(new TextField(entry.getKey(), entry.getValue(), Store.YES));
    }

    for (Map.Entry<String, Map<String, String>> toxEntry : mapToxValues.entrySet()) {
        String fieldName = toxEntry.getKey();
        String fieldValue = GenericUtilities.joinString(toxEntry.getValue().values(), " ");
        sbIndex.append(fieldValue);
        sbIndex.append(" ");
        tgtDocument.add(new TextField(fieldName, fieldValue, Store.YES));
        // build up "Toxicity" taxonomy categories
        for (String value : fieldValue.replace(" ", ",").split(",")) {
            if (!value.trim().equals("")) {
                taxonomyCategories.add(new CategoryPath(fieldName, "Toxicity", value));
            }
        }
    }

    // build up "stats" taxonomy categories from the values of the group being written,
    // once per document rather than once per toxicity field
    for (String statsKey : mapStatsFields.keySet()) {
        String groupValue = groupFields.get(statsKey);
        if (groupValue != null && !statsKey.trim().equals("") && !groupValue.trim().equals("")) {
            taxonomyCategories.add(new CategoryPath("Reports", statsKey, groupValue));
        }
    }

    tgtDocument.add(new TextField("text", sbIndex.toString().trim(), Store.NO));
    if (!taxonomyCategories.isEmpty()) {
        facetFields.addFields(tgtDocument, taxonomyCategories);
    }
    indexWriter.addDocument(tgtDocument);

    // reset the accumulators for the next group
    sbIndex.setLength(0);
    for (Map<String, String> toxValues : mapToxValues.values()) {
        toxValues.clear();
    }
}

/**
 * Returns the trimmed column at {@code index}, or an empty string when the row is too short
 * (String.split drops trailing empty columns, so short rows are normal).
 */
private static String trimmedColumn(String[] pieces, int index) {
    return index < pieces.length ? pieces[index].trim() : "";
}

/**
 * Adds every non-empty comma-separated value of {@code csvValues} to {@code target};
 * does nothing when the target field was not requested or there are no values.
 */
private static void collectToxValues(Map<String, String> target, String csvValues) {
    if (target == null || csvValues == null) {
        return;
    }
    for (String toxValue : csvValues.split(",")) {
        if (!toxValue.equals("")) {
            target.put(toxValue, toxValue);
        }
    }
}

From source file:com.khepry.frackhem.entities.Toxicities.java

License:Apache License

/**
 * Indexes a delimited toxicity text file into the Lucene index and facet taxonomy folders.
 *
 * <p>The first line of the file is treated as the header row. Every following row whose
 * column count equals the header's becomes one document: each column is stored under its
 * header name, the columns listed in {@code indexFields} are concatenated into a non-stored
 * {@code text} field, and facet categories are built from the first four columns (read as
 * CAS/EDF id, chemical name, recognized toxicities, suspected toxicities). Rows with a
 * different column count are skipped.
 *
 * <p>If {@code textFilePath} does not exist only the start message is reported and nothing is
 * indexed. All Lucene resources and the reader are closed even when indexing fails; closing
 * an {@code IndexWriter} also commits, so a failed run can leave a partial index in the
 * (freshly wiped) folders.
 *
 * @param textFilePath     path of the delimited text file to index
 * @param textColSeparator regular expression passed to {@link String#split(String)} for each line
 * @throws IOException if reading the text file or writing the index/taxonomy fails
 */
public void indexViaLucene(String textFilePath, String textColSeparator) throws IOException {

    String message = "Start Indexing Toxicities via Lucene...";
    if (outputToSystemOut) {
        System.out.println(message);
    }
    if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
    }

    File textFile = new File(textFilePath);
    if (textFile.exists()) {

        // Clear out any previous index; re-create the folder in case deleteFolder removed it.
        File indexFolder = new File(indexFolderPath);
        if (!indexFolder.exists()) {
            indexFolder.mkdir();
        } else {
            deleteFolder(indexFolder);
            if (!indexFolder.exists()) {
                indexFolder.mkdir();
            }
        }

        File taxonomyFolder = new File(taxonomyFolderPath);
        if (!taxonomyFolder.exists()) {
            taxonomyFolder.mkdir();
        } else {
            deleteFolder(taxonomyFolder);
            if (!taxonomyFolder.exists()) {
                taxonomyFolder.mkdir();
            }
        }

        if (indexFolder.exists() && taxonomyFolder.exists()) {

            List<String> colHeaders = new ArrayList<>();
            Map<String, String> mapIndexFields = new LinkedHashMap<>();
            Map<String, String> mapStatsFields = new LinkedHashMap<>();

            for (String indexField : indexFields.split(",")) {
                mapIndexFields.put(indexField, indexField);
            }

            for (String statField : statsFields.split(",")) {
                String[] tuples = statField.split(":");
                mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
            }

            // Resources close in reverse order: reader, writers, analyzer, then directories.
            try (SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
                    SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
                    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
                    IndexWriter indexWriter = new IndexWriter(indexDirectory,
                            new IndexWriterConfig(Version.LUCENE_44, analyzer));
                    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory,
                            OpenMode.CREATE);
                    BufferedReader br = new BufferedReader(new FileReader(textFile))) {

                FacetFields facetFields = new FacetFields(taxonomyWriter);
                List<CategoryPath> taxonomyCategories = new ArrayList<>();
                StringBuilder sb = new StringBuilder();
                int rcdCount = 0;

                String line;
                while ((line = br.readLine()) != null) {
                    rcdCount++;
                    String[] pieces = line.split(textColSeparator);
                    if (rcdCount == 1) {
                        // header row: remember the column names
                        for (String colHeader : pieces) {
                            colHeaders.add(colHeader.trim());
                        }
                    } else if (pieces.length == colHeaders.size()) {
                        sb.setLength(0);
                        Document document = new Document();
                        for (int i = 0; i < pieces.length; i++) {
                            String colHeader = colHeaders.get(i);
                            String colValue = pieces[i].trim();
                            document.add(new TextField(colHeader, colValue, Store.YES));
                            if (mapIndexFields.containsKey(colHeader) && !colValue.equals("")) {
                                sb.append(colValue);
                                sb.append(" ");
                            }
                        }
                        document.add(new TextField("text", sb.toString().trim(), Store.NO));

                        String toxCasEdfId = pieces[0].trim();
                        String toxChemical = pieces[1].trim();

                        // categorize recognized and suspected toxicities
                        addToxCategories(taxonomyCategories, "toxRecognized", toxCasEdfId, toxChemical,
                                pieces[2].trim());
                        addToxCategories(taxonomyCategories, "toxSuspected", toxCasEdfId, toxChemical,
                                pieces[3].trim());

                        // build up "stats" taxonomy categories
                        // NOTE(review): mapIndexFields maps each field name to itself, so the
                        // category value here is the field name, not this row's column value;
                        // confirm whether the row value was intended.
                        for (String statsKey : mapStatsFields.keySet()) {
                            if (mapIndexFields.containsKey(statsKey)) {
                                String fieldValue = mapIndexFields.get(statsKey);
                                if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                                    taxonomyCategories
                                            .add(new CategoryPath("Toxicities", statsKey, fieldValue));
                                }
                            }
                        }

                        if (!taxonomyCategories.isEmpty()) {
                            facetFields.addFields(document, taxonomyCategories);
                        }

                        indexWriter.addDocument(document);
                        if (progressInterval > 0 && rcdCount % progressInterval == 0) {
                            message = "Records indexed: " + rcdCount;
                            if (outputToSystemOut) {
                                System.out.println(message);
                            }
                            if (outputToMsgQueue) {
                                progressMessageQueue.send(new MessageInput(message));
                            }
                        }

                        taxonomyCategories.clear();
                    }
                }

                message = "Records indexed: " + rcdCount;
                if (outputToSystemOut) {
                    System.out.println(message);
                }
                if (outputToMsgQueue) {
                    progressMessageQueue.send(new MessageInput(message));
                }

                indexWriter.commit();
                indexWriter.forceMerge(1);
                taxonomyWriter.commit();
            }
        } else {
            message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
                    + " does not exist!";
            if (outputToSystemErr) {
                System.err.println(message);
            }
            // report the failure on the message queue as well, like the other progress messages
            if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
            }
        }
        message = "Ended Indexing Toxicities via Lucene!";
        if (outputToSystemOut) {
            System.out.println(message);
        }
        if (outputToMsgQueue) {
            progressMessageQueue.send(new MessageInput(message));
        }
    }
}

/**
 * Adds the facet categories for one toxicity dimension of a row: the CAS/EDF id, the chemical
 * name (with "/" replaced by "|") and one "Toxicity" category per space- or comma-separated
 * value. Nothing is added when {@code toxValues} is empty.
 *
 * @param categories list the categories are appended to
 * @param dimension  top-level category name, e.g. {@code "toxRecognized"}
 * @param casEdfId   trimmed CAS/EDF id of the row
 * @param chemical   trimmed chemical name of the row
 * @param toxValues  trimmed toxicity values of the row for this dimension
 */
private static void addToxCategories(List<CategoryPath> categories, String dimension, String casEdfId,
        String chemical, String toxValues) {
    if (toxValues.equals("")) {
        return;
    }
    categories.add(new CategoryPath(dimension, "CasEdfId", casEdfId));
    categories.add(new CategoryPath(dimension, "Chemical", chemical.replace("/", "|")));
    for (String value : toxValues.replace(" ", ",").split(",")) {
        if (!value.trim().equals("")) {
            categories.add(new CategoryPath(dimension, "Toxicity", value));
        }
    }
}

From source file:com.orientechnologies.lucene.engine.OLuceneFacetManager.java

License:Apache License

/**
 * Opens the taxonomy writer and marks the configured facet fields as hierarchical.
 *
 * <p>Does nothing when there is no metadata, when the metadata has no {@code FACET_FIELDS}
 * entry, or when that entry is {@code null}. Otherwise a {@link DirectoryTaxonomyWriter} is
 * opened in {@code CREATE_OR_APPEND} mode on the taxonomy directory and every listed field is
 * registered as hierarchical in {@code config}; {@code facetField} ends up holding the last
 * field iterated.
 *
 * @throws IOException if the taxonomy directory or writer cannot be opened
 */
protected void buildFacetIndexIfNeeded() throws IOException {

    // No facet configuration declared: nothing to build.
    if (metadata == null || !metadata.containsField(FACET_FIELDS)) {
        return;
    }

    ODatabaseDocumentInternal database = owner.getDatabase();
    Iterable<String> facetNames = metadata.field(FACET_FIELDS);
    if (facetNames == null) {
        return;
    }

    Directory taxonomyDir = getTaxDirectory(database);
    taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDir, IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    for (String facetName : facetNames) {
        facetField = facetName;
        config.setHierarchical(facetName, true);
    }
}