Usage examples for org.apache.lucene.index.IndexWriter.commit
@Override public final long commit() throws IOException
Commits all pending changes (added and deleted documents, segment merges, added indexes, etc.) to the index, and syncs all referenced index files, such that a reader will see the changes and the index updates will survive an OS or machine crash or power loss.
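Before the project examples, here is a minimal self-contained sketch of the typical call sequence. This is a sketch only, written against the Lucene 5+ API; the index path and field name are illustrative placeholders, not values taken from the examples below.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class CommitSketch {
  public static void main(String[] args) throws Exception {
    // "/tmp/example-index" is a placeholder path.
    try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
        IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
      Document doc = new Document();
      doc.add(new TextField("body", "hello, commit", Store.YES));
      writer.addDocument(doc);
      // Durably persist the pending change; readers opened after this point
      // (and the index state after a crash) will include the document.
      writer.commit();
    }
  }
}

Note that close() also commits pending changes by default in these Lucene versions, so the explicit commit() calls in the examples below mostly serve to establish a durable point before further work such as forceMerge(1) or serializing the index.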
From source file:com.khepry.frackhem.entities.Blendeds.java
License:Apache License
public void indexViaLucene(String textFilePath, String textColSeparator, String casEdfIdFieldName,
    Map<String, Toxicity> toxicities) throws IOException {
  String message = "Start Indexing Blendeds via Lucene...";
  if (outputToSystemOut) {
    System.out.println(message);
  }
  if (outputToMsgQueue) {
    progressMessageQueue.send(new MessageInput(message));
  }
  File textFile = new File(textFilePath);
  if (textFile.exists()) {
    File indexFolder = new File(indexFolderPath);
    if (!indexFolder.exists()) {
      indexFolder.mkdir();
    } else {
      deleteFolder(indexFolder);
      if (!indexFolder.exists()) {
        indexFolder.mkdir();
      }
    }
    File taxonomyFolder = new File(taxonomyFolderPath);
    if (!taxonomyFolder.exists()) {
      taxonomyFolder.mkdir();
    } else {
      deleteFolder(taxonomyFolder);
      if (!taxonomyFolder.exists()) {
        taxonomyFolder.mkdir();
      }
    }
    if (indexFolder.exists() && taxonomyFolder.exists()) {
      List<String> colHeaders = new ArrayList<>();
      Map<String, Integer> colIndexes = new LinkedHashMap<>();
      Map<String, String> mapIndexFields = new LinkedHashMap<>();
      Map<String, String> mapStatsFields = new LinkedHashMap<>();
      String[] pieces;
      String[] tuples;
      pieces = indexFields.split(",");
      for (String indexField : pieces) {
        mapIndexFields.put(indexField, indexField);
      }
      pieces = statsFields.split(",");
      for (String statField : pieces) {
        tuples = statField.split(":");
        mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
      }
      SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
      IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
      IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
      SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
      TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
      FacetFields facetFields = new FacetFields(taxonomyWriter);
      List<CategoryPath> taxonomyCategories = new ArrayList<>();
      String line;
      Integer rcdCount = 0;
      StringBuilder sb = new StringBuilder();
      BufferedReader br = new BufferedReader(new FileReader(textFile));
      while ((line = br.readLine()) != null) {
        rcdCount++;
        pieces = line.split(textColSeparator);
        if (rcdCount == 1) {
          int i = 0;
          for (String colHeader : pieces) {
            colHeaders.add(colHeader.trim());
            colIndexes.put(colHeader, i);
            i++;
          }
        } else {
          if (pieces.length == colHeaders.size()) {
            sb.setLength(0);
            Document document = new Document();
            for (int i = 0; i < pieces.length; i++) {
              Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES);
              document.add(field);
              if (mapIndexFields.containsKey(colHeaders.get(i))) {
                if (!pieces[i].trim().equals("")) {
                  sb.append(pieces[i].trim());
                  sb.append(" ");
                }
              }
            }
            // append toxicity information to the document
            String toxCasEdfId = document.get(casEdfIdFieldName).trim();
            Toxicity toxicity = new Toxicity();
            if (toxicities.containsKey(toxCasEdfId)) {
              toxicity = toxicities.get(toxCasEdfId);
              document.add(new TextField("toxChemicalName", toxicity.getToxChemicalName().trim(), Store.YES));
              sb.append(toxicity.getToxChemicalName().trim());
              sb.append(" ");
              document.add(new TextField("toxRecognized", toxicity.getToxRecognized().trim(), Store.YES));
              sb.append(toxicity.getToxRecognized().trim());
              sb.append(" ");
              document.add(new TextField("toxSuspected", toxicity.getToxSuspected().trim(), Store.YES));
              sb.append(toxicity.getToxSuspected().trim());
              sb.append(" ");
            } else {
              document.add(new TextField("toxChemicalName", "", Store.YES));
              document.add(new TextField("toxRecognized", "", Store.YES));
              document.add(new TextField("toxSuspected", "", Store.YES));
            }
            Field field = new TextField("text", sb.toString().trim(), Store.NO);
            document.add(field);
            String toxChemical = toxicity.getToxChemicalName().trim();
            // categorize recognized toxicities
            String toxRecognized = toxicity.getToxRecognized().trim();
            if (!toxRecognized.equals("")) {
              taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId));
              taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical", toxChemical.replace("/", "|")));
              for (String value : toxRecognized.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("toxRecognized", "Toxicity", value));
                }
              }
            }
            // categorize suspected toxicities
            String toxSuspected = toxicity.getToxSuspected().trim();
            if (!toxSuspected.equals("")) {
              taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId));
              taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical", toxChemical.replace("/", "|")));
              for (String value : toxSuspected.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value));
                }
              }
            }
            // build up "stats" taxonomy categories
            for (String statsKey : mapStatsFields.keySet()) {
              if (mapIndexFields.containsKey(statsKey)) {
                String fieldValue = mapIndexFields.get(statsKey);
                if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("Blendeds", statsKey, fieldValue));
                }
              }
            }
            if (taxonomyCategories.size() > 0) {
              facetFields.addFields(document, taxonomyCategories);
              // System.out.println("Taxonomies added: " + taxonomyCategories.size());
            }
            indexWriter.addDocument(document);
            if (progressInterval > 0 && rcdCount % progressInterval == 0) {
              message = "Records indexed: " + rcdCount;
              if (outputToSystemOut) {
                System.out.println(message);
              }
              if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
              }
            }
            taxonomyCategories.clear();
          }
        }
      }
      br.close();
      message = "Records indexed: " + rcdCount;
      if (outputToSystemOut) {
        System.out.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
      sb.setLength(0);
      sb.trimToSize();
      indexWriter.commit();
      indexWriter.forceMerge(1);
      indexWriter.close();
      taxonomyWriter.commit();
      taxonomyWriter.close();
      analyzer.close();
      indexDirectory.close();
      taxonomyDirectory.close();
    } else {
      message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
          + " does not exist!";
      if (outputToSystemErr) {
        System.err.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
    }
    message = "Ended Indexing Blendeds via Lucene!";
    if (outputToSystemOut) {
      System.out.println(message);
    }
    if (outputToMsgQueue) {
      progressMessageQueue.send(new MessageInput(message));
    }
  }
}
From source file:com.khepry.frackhem.entities.Chemicals.java
License:Apache License
public void indexViaLucene(String textFilePath, String textColSeparator, String casEdfIdFieldName,
    Map<String, Toxicity> toxicities) throws IOException {
  String message = "Start Indexing Chemicals via Lucene...";
  if (outputToSystemOut) {
    System.out.println(message);
  }
  if (outputToMsgQueue) {
    progressMessageQueue.send(new MessageInput(message));
  }
  File textFile = new File(textFilePath);
  if (textFile.exists()) {
    File indexFolder = new File(indexFolderPath);
    if (!indexFolder.exists()) {
      indexFolder.mkdir();
    } else {
      deleteFolder(indexFolder);
      if (!indexFolder.exists()) {
        indexFolder.mkdir();
      }
    }
    File taxonomyFolder = new File(taxonomyFolderPath);
    if (!taxonomyFolder.exists()) {
      taxonomyFolder.mkdir();
    } else {
      deleteFolder(taxonomyFolder);
      if (!taxonomyFolder.exists()) {
        taxonomyFolder.mkdir();
      }
    }
    if (indexFolder.exists() && taxonomyFolder.exists()) {
      List<String> colHeaders = new ArrayList<>();
      Map<String, Integer> colIndexes = new LinkedHashMap<>();
      Map<String, String> mapIndexFields = new LinkedHashMap<>();
      Map<String, String> mapStatsFields = new LinkedHashMap<>();
      String[] pieces;
      String[] tuples;
      pieces = indexFields.split(",");
      for (String indexField : pieces) {
        mapIndexFields.put(indexField, indexField);
      }
      pieces = statsFields.split(",");
      for (String statField : pieces) {
        tuples = statField.split(":");
        mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
      }
      SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
      IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
      IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
      SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
      TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
      FacetFields facetFields = new FacetFields(taxonomyWriter);
      List<CategoryPath> taxonomyCategories = new ArrayList<>();
      String line;
      Integer rcdCount = 0;
      StringBuilder sb = new StringBuilder();
      BufferedReader br = new BufferedReader(new FileReader(textFile));
      while ((line = br.readLine()) != null) {
        rcdCount++;
        pieces = line.split(textColSeparator);
        if (rcdCount == 1) {
          int i = 0;
          for (String colHeader : pieces) {
            colHeaders.add(colHeader.trim());
            colIndexes.put(colHeader, i);
            i++;
          }
        } else {
          if (pieces.length == colHeaders.size()) {
            sb.setLength(0);
            Document document = new Document();
            for (int i = 0; i < pieces.length; i++) {
              Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES);
              document.add(field);
              if (mapIndexFields.containsKey(colHeaders.get(i))) {
                if (!pieces[i].trim().equals("")) {
                  sb.append(pieces[i].trim());
                  sb.append(" ");
                }
              }
            }
            // append toxicity information to the document
            String toxCasEdfId = document.get(casEdfIdFieldName).trim();
            Toxicity toxicity = new Toxicity();
            if (toxicities.containsKey(toxCasEdfId)) {
              toxicity = toxicities.get(toxCasEdfId);
              document.add(new TextField("toxChemicalName", toxicity.getToxChemicalName().trim(), Store.YES));
              sb.append(toxicity.getToxChemicalName().trim());
              sb.append(" ");
              document.add(new TextField("toxRecognized", toxicity.getToxRecognized().trim(), Store.YES));
              sb.append(toxicity.getToxRecognized().trim());
              sb.append(" ");
              document.add(new TextField("toxSuspected", toxicity.getToxSuspected().trim(), Store.YES));
              sb.append(toxicity.getToxSuspected().trim());
              sb.append(" ");
            } else {
              document.add(new TextField("toxChemicalName", "", Store.YES));
              document.add(new TextField("toxRecognized", "", Store.YES));
              document.add(new TextField("toxSuspected", "", Store.YES));
            }
            Field field = new TextField("text", sb.toString().trim(), Store.NO);
            document.add(field);
            String toxChemical = toxicity.getToxChemicalName().trim();
            // categorize recognized toxicities
            String toxRecognized = toxicity.getToxRecognized().trim();
            if (!toxRecognized.equals("")) {
              taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId));
              taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical", toxChemical.replace("/", "|")));
              for (String value : toxRecognized.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("toxRecognized", "Toxicity", value));
                }
              }
            }
            // categorize suspected toxicities
            String toxSuspected = toxicity.getToxSuspected().trim();
            if (!toxSuspected.equals("")) {
              taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId));
              taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical", toxChemical.replace("/", "|")));
              for (String value : toxSuspected.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value));
                }
              }
            }
            // build up "stats" taxonomy categories
            for (String statsKey : mapStatsFields.keySet()) {
              if (mapIndexFields.containsKey(statsKey)) {
                String fieldValue = mapIndexFields.get(statsKey);
                if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("Chemicals", statsKey, fieldValue));
                }
              }
            }
            if (taxonomyCategories.size() > 0) {
              facetFields.addFields(document, taxonomyCategories);
              // System.out.println("Taxonomies added: " + taxonomyCategories.size());
            }
            indexWriter.addDocument(document);
            if (progressInterval > 0 && rcdCount % progressInterval == 0) {
              message = "Records indexed: " + rcdCount;
              if (outputToSystemOut) {
                System.out.println(message);
              }
              if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
              }
            }
            taxonomyCategories.clear();
          }
        }
      }
      br.close();
      message = "Records indexed: " + rcdCount;
      if (outputToSystemOut) {
        System.out.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
      sb.setLength(0);
      sb.trimToSize();
      indexWriter.commit();
      indexWriter.forceMerge(1);
      indexWriter.close();
      taxonomyWriter.commit();
      taxonomyWriter.close();
      analyzer.close();
      indexDirectory.close();
      taxonomyDirectory.close();
    } else {
      message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
          + " does not exist!";
      if (outputToSystemErr) {
        System.err.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
    }
    message = "Ended Indexing Chemicals via Lucene!";
    if (outputToSystemOut) {
      System.out.println(message);
    }
    if (outputToMsgQueue) {
      progressMessageQueue.send(new MessageInput(message));
    }
  }
}
From source file:com.khepry.frackhem.entities.Reports.java
License:Apache License
public void indexViaLucene(String textPath, String textColSeparator, Map<String, Toxicity> toxicities,
    String... parseFields) throws IOException {
  String message = "Start Indexing Reports via Lucene...";
  if (outputToSystemOut) {
    System.out.println(message);
  }
  if (outputToMsgQueue) {
    progressMessageQueue.send(new MessageInput(message));
  }
  File textFile = new File(textPath);
  if (textFile.exists()) {
    File indexFolder = new File(indexFolderPath);
    if (!indexFolder.exists()) {
      indexFolder.mkdir();
    }
    File taxonomyFolder = new File(taxonomyFolderPath);
    if (!taxonomyFolder.exists()) {
      taxonomyFolder.mkdir();
    }
    if (indexFolder.exists() && taxonomyFolder.exists()) {
      deleteFolder(indexFolder);
      if (!indexFolder.exists()) {
        indexFolder.mkdir();
      }
      deleteFolder(taxonomyFolder);
      if (!taxonomyFolder.exists()) {
        taxonomyFolder.mkdir();
      }
      Map<String, String> mapBreakFields = new LinkedHashMap<>();
      Map<String, String> mapIndexFields = new LinkedHashMap<>();
      Map<String, String> mapLevelFields = new LinkedHashMap<>();
      Map<String, String> mapStatsFields = new LinkedHashMap<>();
      Map<String, Integer> mapColIndexes = new LinkedHashMap<>();
      String[] pieces;
      String[] tuples;
      pieces = indexFields.split(",");
      for (String indexField : pieces) {
        mapIndexFields.put(indexField, "");
      }
      pieces = levelFields.split(",");
      for (String levelField : pieces) {
        mapBreakFields.put(levelField, "");
        mapLevelFields.put(levelField, "");
      }
      pieces = statsFields.split(",");
      for (String statField : pieces) {
        tuples = statField.split(":");
        mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
      }
      Map<String, Map<String, String>> mapToxValues = new LinkedHashMap<>();
      for (String parseField : parseFields) {
        mapToxValues.put(parseField, new TreeMap<String, String>());
      }
      SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
      SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
      IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
      IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
      TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
      FacetFields facetFields = new FacetFields(taxonomyWriter);
      List<CategoryPath> taxonomyCategories = new ArrayList<>();
      String line;
      StringBuilder sbIndex = new StringBuilder();
      StringBuilder sbLevel = new StringBuilder();
      Integer outCount = 0;
      Integer rcdCount = 0;
      Boolean firstDataRecordHandled = false;
      BufferedReader br = new BufferedReader(new FileReader(textFile));
      while ((line = br.readLine()) != null) {
        rcdCount++;
        pieces = line.split(textColSeparator);
        if (rcdCount == 1) {
          int i = 0;
          for (String colHeader : pieces) {
            mapColIndexes.put(colHeader.trim(), i);
            i++;
          }
        } else {
          for (String key : mapLevelFields.keySet()) {
            if (mapColIndexes.containsKey(key)) {
              String value = pieces[mapColIndexes.get(key)].trim();
              // build up level-break values
              if (mapLevelFields.containsKey(key)) {
                mapLevelFields.put(key, value);
              }
            }
          }
          if (!firstDataRecordHandled) {
            mapBreakFields.putAll(mapLevelFields);
            firstDataRecordHandled = true;
          }
          // if there is a "level break"
          if (!mapLevelFields.equals(mapBreakFields)) {
            Document tgtDocument = new Document();
            for (Map.Entry<String, String> entry : mapBreakFields.entrySet()) {
              Field field = new TextField(entry.getKey(), entry.getValue(), Store.YES);
              tgtDocument.add(field);
            }
            for (Map.Entry<String, Map<String, String>> toxEntry : mapToxValues.entrySet()) {
              String fieldName = toxEntry.getKey();
              String fieldValue = GenericUtilities.joinString(toxEntry.getValue().values(), " ");
              // System.out.println(fieldName + ": " + fieldValue);
              sbIndex.append(fieldValue);
              sbIndex.append(" ");
              tgtDocument.add(new TextField(fieldName, fieldValue, Store.YES));
              // build up "Toxicity" taxonomy categories
              for (String value : fieldValue.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath(fieldName, "Toxicity", value));
                }
              }
              // build up "stats" taxonomy categories
              for (String statsKey : mapStatsFields.keySet()) {
                if (mapLevelFields.containsKey(statsKey)) {
                  String levelValue = mapLevelFields.get(statsKey);
                  if (!statsKey.trim().equals("") && !levelValue.trim().equals("")) {
                    taxonomyCategories.add(new CategoryPath("Reports", statsKey, levelValue));
                  }
                }
              }
            }
            tgtDocument.add(new TextField("text", sbIndex.toString().trim(), Store.NO));
            if (taxonomyCategories.size() > 0) {
              facetFields.addFields(tgtDocument, taxonomyCategories);
              // System.out.println("Taxonomies added: " + taxonomyCategories.size());
            }
            indexWriter.addDocument(tgtDocument);
            outCount++;
            sbIndex.setLength(0);
            for (String key : mapToxValues.keySet()) {
              mapToxValues.get(key).clear();
            }
            taxonomyCategories.clear();
            mapBreakFields.putAll(mapLevelFields);
          }
          // build up text index values
          for (String key : mapLevelFields.keySet()) {
            if (mapColIndexes.containsKey(key)) {
              String value = pieces[mapColIndexes.get(key)].trim();
              if (!value.equals("")) {
                // build up 'text' field index value
                if (mapIndexFields.containsKey(key)) {
                  sbIndex.append(value);
                  sbIndex.append(" ");
                }
              }
            }
          }
          // build up toxicity values for later level-break use
          if (mapColIndexes.containsKey(casEdfIdFieldName)) {
            Toxicity toxicity = toxicities.get(pieces[mapColIndexes.get(casEdfIdFieldName)].trim());
            if (toxicity != null) {
              // build up recognized toxicity values
              String[] toxRValues = toxicity.getToxRecognized().split(",");
              for (String toxValue : toxRValues) {
                if (!toxValue.equals("")) {
                  if (!mapToxValues.get("toxRecognized").containsKey(toxValue)) {
                    mapToxValues.get("toxRecognized").put(toxValue, toxValue);
                  }
                }
              }
              // build up suspected toxicity values
              String[] toxSValues = toxicity.getToxSuspected().split(",");
              for (String toxValue : toxSValues) {
                if (!toxValue.equals("")) {
                  if (!mapToxValues.get("toxSuspected").containsKey(toxValue)) {
                    mapToxValues.get("toxSuspected").put(toxValue, toxValue);
                  }
                }
              }
            }
          }
          if (progressInterval > 0 && rcdCount % progressInterval == 0) {
            message = "Records indexed: " + rcdCount;
            if (outputToSystemOut) {
              System.out.println(message);
            }
            if (outputToMsgQueue) {
              progressMessageQueue.send(new MessageInput(message));
            }
          }
        }
      }
      br.close();
      // handle end-of-file processing
      Document tgtDocument = new Document();
      for (Map.Entry<String, String> entry : mapBreakFields.entrySet()) {
        Field field = new TextField(entry.getKey(), entry.getValue(), Store.YES);
        tgtDocument.add(field);
      }
      for (Map.Entry<String, Map<String, String>> toxEntry : mapToxValues.entrySet()) {
        String fieldName = toxEntry.getKey();
        String fieldValue = GenericUtilities.joinString(toxEntry.getValue().values(), " ");
        // System.out.println(fieldName + ": " + fieldValue);
        sbIndex.append(fieldValue);
        sbIndex.append(" ");
        tgtDocument.add(new TextField(fieldName, fieldValue, Store.YES));
        // build up "Toxicity" taxonomy categories
        for (String value : fieldValue.replace(" ", ",").split(",")) {
          if (!value.trim().equals("")) {
            taxonomyCategories.add(new CategoryPath(fieldName, "Toxicity", value));
          }
        }
        // build up "stats" taxonomy categories
        for (String statsKey : mapStatsFields.keySet()) {
          if (mapLevelFields.containsKey(statsKey)) {
            String levelValue = mapLevelFields.get(statsKey);
            if (!statsKey.trim().equals("") && !levelValue.trim().equals("")) {
              taxonomyCategories.add(new CategoryPath("Reports", statsKey, levelValue));
            }
          }
        }
      }
      tgtDocument.add(new TextField("text", sbIndex.toString().trim(), Store.NO));
      if (taxonomyCategories.size() > 0) {
        facetFields.addFields(tgtDocument, taxonomyCategories);
        // System.out.println("Taxonomies added: " + taxonomyCategories.size());
      }
      indexWriter.addDocument(tgtDocument);
      outCount++;
      message = "Records processed: " + rcdCount;
      if (outputToSystemOut) {
        System.out.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
      message = "Records indexed: " + outCount;
      if (outputToSystemOut) {
        System.out.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
      sbIndex.setLength(0);
      sbIndex.trimToSize();
      sbLevel.setLength(0);
      sbLevel.trimToSize();
      mapToxValues.clear();
      indexWriter.commit();
      indexWriter.forceMerge(1);
      indexWriter.close();
      analyzer.close();
      indexDirectory.close();
      taxonomyWriter.commit();
      taxonomyWriter.close();
      taxonomyDirectory.close();
    } else {
      message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
          + " does not exist!";
      if (outputToSystemErr) {
        System.err.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
    }
    message = "Ended Indexing Reports via Lucene!";
    if (outputToSystemOut) {
      System.out.println(message);
    }
    if (outputToMsgQueue) {
      progressMessageQueue.send(new MessageInput(message));
    }
  }
}
From source file:com.khepry.frackhem.entities.Toxicities.java
License:Apache License
public void indexViaLucene(String textFilePath, String textColSeparator) throws IOException {
  String message = "Start Indexing Toxicities via Lucene...";
  if (outputToSystemOut) {
    System.out.println(message);
  }
  if (outputToMsgQueue) {
    progressMessageQueue.send(new MessageInput(message));
  }
  File textFile = new File(textFilePath);
  if (textFile.exists()) {
    File indexFolder = new File(indexFolderPath);
    if (!indexFolder.exists()) {
      indexFolder.mkdir();
    } else {
      deleteFolder(indexFolder);
      if (!indexFolder.exists()) {
        indexFolder.mkdir();
      }
    }
    File taxonomyFolder = new File(taxonomyFolderPath);
    if (!taxonomyFolder.exists()) {
      taxonomyFolder.mkdir();
    } else {
      deleteFolder(taxonomyFolder);
      if (!taxonomyFolder.exists()) {
        taxonomyFolder.mkdir();
      }
    }
    if (indexFolder.exists() && taxonomyFolder.exists()) {
      List<String> colHeaders = new ArrayList<>();
      Map<String, String> mapIndexFields = new LinkedHashMap<>();
      Map<String, String> mapStatsFields = new LinkedHashMap<>();
      String[] pieces;
      String[] tuples;
      pieces = indexFields.split(",");
      for (String indexField : pieces) {
        mapIndexFields.put(indexField, indexField);
      }
      pieces = statsFields.split(",");
      for (String statField : pieces) {
        tuples = statField.split(":");
        mapStatsFields.put(tuples[0], tuples.length > 1 ? tuples[1] : tuples[0]);
      }
      SimpleFSDirectory indexDirectory = new SimpleFSDirectory(indexFolder);
      Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);
      IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_44, analyzer);
      IndexWriter indexWriter = new IndexWriter(indexDirectory, indexWriterConfig);
      SimpleFSDirectory taxonomyDirectory = new SimpleFSDirectory(taxonomyFolder);
      TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(taxonomyDirectory, OpenMode.CREATE);
      FacetFields facetFields = new FacetFields(taxonomyWriter);
      List<CategoryPath> taxonomyCategories = new ArrayList<>();
      String line;
      Integer rcdCount = 0;
      StringBuilder sb = new StringBuilder();
      BufferedReader br = new BufferedReader(new FileReader(textFile));
      while ((line = br.readLine()) != null) {
        rcdCount++;
        pieces = line.split(textColSeparator);
        if (rcdCount == 1) {
          for (String colHeader : pieces) {
            colHeaders.add(colHeader.trim());
          }
        } else {
          if (pieces.length == colHeaders.size()) {
            sb.setLength(0);
            Document document = new Document();
            for (int i = 0; i < pieces.length; i++) {
              Field field = new TextField(colHeaders.get(i), pieces[i].trim(), Store.YES);
              document.add(field);
              if (mapIndexFields.containsKey(colHeaders.get(i))) {
                if (!pieces[i].trim().equals("")) {
                  sb.append(pieces[i].trim());
                  sb.append(" ");
                }
              }
            }
            Field field = new TextField("text", sb.toString().trim(), Store.NO);
            document.add(field);
            String toxCasEdfId = pieces[0].trim();
            String toxChemical = pieces[1].trim();
            // categorize recognized toxicities
            String toxRecognized = pieces[2].trim();
            if (!toxRecognized.equals("")) {
              taxonomyCategories.add(new CategoryPath("toxRecognized", "CasEdfId", toxCasEdfId));
              taxonomyCategories.add(new CategoryPath("toxRecognized", "Chemical", toxChemical.replace("/", "|")));
              for (String value : toxRecognized.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("toxRecognized", "Toxicity", value));
                }
              }
            }
            // categorize suspected toxicities
            String toxSuspected = pieces[3].trim();
            if (!toxSuspected.equals("")) {
              taxonomyCategories.add(new CategoryPath("toxSuspected", "CasEdfId", toxCasEdfId));
              taxonomyCategories.add(new CategoryPath("toxSuspected", "Chemical", toxChemical.replace("/", "|")));
              for (String value : toxSuspected.replace(" ", ",").split(",")) {
                if (!value.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("toxSuspected", "Toxicity", value));
                }
              }
            }
            // build up "stats" taxonomy categories
            for (String statsKey : mapStatsFields.keySet()) {
              if (mapIndexFields.containsKey(statsKey)) {
                String fieldValue = mapIndexFields.get(statsKey);
                if (!statsKey.trim().equals("") && !fieldValue.trim().equals("")) {
                  taxonomyCategories.add(new CategoryPath("Toxicities", statsKey, fieldValue));
                }
              }
            }
            if (taxonomyCategories.size() > 0) {
              facetFields.addFields(document, taxonomyCategories);
              // System.out.println("Taxonomies added: " + taxonomyCategories.size());
            }
            indexWriter.addDocument(document);
            if (progressInterval > 0 && rcdCount % progressInterval == 0) {
              message = "Records indexed: " + rcdCount;
              if (outputToSystemOut) {
                System.out.println(message);
              }
              if (outputToMsgQueue) {
                progressMessageQueue.send(new MessageInput(message));
              }
            }
            taxonomyCategories.clear();
          }
        }
      }
      br.close();
      message = "Records indexed: " + rcdCount;
      if (outputToSystemOut) {
        System.out.println(message);
      }
      if (outputToMsgQueue) {
        progressMessageQueue.send(new MessageInput(message));
      }
      sb.setLength(0);
      sb.trimToSize();
      indexWriter.commit();
      indexWriter.forceMerge(1);
      indexWriter.close();
      taxonomyWriter.commit();
      taxonomyWriter.close();
      analyzer.close();
      indexDirectory.close();
      taxonomyDirectory.close();
    } else {
      message = "Lucene Index Folder: " + indexFolder + " or Lucene Taxonomy folder: " + taxonomyFolder
          + " does not exist!";
      if (outputToSystemErr) {
        System.err.println(message);
      }
    }
    message = "Ended Indexing Toxicities via Lucene!";
    if (outputToSystemOut) {
      System.out.println(message);
    }
    if (outputToMsgQueue) {
      progressMessageQueue.send(new MessageInput(message));
    }
  }
}
From source file:com.liferay.portal.search.lucene.dump.DumpIndexDeletionPolicy.java
License:Open Source License
public void dump(OutputStream outputStream, IndexWriter indexWriter, Lock commitLock) throws IOException {
  IndexCommit indexCommit = null;
  String segmentsFileName = null;
  commitLock.lock();
  try {
    indexWriter.commit();
    indexCommit = _lastIndexCommit;
    segmentsFileName = indexCommit.getSegmentsFileName();
    _segmentsFileNames.add(segmentsFileName);
  } finally {
    commitLock.unlock();
  }
  try {
    IndexCommitSerializationUtil.serializeIndex(indexCommit, outputStream);
  } finally {
    _segmentsFileNames.remove(segmentsFileName);
  }
}
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
  try {
    if (source == null) {
      throw new Exception("Source collection is missing.");
    }
    // create as a sibling path of the main index
    Directory d = main.directory();
    File primaryDir = null;
    if (d instanceof FSDirectory) {
      String path = ((FSDirectory) d).getDirectory().getPath();
      primaryDir = new File(path);
      sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
    } else {
      String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation
          + "-" + System.currentTimeMillis();
      sidecarIndex = new File(secondaryPath);
    }
    // create a new tmp dir for the secondary indexes
    File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
    if (rebuild) {
      safeDelete(sidecarIndex);
    }
    parallelFields.addAll(source.getFieldNames());
    parallelFields.remove("id");
    LOG.debug("building a new index");
    Directory dir = FSDirectory.open(secondaryIndex);
    if (IndexWriter.isLocked(dir)) {
      // try forcing unlock
      try {
        IndexWriter.unlock(dir);
      } catch (Exception e) {
        LOG.warn("Failed to unlock " + secondaryIndex);
      }
    }
    int[] mergeTargets;
    AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
    if (subReaders == null || subReaders.length == 0) {
      mergeTargets = new int[] { main.maxDoc() };
    } else {
      mergeTargets = new int[subReaders.length];
      for (int i = 0; i < subReaders.length; i++) {
        mergeTargets[i] = subReaders[i].maxDoc();
      }
    }
    Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
    IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
    //cfg.setInfoStream(System.err);
    cfg.setMergeScheduler(new SerialMergeScheduler());
    cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
    IndexWriter iw = new IndexWriter(dir, cfg);
    LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
    int boostedDocs = 0;
    Bits live = MultiFields.getLiveDocs(main);
    int targetPos = 0;
    int nextTarget = mergeTargets[targetPos];
    BytesRef idRef = new BytesRef();
    for (int i = 0; i < main.maxDoc(); i++) {
      if (i == nextTarget) {
        iw.commit();
        nextTarget = nextTarget + mergeTargets[++targetPos];
      }
      if (live != null && !live.get(i)) {
        addDummy(iw); // this is required to preserve doc numbers.
        continue;
      } else {
        DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
        main.document(i, visitor);
        Document doc = visitor.getDocument();
        // get docId
        String id = doc.get(docIdField);
        if (id == null) {
          LOG.debug("missing id, docNo=" + i);
          addDummy(iw);
          continue;
        } else {
          // find the data, if any
          doc = lookup(source, id, idRef, parallelFields);
          if (doc == null) {
            LOG.debug("missing boost data, docId=" + id);
            addDummy(iw);
            continue;
          } else {
            LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
            iw.addDocument(doc);
            boostedDocs++;
          }
        }
      }
    }
    iw.close();
    DirectoryReader other = DirectoryReader.open(dir);
    LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
    SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
    return pr;
  } catch (Exception e) {
    LOG.warn("Unable to build parallel index: " + e.toString(), e);
    LOG.warn("Proceeding with single main index.");
    try {
      return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
          sourceCollection, null);
    } catch (Exception e1) {
      LOG.warn("Unexpected exception, returning single main index", e1);
      return main;
    }
  }
}
From source file:com.mathworks.xzheng.indexing.IndexingTest.java
License:Apache License
public void testDeleteAfterOptimize() throws IOException {
  IndexWriter writer = getWriter();
  assertEquals(2, writer.numDocs());
  writer.deleteDocuments(new Term("id", "1"));
  //writer.optimize(); //3
  writer.forceMerge(1);
  writer.commit();
  assertFalse(writer.hasDeletions());
  assertEquals(1, writer.maxDoc()); //C
  assertEquals(1, writer.numDocs()); //C
  writer.close();
}
From source file:com.meizu.nlp.classification.utils.DatasetSplitter.java
License:Apache License
/**
 * Split a given index into 3 indexes for training, test and cross validation tasks respectively
 *
 * @param originalIndex        an {@link org.apache.lucene.index.LeafReader} on the source index
 * @param trainingIndex        a {@link Directory} used to write the training index
 * @param testIndex            a {@link Directory} used to write the test index
 * @param crossValidationIndex a {@link Directory} used to write the cross validation index
 * @param analyzer             {@link Analyzer} used to create the new docs
 * @param fieldNames           names of fields that need to be put in the new indexes or <code>null</code>
 *                             if all should be used
 * @throws IOException if any writing operation fails on any of the indexes
 */
public void split(LeafReader originalIndex, Directory trainingIndex, Directory testIndex,
    Directory crossValidationIndex, Analyzer analyzer, String... fieldNames) throws IOException {
  // create IWs for train / test / cv IDXs
  IndexWriter testWriter = new IndexWriter(testIndex, new IndexWriterConfig(analyzer));
  IndexWriter cvWriter = new IndexWriter(crossValidationIndex, new IndexWriterConfig(analyzer));
  IndexWriter trainingWriter = new IndexWriter(trainingIndex, new IndexWriterConfig(analyzer));
  try {
    int size = originalIndex.maxDoc();
    IndexSearcher indexSearcher = new IndexSearcher(originalIndex);
    TopDocs topDocs = indexSearcher.search(new MatchAllDocsQuery(), Integer.MAX_VALUE);
    // set the type to be indexed, stored, with term vectors
    FieldType ft = new FieldType(TextField.TYPE_STORED);
    ft.setStoreTermVectors(true);
    ft.setStoreTermVectorOffsets(true);
    ft.setStoreTermVectorPositions(true);
    int b = 0;
    // iterate over existing documents
    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
      // create a new document for indexing
      Document doc = new Document();
      if (fieldNames != null && fieldNames.length > 0) {
        for (String fieldName : fieldNames) {
          doc.add(new Field(fieldName,
              originalIndex.document(scoreDoc.doc).getField(fieldName).stringValue(), ft));
        }
      } else {
        for (IndexableField storableField : originalIndex.document(scoreDoc.doc).getFields()) {
          if (storableField.readerValue() != null) {
            doc.add(new Field(storableField.name(), storableField.readerValue(), ft));
          } else if (storableField.binaryValue() != null) {
            doc.add(new Field(storableField.name(), storableField.binaryValue(), ft));
          } else if (storableField.stringValue() != null) {
            doc.add(new Field(storableField.name(), storableField.stringValue(), ft));
          } else if (storableField.numericValue() != null) {
            doc.add(new Field(storableField.name(), storableField.numericValue().toString(), ft));
          }
        }
      }
      // add it to one of the IDXs
      if (b % 2 == 0 && testWriter.maxDoc() < size * testRatio) {
        testWriter.addDocument(doc);
      } else if (cvWriter.maxDoc() < size * crossValidationRatio) {
        cvWriter.addDocument(doc);
      } else {
        trainingWriter.addDocument(doc);
      }
      b++;
    }
  } catch (Exception e) {
    throw new IOException(e);
  } finally {
    testWriter.commit();
    cvWriter.commit();
    trainingWriter.commit();
    // close IWs
    testWriter.close();
    cvWriter.close();
    trainingWriter.close();
  }
}
From source file:com.mmiagency.knime.nodes.keyworddensity.util.KeywordDensityHelper.java
License:Open Source License
public void execute() throws IOException {
  org.jsoup.nodes.Document jdoc = null;
  // pull content using Jsoup
  if (m_content != null && !m_content.trim().isEmpty()) {
    jdoc = Jsoup.parse(m_content);
  } else {
    Connection conn = Jsoup.connect(m_url);
    conn.validateTLSCertificates(false);
    conn.followRedirects(true);
    conn.userAgent("Mozilla/5.0 (Macintosh; Intel Mac OS X 10.10; rv:40.0) Gecko/20100101 Firefox/40.0");
    conn.header("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8");
    conn.header("Accept-Language", "en-US,en;q=0.5");
    conn.header("Accept-Encoding", "gzip, deflate");
    conn.execute();
    jdoc = conn.get();
  }
  StringWriter text = new StringWriter();
  if (m_includeMetaKeywords) {
    text.write(jdoc.select("meta[name=keywords]").attr("content"));
    text.write(" ");
  }
  if (m_includeMetaDescription) {
    text.write(jdoc.select("meta[name=description]").attr("content"));
    text.write(" ");
  }
  if (m_includePageTitle) {
    text.write(jdoc.select("title").text());
    text.write(" ");
  }
  text.write(jdoc.select("body").text());
  // analyze content with Lucene
  StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
  Directory directory = new RAMDirectory();
  IndexWriter indexWriter = new IndexWriter(directory, analyzer, MaxFieldLength.LIMITED);
  Document doc = new Document();
  Field textField = new Field("content", text.toString(), Field.Store.YES, Field.Index.ANALYZED,
      TermVector.WITH_POSITIONS_OFFSETS);
  doc.add(textField);
  indexWriter.addDocument(doc);
  indexWriter.commit();
  indexWriter.close();
  IndexReader indexReader = IndexReader.open(directory, true);
  TermFreqVector termFreqVector = null;
  for (int i = 0; i < indexReader.maxDoc(); i++) {
    termFreqVector = indexReader.getTermFreqVector(i, "content");
    String[] terms = termFreqVector.getTerms();
    int[] freqs = termFreqVector.getTermFrequencies();
    for (int n = 0; n < termFreqVector.size(); n++) {
      if (m_excludeList.contains(terms[n])) {
        continue;
      }
      add(terms[n], freqs[n]);
    }
  }
  indexReader.close();
  directory.close();
  // sort map by value
  sortMap();
}
From source file:com.netcrest.pado.index.provider.lucene.LuceneBuilderRAMDirectory.java
License:Open Source License
@SuppressWarnings({ "unchecked", "rawtypes", "resource" })
public void buildTemporalKeys(boolean createNewDirectory) {
  Cache cache = CacheFactory.getAnyInstance();
  Region<String, RAMDirectory> region = cache
      .getRegion(IndexMatrixUtil.getProperty(Constants.PROP_REGION_LUCENE));
  TemporalType[] temporalTypes = GemfireTemporalManager.getAllTemporalTypes();
  for (TemporalType type : temporalTypes) {
    IndexWriter writer = null;
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_47);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    LuceneField luceneBuilder = new LuceneField();
    LuceneSearch ls = LuceneSearch.getLuceneSearch(type.getFullPath());
    StandardQueryParser parser = ls.createParser();
    TemporalManager tm = TemporalManager.getTemporalManager(type.getFullPath());
    try {
      List<?> identityKeyList = tm.getIdentityKeyList();
      if (identityKeyList.size() == 0) {
        continue;
      }
      RAMDirectory directory;
      if (createNewDirectory) {
        directory = new RAMDirectory();
      } else {
        directory = region.get(type.getFullPath());
        if (directory == null) {
          directory = new RAMDirectory();
        }
      }
      writer = new IndexWriter(directory, iwc);
      // determine the identity key type, public fields and getters
      Field fields[] = null;
      Method getters[] = null;
      Class keyType = null;
      for (Object key : identityKeyList) {
        if (ReflectionHelper.isPrimitiveWrapper(key.getClass())) {
          fields = null;
          getters = null;
          keyType = key.getClass();
        } else {
          fields = ReflectionHelper.getPublicFields(key.getClass());
          getters = ReflectionHelper.getPublicGetters(key.getClass());
        }
        break;
      }
      if (keyType != null) {
        configNumericType(parser, "IdentityKey", keyType);
        // primitive
        List<Document> docList = new ArrayList();
        if (keyType == String.class) {
          for (Object key : identityKeyList) {
            Document doc = luceneBuilder.createDocument();
            doc.add(luceneBuilder.createField("IdentityKey", key.toString()));
            docList.add(doc);
          }
        } else if (keyType == Integer.class) {
          for (Object key : identityKeyList) {
            Document doc = luceneBuilder.createDocument();
            doc.add(luceneBuilder.createField("IdentityKey", (Integer) key));
            docList.add(doc);
          }
        } else if (keyType == Long.class) {
          for (Object key : identityKeyList) {
            Document doc = luceneBuilder.createDocument();
            doc.add(luceneBuilder.createField("IdentityKey", (Long) key));
            docList.add(doc);
          }
        } else if (keyType == Float.class) {
          for (Object key : identityKeyList) {
            Document doc = luceneBuilder.createDocument();
            doc.add(luceneBuilder.createField("IdentityKey", (Float) key));
            docList.add(doc);
          }
        } else if (keyType == Double.class) {
          for (Object key : identityKeyList) {
            Document doc = luceneBuilder.createDocument();
            doc.add(luceneBuilder.createField("IdentityKey", (Double) key));
            docList.add(doc);
          }
        }
        try {
          writer.addDocuments(docList);
        } catch (Exception ex) {
          Logger.warning(ex);
        }
      } else {
        try {
          // fields
          if (fields != null && fields.length > 0) {
            // configure numeric types
            for (Field field : fields) {
              configNumericType(parser, field.getName(), field.getType());
            }
            List<Document> docList = new ArrayList();
            for (Object key : identityKeyList) {
              Document doc = luceneBuilder.createDocument();
              for (Field field : fields) {
                Object obj = field.get(key);
                Class fieldType = field.getType();
                if (fieldType == String.class) {
                  doc.add(luceneBuilder.createField(field.getName(), obj.toString()));
                } else if (fieldType == Integer.class || fieldType == int.class) {
                  doc.add(luceneBuilder.createField(field.getName(), (Integer) obj));
                } else if (fieldType == Long.class || fieldType == long.class) {
                  doc.add(luceneBuilder.createField(field.getName(), (Long) obj));
                } else if (fieldType == Float.class || fieldType == float.class) {
                  doc.add(luceneBuilder.createField(field.getName(), (Float) obj));
                } else if (fieldType == Double.class || fieldType == double.class) {
                  doc.add(luceneBuilder.createField(field.getName(), (Double) obj));
                } else if (fieldType == Date.class) {
                  doc.add(luceneBuilder.createField(field.getName(), ((Date) obj).getTime()));
                }
              }
              docList.add(doc);
            }
            try {
              writer.addDocuments(docList);
            } catch (Exception ex) {
              Logger.warning(ex);
            }
          }
          // getters - methods
          if (getters != null && getters.length > 0) {
            List<Document> docList = new ArrayList();
            for (Object key : identityKeyList) {
              Document doc = luceneBuilder.createDocument();
              for (Method method : getters) {
                Object obj = method.invoke(key);
                Class<?> fieldType = method.getReturnType();
                if (fieldType == String.class) {
                  doc.add(luceneBuilder.createField(getPropertyName(method), obj.toString()));
                } else if (fieldType == Integer.class || fieldType == int.class) {
                  doc.add(luceneBuilder.createField(getPropertyName(method), (Integer) obj));
                } else if (fieldType == Long.class || fieldType == long.class) {
                  doc.add(luceneBuilder.createField(getPropertyName(method), (Long) obj));
                } else if (fieldType == Float.class || fieldType == float.class) {
                  doc.add(luceneBuilder.createField(getPropertyName(method), (Float) obj));
                } else if (fieldType == Double.class || fieldType == double.class) {
                  doc.add(luceneBuilder.createField(getPropertyName(method), (Double) obj));
                } else if (fieldType == Date.class) {
                  doc.add(luceneBuilder.createField(getPropertyName(method), ((Date) obj).getTime()));
                }
              }
              docList.add(doc);
            }
            try {
              writer.addDocuments(docList);
            } catch (Exception ex) {
              Logger.warning(ex);
            }
          }
        } catch (Exception ex) {
          Logger.warning(ex);
        }
      }
      writer.commit();
      writer.close();
      // TODO: support file system
      // place the RAMDirectory in the Lucene region
      region.put(type.getFullPath(), directory);
    } catch (Exception ex) {
      Logger.error("Index builder aborted.", ex);
      return;
    }
  }
}