Example usage for org.apache.lucene.facet FacetsConfig FacetsConfig

List of usage examples for org.apache.lucene.facet FacetsConfig FacetsConfig

Introduction

On this page you can find example usage of the org.apache.lucene.facet FacetsConfig constructor, FacetsConfig().

Prototype

public FacetsConfig() 

Source Link

Document

Default constructor.

Usage

From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java

License:Apache License

/** Configures the "tags" and "genre" facet dimensions: both multi-valued, each stored under a "$"-prefixed index field. */
public AssociationsFacetsExample() {
    config = new FacetsConfig();
    // Same settings for both dimensions, so configure them in one pass.
    for (String dimension : new String[] { "tags", "genre" }) {
        config.setMultiValued(dimension, true);
        config.setIndexFieldName(dimension, "$" + dimension);
    }
}

From source file:com.czw.search.lucene.example.facet.RangeFacetsExample.java

License:Apache License

/** Returns a fresh, default-configured FacetsConfig for this example. */
private FacetsConfig getConfig() {
    FacetsConfig facetsConfig = new FacetsConfig();
    return facetsConfig;
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Stores features from a specified feature file into the given project's Lucene index.
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFileId the FeatureFile whose features are being saved
 * @param projectId     the project whose index is written
 * @param entries       FeatureIndexEntry items to write into the index
 * @throws IOException if the index cannot be opened or written
 */
public void writeLuceneIndexForProject(final Long featureFileId, final long projectId,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer luceneAnalyzer = new StandardAnalyzer();
            Directory indexDirectory = fileManager.createIndexForProject(projectId);
            IndexWriter indexWriter = new IndexWriter(indexDirectory,
                    new IndexWriterConfig(luceneAnalyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        // Map the chromosome id field onto its dedicated facet field.
        FacetsConfig config = new FacetsConfig();
        config.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());

        for (FeatureIndexEntry indexEntry : entries) {
            Document luceneDocument = new Document();
            addCommonDocumentFields(luceneDocument, indexEntry, featureFileId);
            // VCF entries carry extra variation-specific fields.
            if (indexEntry instanceof VcfIndexEntry) {
                addVcfDocumentFields(luceneDocument, indexEntry);
            }
            indexWriter.addDocument(config.build(luceneDocument));
        }
    }
}

From source file:com.epam.catgenome.dao.index.FeatureIndexDao.java

License:Open Source License

/**
 * Stores features from a specified feature file to it's Lucene index
 * Sample query: featureId:rs44022* AND (variationType:del OR variationType:ins)
 *
 * @param featureFile a FeatureFile, for which features to save
 * @param entries a list of FeatureIndexEntry to write to index
 * @throws IOException/*from w  w  w .  j a va  2  s .  c o m*/
 */
public void writeLuceneIndexForFile(final FeatureFile featureFile,
        final List<? extends FeatureIndexEntry> entries) throws IOException {
    try (StandardAnalyzer analyzer = new StandardAnalyzer();
            Directory index = fileManager.createIndexForFile(featureFile);
            IndexWriter writer = new IndexWriter(index,
                    new IndexWriterConfig(analyzer).setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND))) {
        FacetsConfig facetsConfig = new FacetsConfig();
        facetsConfig.setIndexFieldName(FeatureIndexFields.CHR_ID.getFieldName(),
                FeatureIndexFields.FACET_CHR_ID.getFieldName());
        facetsConfig.setIndexFieldName(FeatureIndexFields.F_UID.getFieldName(),
                FeatureIndexFields.FACET_UID.getFieldName());

        for (FeatureIndexEntry entry : entries) {
            Document document = new Document();
            addCommonDocumentFields(document, entry, featureFile.getId());

            if (entry instanceof VcfIndexEntry) {
                addVcfDocumentFields(document, entry);
            }

            writer.addDocument(facetsConfig.build(document));
        }
    }
}

From source file:com.qwazr.search.analysis.AnalyzerContext.java

License:Apache License

/**
 * Builds the per-field type map, facet configuration, and index/query analyzer maps.
 *
 * @param analyzerMap named analyzer definitions available for lookup
 * @param fields      field definitions keyed by field name; may be null or empty
 * @throws ServerException if an analyzer class cannot be resolved for a field
 */
public AnalyzerContext(Map<String, AnalyzerDefinition> analyzerMap, Map<String, FieldDefinition> fields)
        throws ServerException {
    this.fieldTypes = new HashMap<>();
    this.facetsConfig = new FacetsConfig();
    // No fields: expose immutable empty maps and skip all per-field work.
    if (fields == null || fields.isEmpty()) {
        this.indexAnalyzerMap = Collections.emptyMap();
        this.queryAnalyzerMap = Collections.emptyMap();
        return;
    }
    this.indexAnalyzerMap = new HashMap<>();
    this.queryAnalyzerMap = new HashMap<>();

    for (Map.Entry<String, FieldDefinition> field : fields.entrySet()) {
        final String fieldName = field.getKey();
        final FieldDefinition fieldDef = field.getValue();
        FieldTypeInterface fieldType = FieldTypeInterface.getInstance(fieldName, fieldDef);
        fieldTypes.put(fieldName, fieldType);
        if (fieldDef.template != null) {
            // Facet templates drive the multi-valued flag; hierarchy is always off here.
            // Non-facet templates need no facet configuration.
            switch (fieldDef.template) {
            case FacetField:
            case SortedSetDocValuesFacetField:
                facetsConfig.setMultiValued(fieldName, false);
                facetsConfig.setHierarchical(fieldName, false);
                break;
            case MultiFacetField:
            case SortedSetMultiDocValuesFacetField:
                facetsConfig.setMultiValued(fieldName, true);
                facetsConfig.setHierarchical(fieldName, false);
                break;
            }
        }
        try {
            // Index-time analyzer; an absent definition leaves the field unmapped.
            final Analyzer indexAnalyzer = StringUtils.isEmpty(fieldDef.analyzer) ? null
                    : findAnalyzer(analyzerMap, fieldDef.analyzer);
            if (indexAnalyzer != null)
                indexAnalyzerMap.put(fieldName, indexAnalyzer);

            // Query-time analyzer falls back to the index analyzer when not set.
            final Analyzer queryAnalyzer = StringUtils.isEmpty(fieldDef.query_analyzer) ? indexAnalyzer
                    : findAnalyzer(analyzerMap, fieldDef.query_analyzer);
            if (queryAnalyzer != null)
                queryAnalyzerMap.put(fieldName, queryAnalyzer);

        } catch (ReflectiveOperationException | InterruptedException | IOException e) {
            // NOTE(review): the failure may originate from query_analyzer, yet the
            // message reports fieldDef.analyzer — kept unchanged to preserve the
            // observable error text; confirm whether callers rely on it.
            throw new ServerException(Response.Status.NOT_ACCEPTABLE,
                    "Class " + fieldDef.analyzer + " not known for the field " + fieldName, e);
        }
    }
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Given a queue of documents to index, index them by popping the queue limited to default of 1000 items.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index./* ww  w  . j a v a 2s  .co m*/
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
    Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;

    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);

    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

            Document doc = this.buildDocument(codeIndexDocument);

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxonomyWriter, doc));

            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        try {
            writer.close();
        } finally {
            taxonomyWriter.close();
        }
        Singleton.getLogger().info("Closing writers");
    }
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Given a queue of documents to index, index them by popping the queue limited to 1000 items.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index./*  w  w w.j a v  a  2s  .c om*/
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue)
        throws IOException {
    // Index all documents and commit at the end for performance gains
    Directory dir = FSDirectory.open(Paths.get(
            Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
    Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties()
            .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;

    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir);

    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            Document doc = new Document();
            // Path is the primary key for documents
            // needs to include repo location, project name and then filepath including file and revision
            Field pathField = new StringField("path",
                    codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":"
                            + codeIndexDocument.getRevision(),
                    Field.Store.YES);
            doc.add(pathField);

            // Add in facets
            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
            facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
            facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
            facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);

            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME,
                        codeIndexDocument.getLanguageName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY,
                        codeIndexDocument.getYearMonthDay()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH,
                        codeIndexDocument.getYearMonthDay().substring(0, 6)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR,
                        codeIndexDocument.getYearMonthDay().substring(0, 4)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
            }

            String indexContents = Values.EMPTYSTRING;

            indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
            indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
            this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector

            indexContents = indexContents.toLowerCase();

            doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
            doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(),
                    Field.Store.YES));
            doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
            doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
            doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
            doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
            doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(),
                    Field.Store.YES));
            doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
            doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(),
                    Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
            doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
            doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));

            // Extra metadata in this case when it was last indexed
            doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxoWriter, doc));

            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
        writer.close();
        taxoWriter.close();
    }
}

From source file:com.tripod.lucene.example.TestExampleLuceneBase.java

License:Apache License

/**
 * Populates the in-memory test index with five example documents (id, title, body,
 * color facet, create date) and opens a SearcherManager over the result.
 *
 * @throws IOException    if the index cannot be written
 * @throws ParseException if a hard-coded test date fails to parse
 */
@Before
public void setupBase() throws IOException, ParseException {
    analyzer = new StandardAnalyzer();
    directory = new RAMDirectory();

    facetsConfig = new FacetsConfig();
    facetsConfig.setIndexFieldName(ExampleField.COLOR.getName(), ExampleField.COLOR.getName());

    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    try (IndexWriter writer = new IndexWriter(directory, config)) {
        final SimpleDateFormat dateFormat = new SimpleDateFormat(DATE_FORMAT);

        // All five docs share the same field layout; only the values differ.
        addExampleDoc(writer, dateFormat, "1", "Title 1", "Body 1 Solr is cool", "BLUE",
                "2016-10-01T01:00:00Z");
        addExampleDoc(writer, dateFormat, "2", "Title 2", "Body 2 Lucene is cool", "RED",
                "2016-10-01T02:00:00Z");
        addExampleDoc(writer, dateFormat, "3", "Title 3", "Body 3 Solr is Great, Solr is Fun", "GREEN",
                "2016-10-01T03:00:00Z");
        addExampleDoc(writer, dateFormat, "4", "Title 4", "Body 4", "BLUE", "2016-10-01T04:00:00Z");
        addExampleDoc(writer, dateFormat, "5", "Title 5", "Body 5", "RED", "2016-10-01T05:00:00Z");

        // commit docs
        writer.commit();
    }

    // needs to be opened after the writer is closed otherwise it won't see the test data
    searcherManager = new SearcherManager(directory, null);
}

/**
 * Builds one test document with the standard field set (stored/doc-values id,
 * title, body, stored color plus its facet, and the create date as both numeric
 * doc-values and stored field) and adds it to the writer via the facets config.
 */
private void addExampleDoc(IndexWriter writer, SimpleDateFormat dateFormat, String id, String title,
        String body, String color, String createDateStr) throws IOException, ParseException {
    Document doc = new Document();
    doc.add(new Field(ExampleField.ID.getName(), id, StringField.TYPE_STORED));
    doc.add(new SortedDocValuesField(ExampleField.ID.getName(), new BytesRef(id)));
    doc.add(new Field(ExampleField.TITLE.getName(), title, TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.BODY.getName(), body, TextField.TYPE_STORED));
    doc.add(new Field(ExampleField.COLOR.getName(), color, StringField.TYPE_STORED));
    doc.add(new SortedSetDocValuesFacetField(ExampleField.COLOR.getName(), color));

    Date createDate = dateFormat.parse(createDateStr);
    doc.add(new NumericDocValuesField(ExampleField.CREATE_DATE.getName(), createDate.getTime()));
    doc.add(new StoredField(ExampleField.CREATE_DATE.getName(), createDate.getTime()));
    writer.addDocument(facetsConfig.build(doc));
}

From source file:de.mirkosertic.desktopsearch.LuceneIndexHandler.java

License:Open Source License

/**
 * Sets up the Lucene index handler: configures the stored/term-vector content field
 * type, opens an NRT-caching index directory and writer, starts a background thread
 * that commits pending changes every two seconds, and prepares the facets config.
 *
 * @throws IOException if the index directory cannot be opened
 */
public LuceneIndexHandler(Configuration aConfiguration, AnalyzerCache aAnalyzerCache,
        ExecutorPool aExecutorPool, PreviewProcessor aPreviewProcessor) throws IOException {
    previewProcessor = aPreviewProcessor;
    configuration = aConfiguration;
    analyzerCache = aAnalyzerCache;
    executorPool = aExecutorPool;

    // Content field keeps positions/offsets and full term vectors (needed for
    // highlighting-style features on the stored content).
    contentFieldType = new FieldType();
    contentFieldType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS);
    contentFieldType.setStored(true);
    contentFieldType.setTokenized(true);
    contentFieldType.setStoreTermVectorOffsets(true);
    contentFieldType.setStoreTermVectorPayloads(true);
    contentFieldType.setStoreTermVectorPositions(true);
    contentFieldType.setStoreTermVectors(true);

    analyzer = analyzerCache.getAnalyzer();

    File theIndexDirectory = new File(aConfiguration.getConfigDirectory(), "index");
    theIndexDirectory.mkdirs();

    Directory theIndexFSDirectory = new NRTCachingDirectory(FSDirectory.open(theIndexDirectory.toPath()), 100,
            100);

    IndexWriterConfig theConfig = new IndexWriterConfig(analyzer);
    theConfig.setSimilarity(new CustomSimilarity());
    indexWriter = new IndexWriter(theIndexFSDirectory, theConfig);

    searcherManager = new SearcherManager(indexWriter, true, new SearcherFactory());

    // Background thread that flushes uncommitted changes every two seconds.
    commitThread = new Thread("Lucene Commit Thread") {
        @Override
        public void run() {
            while (!isInterrupted()) {

                if (indexWriter.hasUncommittedChanges()) {
                    try {
                        indexWriter.commit();
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }

                try {
                    Thread.sleep(2000);
                } catch (InterruptedException e) {
                    // Restore the interrupt status: sleep() clears the flag when it
                    // throws, so without this the isInterrupted() loop condition would
                    // never observe the interrupt and the thread could not be stopped.
                    Thread.currentThread().interrupt();
                }
            }
        }
    };
    commitThread.start();

    facetsConfig = new FacetsConfig();
}

From source file:ir.FacetLucene_1106022654.java

/**
 * Reads tagged documents from a corpus file, indexes them with a Lucene taxonomy
 * facet on the author ("penulis") dimension, then runs every query from a query
 * file and writes the per-author matching-document counts to an output file.
 *
 * Documents are delimited by &lt;DOK&gt;...&lt;/DOK&gt; and carry &lt;ID&gt;,
 * &lt;JUDUL&gt; (title) and multi-line &lt;TEKS&gt; (text) sections.
 */
public static void main(String[] args) throws IOException, ParseException {
    // Ids, titles and texts of every parsed document, index-aligned.
    ArrayList idR = new ArrayList();
    ArrayList judulR = new ArrayList();
    ArrayList teksR = new ArrayList();

    String id = "haha";
    String judul = "haha";
    String teks = "haha";
    String penulis = "haha";

    // Parse the corpus file.
    File file = new File("D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\Teks.txt");
    BufferedReader br = new BufferedReader(new FileReader(file));
    try {
        String line = br.readLine();
        String gabung = "";
        String gabung2 = "";
        // True while the reader is between <DOK> and </DOK>. Declared outside the
        // loop so the state survives across lines; the original declared a fresh
        // flag per iteration and tested it with "flag2 = true" (an assignment,
        // always true), so the intended gating never took effect.
        boolean insideDok = false;
        while (line != null) {
            gabung = gabung + " " + line;
            gabung2 = gabung2 + " " + line;

            final Pattern patternID = Pattern.compile("<ID>(.+?)</ID>");
            final Pattern patternJ = Pattern.compile("<JUDUL>(.+?)</JUDUL>");
            String[] arg = line.trim().split(" ");
            // </DOK> closes the current document; <DOK> opens the next one.
            if (line.equalsIgnoreCase("</DOK>")) {
                insideDok = false;
            }
            if (line.equalsIgnoreCase("<DOK>")) {
                insideDok = true;
            }
            // While inside a document, extract the id and the title from their tags.
            if (insideDok) {
                final Matcher matcherD = patternID.matcher(line);
                if (matcherD.matches()) {
                    id = matcherD.group(1);
                    idR.add(id);
                }
                final Matcher matcherJ = patternJ.matcher(line);
                if (matcherJ.matches()) {
                    judul = matcherJ.group(1);
                    judulR.add(judul);
                }
            }
            // The text section spans several lines: accumulate everything after
            // </JUDUL>, and once </DOK> is seen strip the remaining tags and store
            // the collected text, then reset the accumulators for the next document.
            for (int i = 0; i < arg.length; i++) {
                if (arg[i].endsWith("</JUDUL>")) {
                    gabung2 = "";
                }
                if (arg[i].compareTo("</DOK>") == 0) {
                    gabung2 = gabung2.replaceAll("<TEKS>", "");
                    gabung2 = gabung2.replaceAll("</TEKS>", "");
                    gabung2 = gabung2.replaceAll("</DOK>", "");
                    teksR.add(gabung2);
                    gabung = "";
                    gabung2 = "";
                }
            }

            line = br.readLine();
        }
        // Sanity check: the three lists must end up the same size.
        System.out.println("size teks: " + teksR.size());
        System.out.println("size id: " + idR.size());
        System.out.println("size judul: " + judulR.size());
    } finally {
        br.close();
    }

    // In-memory directories for the index, the taxonomy, and the facet configuration.
    Directory indexDir = new RAMDirectory();
    Directory taxoDir = new RAMDirectory();
    FacetsConfig fasetconfig = new FacetsConfig();

    StandardAnalyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig indexWriterconfigs = new IndexWriterConfig(analyzer);
    IndexWriter writer = new IndexWriter(indexDir, indexWriterconfigs);

    // Writer for the category (taxonomy) index.
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // Index every parsed document; the author is the id prefix before the first '-'.
    for (int d = 0; d < idR.size(); d++) {
        String[] ag = idR.get(d).toString().split("-");
        penulis = ag[0];
        id = (String) idR.get(d);
        judul = (String) judulR.get(d);
        teks = (String) teksR.get(d);
        addDok(writer, penulis, id, judul, teks, taxoWriter, fasetconfig);
    }

    writer.close();
    taxoWriter.close();

    // Read the query list, one query per line.
    File fileQ = new File("D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\Query.txt");
    BufferedReader brQ = new BufferedReader(new FileReader(fileQ));
    ArrayList listQ = new ArrayList();
    try {
        String lineQ = brQ.readLine();
        while (lineQ != null) {
            listQ.add(lineQ);
            lineQ = brQ.readLine();
        }
    } finally {
        // The original never closed this reader.
        brQ.close();
    }

    // Output destination for the per-query facet counts.
    Writer tulis = new BufferedWriter(new OutputStreamWriter(new FileOutputStream(
            "D:\\Kuliah\\Sem 9\\Perolehan Informasi\\2015 - 2016\\Tugas\\Tugas 2\\1106022654_Hasil_2.txt"),
            "utf-8"));
    try {
        // For each query, count matching documents per author facet and write the result.
        for (int qu = 0; qu < listQ.size(); qu++) {
            ArrayList<FacetResult> results = new ArrayList<>();

            DirectoryReader indexReader = DirectoryReader.open(indexDir);
            IndexSearcher searcher = new IndexSearcher(indexReader);
            TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir);
            try {
                FacetsCollector fc = new FacetsCollector();

                String querystr = (String) listQ.get(qu);

                Query query = new QueryParser("teks", analyzer).parse(querystr);
                // Search; matches are accumulated in the FacetsCollector.
                searcher.search(query, fc);

                Facets facets = new FastTaxonomyFacetCounts(taxoReader, fasetconfig, fc);
                // Count matching documents per author.
                results.add(facets.getTopChildren(Integer.MAX_VALUE, "penulis"));
                System.out.println(listQ.get(qu));
                tulis.write((String) listQ.get(qu) + "\n");
                // Write each author label and its document count.
                for (int f = 0; f < results.size(); f++) {
                    for (int n = 0; n < (results.get(f)).labelValues.length; n++) {
                        String hasil = results.get(f).labelValues[n].toString() + "\n";
                        tulis.write(hasil);
                        System.out.println((results.get(f).labelValues[n]));
                    }
                }
                tulis.flush();
            } finally {
                // Close the per-query readers; the original leaked one pair per query.
                indexReader.close();
                taxoReader.close();
            }
        }
    } finally {
        tulis.close();
    }
}