Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig.

Prototype

public IndexWriterConfig(Analyzer analyzer)

Source Link

Document

Creates a new config that with the provided Analyzer .

Usage

From source file:com.rapidminer.search.GlobalSearchIndexer.java

License:Open Source License

/**
 * Creates an instance of {@link IndexWriter}.
 *
 * @return the writer, never {@code null}
 * @throws IOException/* w w w .jav  a  2s.c  o m*/
 *       if something goes wrong
 */
private IndexWriter createIndexWriter() throws IOException {
    Directory dir = FSDirectory.open(indexDirectoryPath);
    IndexWriterConfig config = new IndexWriterConfig(GlobalSearchUtilities.ANALYZER);
    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    return new IndexWriter(dir, config);
}

From source file:com.ricky.codelab.lucene.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * //from   w w  w. j  a v a2 s .  c o m
 * ???
 * @param args
 */
public static void main(String[] args) {
    //Lucene Document??
    String fieldName = "text";
    //
    String text = "IK Analyzer???????";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //?**********************************
        //?   
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //QueryParser?Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat.java

License:Apache License

/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
    Directory d = newDirectory();/*from  www . j  a v a 2s .c om*/
    IndexWriter w = new IndexWriter(d, new IndexWriterConfig(new MockAnalyzer(random())));
    for (int i = 0; i < 25; i++) {
        Document doc = new Document();
        doc.add(newStringField("field", Character.toString((char) (97 + i)), Field.Store.NO));
        doc.add(newStringField("field", "z" + Character.toString((char) (97 + i)), Field.Store.NO));
        w.addDocument(doc);
    }
    w.forceMerge(1);

    DirectoryReader r = DirectoryReader.open(w);
    assertEquals(1, r.leaves().size());
    RocanaFieldReader field = (RocanaFieldReader) r.leaves().get(0).reader().fields().terms("field");
    // We should see exactly two blocks: one root block (prefix empty string) and one block for z* terms (prefix z):
    RocanaStats stats = field.getStats();
    assertEquals(0, stats.floorBlockCount);
    assertEquals(2, stats.nonFloorBlockCount);
    r.close();
    w.close();
    d.close();
}

From source file:com.search.lucene.demo.facet.DistanceFacetsExample.java

License:Apache License

/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location:
    // we index these both as DoubleFields (for bounding box/ranges) and as NumericDocValuesFields (for scoring)
    Document doc = new Document();
    doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.759011)));
    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-73.9844722)));
    writer.addDocument(doc);//  w w  w .java  2  s.  co m

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.718266)));
    doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.007819)));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(40.7051157)));
    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(-74.0088305)));
    writer.addDocument(doc);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.close();
}

From source file:com.search.lucene.demo.facet.RangeFacetsExample.java

License:Apache License

/** Build the example index. */
public void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // Add documents with a fake timestamp, 1000 sec before
    // "now", 2000 sec before "now", ...:
    for (int i = 0; i < 100; i++) {
        Document doc = new Document();
        long then = nowSec - i * 1000;
        // Add as doc values field, so we can compute range facets:
        doc.add(new NumericDocValuesField("timestamp", then));
        // Add as numeric field so we can drill-down:
        doc.add(new LongField("timestamp", then, Field.Store.NO));
        indexWriter.addDocument(doc);//from ww  w. j a v a 2s .  c o  m
    }

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(indexWriter, true));
    indexWriter.close();
}

From source file:com.search.lucene.demo.facet.SimpleSortedSetFacetsExample.java

License:Apache License

/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));
    Document doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Bob"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2010"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Lisa"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Susan"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "2012"));
    indexWriter.addDocument(config.build(doc));

    doc = new Document();
    doc.add(new SortedSetDocValuesFacetField("Author", "Frank"));
    doc.add(new SortedSetDocValuesFacetField("Publish Year", "1999"));
    indexWriter.addDocument(config.build(doc));

    indexWriter.close();//  w  ww  .  j  a  v  a 2  s  .c om
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Deletes all files that belong to a repository.
 * TODO I don't think this clears anything from the facets, which it should
 *///w  ww .  j a  v a2s .co m
public synchronized void deleteByReponame(String repoName) throws IOException {
    Directory dir = FSDirectory.open(Paths
            .get(Properties.getProperties().getProperty(Values.INDEXLOCATION, Values.DEFAULTINDEXLOCATION)));

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(dir, iwc);

    writer.deleteDocuments(new Term(Values.REPONAME, repoName));
    writer.close();
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Deletes a file from the index using the code id which seems to be
 * the most reliable way of doing it// ww  w .  ja v  a2 s .  c o  m
 * TODO Update the record and set the facets to a value we can ignore
 */
public synchronized void deleteByCodeId(String codeId) throws IOException {
    Directory dir = FSDirectory.open(Paths
            .get(Properties.getProperties().getProperty(Values.INDEXLOCATION, Values.DEFAULTINDEXLOCATION)));

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(dir, iwc);

    try {
        QueryParser parser = new QueryParser(Values.CONTENTS, analyzer);
        Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
        writer.deleteDocuments(query);
    } catch (Exception ex) {
        Singleton.getLogger().warning(
                "ERROR - caught a " + ex.getClass() + " in CodeIndexer\n with message: " + ex.getMessage());
    } finally {
        writer.close();
    }
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Given a queue of documents to index, index them by popping the queue limited to default of 1000 items.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index./*from www  .j a  v a 2s .c o  m*/
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException {
    Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION);
    Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION);

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;

    indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig);
    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory);

    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);

            Document doc = this.buildDocument(codeIndexDocument);

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxonomyWriter, doc));

            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        try {
            writer.close();
        } finally {
            taxonomyWriter.close();
        }
        Singleton.getLogger().info("Closing writers");
    }
}

From source file:com.searchcode.app.service.CodeIndexer.java

License:Open Source License

/**
 * Given a queue of documents to index, index them by popping the queue limited to 1000 items.
 * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the
 * index./*from  w w  w . j a  v  a2s .c o m*/
 * TODO investigate how Lucene deals with multiple writes
 */
public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue)
        throws IOException {
    // Index all documents and commit at the end for performance gains
    Directory dir = FSDirectory.open(Paths.get(
            Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION)));
    Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties()
            .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION)));

    Analyzer analyzer = new CodeAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    FacetsConfig facetsConfig;

    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(dir, iwc);
    TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir);

    try {
        CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll();
        int count = 0;

        while (codeIndexDocument != null) {
            Singleton.getLogger()
                    .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename());
            this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines());

            Document doc = new Document();
            // Path is the primary key for documents
            // needs to include repo location, project name and then filepath including file and revision
            Field pathField = new StringField("path",
                    codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":"
                            + codeIndexDocument.getRevision(),
                    Field.Store.YES);
            doc.add(pathField);

            // Add in facets
            facetsConfig = new FacetsConfig();
            facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME);
            facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME);
            facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY);
            facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH);
            facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR);
            facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION);
            facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED);

            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME,
                        codeIndexDocument.getLanguageName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY,
                        codeIndexDocument.getYearMonthDay()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH,
                        codeIndexDocument.getYearMonthDay().substring(0, 6)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR,
                        codeIndexDocument.getYearMonthDay().substring(0, 4)));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision()));
            }
            if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) {
                doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted()));
            }

            String indexContents = Values.EMPTYSTRING;

            indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents());
            indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents());
            this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector

            indexContents = indexContents.toLowerCase();

            doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES));
            doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES));
            doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(),
                    Field.Store.YES));
            doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES));
            doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES));
            doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES));
            doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO));
            doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(),
                    Field.Store.YES));
            doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES));
            doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(),
                    Field.Store.YES));
            doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES));
            doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES));
            doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES));
            doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES));

            // Extra metadata in this case when it was last indexed
            doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES));

            writer.updateDocument(
                    new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()),
                    facetsConfig.build(taxoWriter, doc));

            count++;
            if (count >= INDEX_QUEUE_BATCH_SIZE) {
                codeIndexDocument = null;
            } else {
                codeIndexDocument = codeIndexDocumentQueue.poll();
            }
        }
    } finally {
        Singleton.getLogger().info("Closing writers");
        writer.close();
        taxoWriter.close();
    }
}