Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

List of usage examples for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig.

Prototype

public IndexWriterConfig(Analyzer analyzer) 

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:com.searchcode.app.service.IndexService.java

License:Open Source License

/**
 * Deletes a file from the index using the code id which seems to be
 * the most reliable way of doing it. Code id being a hash of the file
 * name and location.
 *
 * <p>Any failure while opening the writer, parsing the query or deleting the
 * documents is logged as a warning and not rethrown.
 *
 * TODO Update the record and set the facets to a value we can ignore
 *
 * @param codeId the code id of the document(s) to delete; it is escaped before being parsed as a query
 * @throws IOException if the index directory cannot be opened or closed
 */
public synchronized void deleteByCodeId(String codeId) throws IOException {
    // The directory is opened per call, so it must also be released per call;
    // closing the IndexWriter alone does not close the Directory it was given.
    try (Directory dir = FSDirectory.open(this.INDEX_LOCATION)) {
        Analyzer analyzer = new CodeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            QueryParser parser = new QueryParser(Values.CONTENTS, analyzer);
            Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
            writer.deleteDocuments(query);
        } catch (Exception ex) {
            // Deliberately best-effort: a failed delete is reported but does not abort the caller.
            this.logger.warning(
                    "ERROR - caught a " + ex.getClass() + " in CodeIndexer\n with message: " + ex.getMessage());
        }
    }
}

From source file:com.searchcode.app.service.IndexService.java

License:Open Source License

/**
 * Deletes all files that belong to a repository.
 * TODO I don't think this clears anything from the facets, which it should
 *
 * @param repoName value matched exactly against the {@code Values.REPONAME} term
 * @throws IOException if the index cannot be opened, modified or closed
 */
public synchronized void deleteByRepoName(String repoName) throws IOException {
    // try-with-resources guarantees the writer (and the directory it was opened on)
    // is closed even when deleteDocuments throws.
    try (Directory dir = FSDirectory.open(this.INDEX_LOCATION)) {
        Analyzer analyzer = new CodeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            writer.deleteDocuments(new Term(Values.REPONAME, repoName));
        }
    }
}

From source file:com.serendio.lingo3g.CreateLuceneIndex.java

License:Open Source License

/**
 * Builds a new Lucene index from {@code SampleDocumentData.DOCUMENTS_DATA_MINING}.
 *
 * <p>Expects exactly one argument: the index directory, which must not exist yet.
 * Exits with status -1 on a wrong argument count and -2 if the directory already exists.
 *
 * @param args command-line arguments; {@code args[0]} is the index directory path
 * @throws Exception if the index cannot be created or written
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.out.println("Args: index-dir");
        System.exit(-1);
    }

    File indexDir = new File(args[0]);
    if (indexDir.exists()) {
        System.out.println("Index directory already exists: " + indexDir.getAbsolutePath());
        System.exit(-2);
    }

    // All three resources are released in reverse order, even if indexing fails midway.
    try (Analyzer analyzer = new StandardAnalyzer();
            FSDirectory directory = FSDirectory.open(indexDir.toPath());
            IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer))) {
        for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING) {
            final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
            /*
             * We will create Lucene documents with searchable "fullContent" field and "title",
             * "url" and "snippet" fields for clustering.
             */
            doc.add(new TextField("fullContent", d.getSummary(), Store.NO));

            doc.add(new TextField("title", d.getTitle(), Store.YES));
            doc.add(new TextField("snippet", d.getSummary(), Store.YES));
            doc.add(new StringField("url", d.getContentUrl(), Store.YES));
            writer.addDocument(doc);
        }
    }
}

From source file:com.shaie.annots.AnnotationSearchExample.java

License:Apache License

/**
 * Indexes one sentence with a parallel "annot" field carrying color annotations,
 * prints the indexed terms and the annotation payloads, then runs two
 * "WITHIN color" span queries and prints the number of results.
 *
 * @param args unused
 * @throws Exception if indexing or searching fails
 */
public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory()) {
        IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // we need to add the annotation as a TokenStream field, therefore cannot use an Analyzer passed in the
            // IndexWriterConfig.
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader("quick brown fox ate the blue red chicken"));
            TeeSinkTokenFilter textStream = new TeeSinkTokenFilter(tokenizer);
            TokenStream colorAnnotationStream = new AnnotatingTokenFilter(
                    textStream.newSinkTokenStream(new ColorsSinkFilter()), COLOR_ANNOT_TERM);

            Document doc = new Document();
            doc.add(new TextField("text", textStream));
            doc.add(new TextField("annot", colorAnnotationStream));
            writer.addDocument(doc);
        }

        // The writer is closed (and therefore committed) before the reader is opened.
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            LeafReader ar = reader.leaves().get(0).reader(); // we only have one segment
            printFieldTerms(ar, "text");
            System.out.println();

            final ByteArrayDataInput in = new ByteArrayDataInput();
            // Payloads are read below, so they must be requested explicitly; the one-argument
            // postings(Term) overload does not ask for them.
            PostingsEnum dape = ar.postings(new Term("annot", COLOR_ANNOT_TERM), PostingsEnum.PAYLOADS);
            int docID = dape.nextDoc();
            int freq = dape.freq();
            System.out.println("Color annotation spans: doc=" + docID + ", freq=" + freq);
            for (int i = 0; i < freq; i++) {
                dape.nextPosition();
                BytesRef payload = dape.getPayload();
                in.reset(payload.bytes, payload.offset, payload.length);
                System.out.println("  start=" + in.readVInt() + ", length=" + in.readVInt());
            }

            IndexSearcher searcher = new IndexSearcher(reader);

            System.out.println("\nsearching for 'red WITHIN color':");
            Query q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
                    new SpanInclusivePositionTermQuery(new Term("text", "red")));
            TopDocs td = searcher.search(q, 10);
            System.out.println("  num results: " + td.scoreDocs.length);

            System.out.println("\nsearching for 'ate WITHIN color':");
            q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
                    new SpanInclusivePositionTermQuery(new Term("text", "ate")));
            td = searcher.search(q, 10);
            System.out.println("  num results: " + td.scoreDocs.length);
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorAnyExample.java

License:Apache License

/**
 * Indexes three sample sentences into an in-memory directory, prints the indexed
 * terms of the text, color and animal fields, and runs queries that use the
 * "any annotation" term.
 *
 * @param args unused
 * @throws Exception if indexing, query parsing or searching fails
 */
public static void main(String[] args) throws Exception {
    // Every closeable is scoped with try-with-resources, so nothing leaks on failure
    // and the former @SuppressWarnings("resource") is no longer needed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = new WhitespaceAnalyzer()) {
        final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            addDocument(writer, "brown fox and a red dog");
            addDocument(writer, "only red dog");
            addDocument(writer, "no red animals here");
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD);
            System.out.println();

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("animal:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM + " AND color:"
                    + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM));
            System.out.println();

            search(searcher, qp.parse("animal:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM + " AND color:red"));
            System.out.println();

            searchForRedAnimal(searcher);
            System.out.println();
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorTeeSinkFilterExample.java

License:Apache License

/**
 * Indexes three sample sentences into an in-memory directory, prints the indexed
 * terms of the text, color and animal fields, and runs a series of color/animal
 * queries against the index.
 *
 * @param args unused
 * @throws Exception if indexing, query parsing or searching fails
 */
public static void main(String[] args) throws Exception {
    // Every closeable is scoped with try-with-resources, so nothing leaks on failure
    // and the former @SuppressWarnings("resource") is no longer needed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = new WhitespaceAnalyzer()) {
        final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            addDocument(writer, "brown fox and a red dog");
            addDocument(writer, "only red dog");
            addDocument(writer, "no red animals here");
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD);

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("color:red"));
            System.out.println();

            search(searcher, qp.parse("animal:fox"));
            System.out.println();

            searchForBrownFox(searcher);
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:*"));
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:red"));
            System.out.println();
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorTokenFilterExample.java

License:Apache License

/**
 * Indexes three sample sentences with the analyzer returned by {@code createAnalyzer()},
 * prints the indexed terms of the text, color and animal fields, and runs a series of
 * color/animal queries against the index.
 *
 * @param args unused
 * @throws Exception if indexing, query parsing or searching fails
 */
public static void main(String[] args) throws Exception {
    // Every closeable is scoped with try-with-resources, so nothing leaks on failure
    // and the former @SuppressWarnings("resource") is no longer needed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = createAnalyzer()) {
        final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            addDocument(writer, "brown fox and a red dog");
            addDocument(writer, "only red dog");
            addDocument(writer, "no red animals here");
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD);

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("color:red"));
            System.out.println();

            search(searcher, qp.parse("animal:fox"));
            System.out.println();

            searchForBrownFox(searcher);
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:*"));
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:red"));
            System.out.println();
        }
    }
}

From source file:com.shaie.annots.example.PreAnnotatedTokenFilterExample.java

License:Apache License

/**
 * Indexes three pre-annotated sample sentences into an in-memory directory, prints the
 * indexed terms of the text and color fields, and runs color queries against the index.
 *
 * @param args unused
 * @throws Exception if indexing, query parsing or searching fails
 */
public static void main(String[] args) throws Exception {
    // Every closeable is scoped with try-with-resources, so nothing leaks on failure
    // and the former @SuppressWarnings("resource") is no longer needed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = new WhitespaceAnalyzer()) {
        final IndexWriterConfig conf = new IndexWriterConfig(analyzer);
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // NOTE(review): the trailing ints are presumably the annotation markers consumed by
            // addDocument (defined elsewhere) - confirm their meaning there.
            addDocument(writer, "quick rosy brown fox and a pale violet red dog", 1, 2, 2, 1, 6, 3, 7, 1, 8, 1);
            addDocument(writer, "only red dog", 1, 1);
            addDocument(writer, "man with red pale face", 2, 1);
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD);
            System.out.println();

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("color:" + ANY_ANNOTATION_TERM));
            System.out.println();

            search(searcher, qp.parse("color:pale"));
            System.out.println();

            searchForColoredFox(searcher);
            System.out.println();
        }
    }
}

From source file:com.shaie.facet.NotDrillDownExample.java

License:Apache License

/**
 * Writes one document per sample author ("Bob" and "Lisa") to the index, each carrying
 * the author both as a facet and as a stored field. The analyzer and both writers are
 * closed when the method returns.
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private static void createIndex() throws IOException {
    final String[] authors = { "Bob", "Lisa" };
    try (Analyzer analyzer = new WhitespaceAnalyzer();
            IndexWriter indexWriter = new IndexWriter(indexDir, new IndexWriterConfig(analyzer));
            TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir)) {
        for (int i = 0; i < authors.length; i++) {
            final String name = authors[i];
            final Document authorDoc = new Document();
            authorDoc.add(new FacetField(AUTHOR_FACET, name));
            authorDoc.add(new StoredField(AUTHOR_FACET, name));
            // config is declared outside this method; build(...) yields the document that is actually indexed.
            indexWriter.addDocument(config.build(taxoWriter, authorDoc));
        }
    }
}

From source file:com.shaie.PhraseVsSpanQuery.java

License:Apache License

/**
 * Indexes a single document whose field "f" holds two tokens, "a" and "b", at the same
 * position (the second token has a position increment of 0), prints each term's first
 * posting, and then compares the hit counts of phrase queries with different slops
 * against ordered and unordered span-near queries.
 *
 * @param args unused
 * @throws Exception if indexing or searching fails
 */
// The anonymous TokenStream is handed to the document field and is not closed in this method.
@SuppressWarnings("resource")
public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory()) {
        final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            final Document doc = new Document();
            doc.add(new TextField("f", new TokenStream() {
                final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class);
                final CharTermAttribute term = addAttribute(CharTermAttribute.class);
                boolean first = true, done = false;

                @Override
                public boolean incrementToken() throws IOException {
                    if (done) {
                        return false;
                    }
                    if (first) {
                        term.setEmpty().append("a");
                        pos.setPositionIncrement(1);
                        first = false;
                    } else {
                        // Increment 0 stacks "b" on the same position as "a".
                        term.setEmpty().append("b");
                        pos.setPositionIncrement(0);
                        done = true;
                    }
                    return true;
                }
            }));
            writer.addDocument(doc);
        }

        // The writer is closed (and therefore committed) before the reader is opened.
        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final IndexSearcher searcher = new IndexSearcher(reader);
            final LeafReader ar = reader.leaves().get(0).reader();
            final TermsEnum te = ar.terms("f").iterator();
            BytesRef scratch;
            while ((scratch = te.next()) != null) {
                System.out.println(scratch.utf8ToString());
                // Positions are read below, so they must be requested explicitly; the
                // one-argument postings(Term) overload does not ask for them.
                final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString()),
                        PostingsEnum.POSITIONS);
                System.out.println("  doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition());
            }

            System.out.println();

            // try a phrase query with a slop
            final PhraseQuery pqNoSlop = buildPhraseQuery(0);
            System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits);

            final PhraseQuery pqSlop1 = buildPhraseQuery(1);
            System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits);

            final PhraseQuery pqSlop3 = buildPhraseQuery(3);
            System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits);

            final SpanNearQuery snqUnOrdered = new SpanNearQuery(
                    new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
                    false);
            System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = "
                    + searcher.search(snqUnOrdered, 10).totalHits);

            final SpanNearQuery snqOrdered = new SpanNearQuery(
                    new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
                    true);
            System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = "
                    + searcher.search(snqOrdered, 10).totalHits);
        }
    }
}