Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

On this page you can find example usages of the org.apache.lucene.index.IndexWriterConfig constructor IndexWriterConfig(Analyzer).

Prototype

public IndexWriterConfig(Analyzer analyzer)

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:com.searchcode.app.service.IndexService.java

License:Open Source License

/**
 * Deletes a file from the index using the code id, which seems to be
 * the most reliable way of doing it. The code id is a hash of the file
 * name and location.
 * TODO Update the record and set the facets to a value we can ignore
 *
 * <p>Failures while opening the writer, parsing the query or deleting are
 * logged as warnings and are not rethrown.
 *
 * @param codeId identifier of the document to delete; it is escaped before
 *               being parsed into a query
 * @throws IOException if the index directory cannot be opened or closed
 */
public synchronized void deleteByCodeId(String codeId) throws IOException {
    // The directory is a resource of its own: closing the writer does not close it,
    // so it is managed by an outer try-with-resources.
    try (Directory dir = FSDirectory.open(this.INDEX_LOCATION)) {
        Analyzer analyzer = new CodeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            QueryParser parser = new QueryParser(Values.CONTENTS, analyzer);
            // Escape the id so query-syntax characters in it are matched literally.
            Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
            writer.deleteDocuments(query);
        } catch (Exception ex) {
            // Deliberately best-effort: a failed delete is reported but does not abort the caller.
            this.logger.warning(
                    "ERROR - caught a " + ex.getClass() + " in CodeIndexer\n with message: " + ex.getMessage());
        }
    }
}

From source file:com.searchcode.app.service.IndexService.java

License:Open Source License

/**
 * Deletes all files that belong to a repository.
 * TODO I don't think this clears anything from the facets, which it should
 *
 * @param repoName value of the {@code Values.REPONAME} term whose documents are deleted
 * @throws IOException if the index cannot be opened, modified or closed
 */
public synchronized void deleteByRepoName(String repoName) throws IOException {
    // Both the directory and the writer are closed on every path, including when
    // deleteDocuments throws; previously the writer leaked on failure and the
    // directory was never closed.
    try (Directory dir = FSDirectory.open(this.INDEX_LOCATION)) {
        Analyzer analyzer = new CodeAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            writer.deleteDocuments(new Term(Values.REPONAME, repoName));
        }
    }
}

From source file:com.serendio.lingo3g.CreateLuceneIndex.java

License:Open Source License

/**
 * Creates a new Lucene index in the given directory from
 * {@code SampleDocumentData.DOCUMENTS_DATA_MINING}.
 *
 * <p>Exits with status -1 when the argument count is wrong and with status -2
 * when the target directory already exists.
 *
 * @param args a single element: the path of the index directory to create
 * @throws Exception if the index cannot be created or written
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        System.out.println("Args: index-dir");
        System.exit(-1);
    }

    File indexDir = new File(args[0]);
    if (indexDir.exists()) {
        System.out.println("Index directory already exists: " + indexDir.getAbsolutePath());
        System.exit(-2);
    }

    // try-with-resources closes the writer, directory and analyzer (in that order)
    // even if adding a document fails part-way through.
    try (Analyzer analyzer = new StandardAnalyzer();
            FSDirectory dir = FSDirectory.open(indexDir.toPath());
            IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
        for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING) {
            final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document();
            /*
             * We will create Lucene documents with searchable "fullContent" field and "title",
             * "url" and "snippet" fields for clustering.
             */
            doc.add(new TextField("fullContent", d.getSummary(), Store.NO));

            doc.add(new TextField("title", d.getTitle(), Store.YES));
            doc.add(new TextField("snippet", d.getSummary(), Store.YES));
            doc.add(new StringField("url", d.getContentUrl(), Store.YES));
            writer.addDocument(doc);
        }
    }
}

From source file:com.shaie.annots.AnnotationSearchExample.java

License:Apache License

/**
 * Indexes one sentence together with a color annotation token stream, then prints
 * the terms of the "text" field, the payloads stored for the annotation term and
 * the hit counts of two span queries.
 *
 * @param args ignored
 * @throws Exception if indexing, reading or searching fails
 */
public static void main(String[] args) throws Exception {
    // try-with-resources closes the writer, the reader and the directory even when a step throws;
    // on the success path the close order (writer, reader, directory) is unchanged.
    try (Directory dir = new RAMDirectory()) {
        IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            // we need to add the annotation as a TokenStream field, therefore cannot use an Analyzer passed in the
            // IndexWriterConfig.
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader("quick brown fox ate the blue red chicken"));
            TeeSinkTokenFilter textStream = new TeeSinkTokenFilter(tokenizer);
            TokenStream colorAnnotationStream = new AnnotatingTokenFilter(
                    textStream.newSinkTokenStream(new ColorsSinkFilter()), COLOR_ANNOT_TERM);

            Document doc = new Document();
            doc.add(new TextField("text", textStream));
            doc.add(new TextField("annot", colorAnnotationStream));
            writer.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            LeafReader ar = reader.leaves().get(0).reader(); // we only have one segment
            printFieldTerms(ar, "text");
            System.out.println();

            final ByteArrayDataInput in = new ByteArrayDataInput();
            PostingsEnum dape = ar.postings(new Term("annot", COLOR_ANNOT_TERM));
            int docID = dape.nextDoc();
            int freq = dape.freq();
            System.out.println("Color annotation spans: doc=" + docID + ", freq=" + freq);
            for (int i = 0; i < freq; i++) {
                dape.nextPosition();
                // Each payload is decoded as two VInts, printed as start and length.
                BytesRef payload = dape.getPayload();
                in.reset(payload.bytes, payload.offset, payload.length);
                System.out.println("  start=" + in.readVInt() + ", length=" + in.readVInt());
            }

            IndexSearcher searcher = new IndexSearcher(reader);

            System.out.println("\nsearching for 'red WITHIN color':");
            Query q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
                    new SpanInclusivePositionTermQuery(new Term("text", "red")));
            TopDocs td = searcher.search(q, 10);
            System.out.println("  num results: " + td.scoreDocs.length);

            System.out.println("\nsearching for 'ate WITHIN color':");
            q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
                    new SpanInclusivePositionTermQuery(new Term("text", "ate")));
            td = searcher.search(q, 10);
            System.out.println("  num results: " + td.scoreDocs.length);
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorAnyExample.java

License:Apache License

/**
 * Indexes three sample sentences, prints the indexed terms of the text, color and
 * animal fields, and runs three searches that use the "any annotation" term.
 *
 * @param args ignored
 * @throws Exception if indexing, query parsing or searching fails
 */
@SuppressWarnings("resource") // the leaf reader is obtained from `reader` and is not closed separately here
public static void main(String[] args) throws Exception {
    // try-with-resources closes the directory, analyzer, writer and reader on every path;
    // previously the directory and analyzer were never closed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = new WhitespaceAnalyzer()) {
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            addDocument(writer, "brown fox and a red dog");
            addDocument(writer, "only red dog");
            addDocument(writer, "no red animals here");
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD);
            System.out.println();

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("animal:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM + " AND color:"
                    + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM));
            System.out.println();

            search(searcher, qp.parse("animal:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM + " AND color:red"));
            System.out.println();

            searchForRedAnimal(searcher);
            System.out.println();
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorTeeSinkFilterExample.java

License:Apache License

/**
 * Indexes three sample sentences, prints the indexed terms of the text, color and
 * animal fields, and runs a series of term, span and wildcard searches against them.
 *
 * @param args ignored
 * @throws Exception if indexing, query parsing or searching fails
 */
@SuppressWarnings("resource") // the leaf reader is obtained from `reader` and is not closed separately here
public static void main(String[] args) throws Exception {
    // try-with-resources closes the directory, analyzer, writer and reader on every path;
    // previously the directory and analyzer were never closed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = new WhitespaceAnalyzer()) {
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            addDocument(writer, "brown fox and a red dog");
            addDocument(writer, "only red dog");
            addDocument(writer, "no red animals here");
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        // Needed for the "animal:*" / "color:*" queries below, which start with a wildcard.
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD);

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("color:red"));
            System.out.println();

            search(searcher, qp.parse("animal:fox"));
            System.out.println();

            searchForBrownFox(searcher);
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:*"));
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:red"));
            System.out.println();
        }
    }
}

From source file:com.shaie.annots.example.AnnotatorTokenFilterExample.java

License:Apache License

/**
 * Indexes three sample sentences with the analyzer returned by {@code createAnalyzer()},
 * prints the indexed terms of the text, color and animal fields, and runs a series of
 * term, span and wildcard searches against them.
 *
 * @param args ignored
 * @throws Exception if indexing, query parsing or searching fails
 */
@SuppressWarnings("resource") // analyzer ownership is not visible here, so it is left unclosed as before
public static void main(String[] args) throws Exception {
    // try-with-resources closes the directory, writer and reader on every path;
    // previously the directory was never closed.
    try (Directory dir = new RAMDirectory()) {
        final Analyzer analyzer = createAnalyzer();
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            addDocument(writer, "brown fox and a red dog");
            addDocument(writer, "only red dog");
            addDocument(writer, "no red animals here");
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        // Needed for the "animal:*" / "color:*" queries below, which start with a wildcard.
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD);

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("color:red"));
            System.out.println();

            search(searcher, qp.parse("animal:fox"));
            System.out.println();

            searchForBrownFox(searcher);
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:*"));
            System.out.println();

            search(searcher, qp.parse("animal:* AND color:red"));
            System.out.println();
        }
    }
}

From source file:com.shaie.annots.example.PreAnnotatedTokenFilterExample.java

License:Apache License

/**
 * Indexes three sample sentences, each passed to {@code addDocument} with a list of
 * integer arguments, prints the indexed terms of the text and color fields, and runs
 * three searches against the color annotations.
 *
 * @param args ignored
 * @throws Exception if indexing, query parsing or searching fails
 */
@SuppressWarnings("resource") // the leaf reader is obtained from `reader` and is not closed separately here
public static void main(String[] args) throws Exception {
    // try-with-resources closes the directory, analyzer, writer and reader on every path;
    // previously the directory and analyzer were never closed.
    try (Directory dir = new RAMDirectory(); Analyzer analyzer = new WhitespaceAnalyzer()) {
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            // NOTE(review): the trailing ints look like (position, length) annotation pairs — confirm
            // against addDocument.
            addDocument(writer, "quick rosy brown fox and a pale violet red dog", 1, 2, 2, 1, 6, 3, 7, 1, 8, 1);
            addDocument(writer, "only red dog", 1, 1);
            addDocument(writer, "man with red pale face", 2, 1);
        }

        final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer);
        qp.setAllowLeadingWildcard(true);

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment
            IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD);
            IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD);
            System.out.println();

            final IndexSearcher searcher = new IndexSearcher(reader);

            search(searcher, qp.parse("color:" + ANY_ANNOTATION_TERM));
            System.out.println();

            search(searcher, qp.parse("color:pale"));
            System.out.println();

            searchForColoredFox(searcher);
            System.out.println();
        }
    }
}

From source file:com.shaie.facet.NotDrillDownExample.java

License:Apache License

/**
 * Writes one document per sample author ("Bob" and "Lisa") to the index in
 * {@code indexDir}, passing each document through {@code config.build} with a
 * taxonomy writer opened on {@code taxoDir}.
 *
 * <p>Each document carries the author both as a {@code FacetField} and as a
 * {@code StoredField} under {@code AUTHOR_FACET}.
 *
 * @throws IOException if either index cannot be opened, written or closed
 */
private static void createIndex() throws IOException {
    final String[] authors = { "Bob", "Lisa" };
    try (Analyzer whitespace = new WhitespaceAnalyzer();
            IndexWriter docWriter = new IndexWriter(indexDir, new IndexWriterConfig(whitespace));
            TaxonomyWriter facetWriter = new DirectoryTaxonomyWriter(taxoDir)) {
        for (int i = 0; i < authors.length; i++) {
            final String name = authors[i];
            final Document authorDoc = new Document();
            authorDoc.add(new FacetField(AUTHOR_FACET, name));
            authorDoc.add(new StoredField(AUTHOR_FACET, name));
            docWriter.addDocument(config.build(facetWriter, authorDoc));
        }
    }
}

From source file:com.shaie.PhraseVsSpanQuery.java

License:Apache License

/**
 * Indexes a single document whose field "f" holds the tokens "a" and "b" at the same
 * position, prints the postings of each term, and then prints the hit counts of
 * several phrase queries and span-near queries over those two terms.
 *
 * @param args ignored
 * @throws Exception if indexing, reading or searching fails
 */
@SuppressWarnings("resource") // the leaf reader is obtained from `reader` and is not closed separately here
public static void main(String[] args) throws Exception {
    // try-with-resources closes the directory, writer and reader on every path;
    // previously the directory was never closed.
    try (Directory dir = new RAMDirectory()) {
        final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, conf)) {
            final Document doc = new Document();
            // Hand-written stream: emits "a" with position increment 1, then "b" with
            // increment 0, so both tokens share one position.
            doc.add(new TextField("f", new TokenStream() {
                final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class);
                final CharTermAttribute term = addAttribute(CharTermAttribute.class);
                boolean first = true, done = false;

                @Override
                public boolean incrementToken() throws IOException {
                    if (done) {
                        return false;
                    }
                    if (first) {
                        term.setEmpty().append("a");
                        pos.setPositionIncrement(1);
                        first = false;
                    } else {
                        term.setEmpty().append("b");
                        pos.setPositionIncrement(0);
                        done = true;
                    }
                    return true;
                }
            }));
            writer.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            final IndexSearcher searcher = new IndexSearcher(reader);
            final LeafReader ar = reader.leaves().get(0).reader();
            final TermsEnum te = ar.terms("f").iterator();
            // No initial allocation needed: the variable is assigned by te.next() before any use.
            BytesRef scratch;
            while ((scratch = te.next()) != null) {
                System.out.println(scratch.utf8ToString());
                final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString()));
                System.out.println("  doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition());
            }

            System.out.println();

            // phrase queries built by buildPhraseQuery with arguments 0, 1 and 3
            final PhraseQuery pqNoSlop = buildPhraseQuery(0);
            System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits);

            final PhraseQuery pqSlop1 = buildPhraseQuery(1);
            System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits);

            final PhraseQuery pqSlop3 = buildPhraseQuery(3);
            System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits);

            // the same two terms as span-near queries with slop 1, first unordered, then ordered
            final SpanNearQuery snqUnOrdered = new SpanNearQuery(
                    new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
                    false);
            System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = "
                    + searcher.search(snqUnOrdered, 10).totalHits);

            final SpanNearQuery snqOrdered = new SpanNearQuery(
                    new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1,
                    true);
            System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = "
                    + searcher.search(snqOrdered, 10).totalHits);
        }
    }
}