List of usage examples for the org.apache.lucene.index.IndexWriterConfig constructor
public IndexWriterConfig(Analyzer analyzer)
From source file:com.searchcode.app.service.IndexService.java
License:Open Source License
/**
 * Deletes a file from the index using the code id which seems to be
 * the most reliable way of doing it. Code id being a hash of the file
 * name and location.
 * TODO Update the record and set the facets to a value we can ignore
 *
 * <p>Any failure while opening the writer, parsing the query or deleting is
 * logged as a warning and not rethrown, so the deletion is best-effort.
 *
 * @param codeId the code id to delete; it is escaped before being parsed as a query
 * @throws IOException if the index directory cannot be opened or closed
 */
public synchronized void deleteByCodeId(String codeId) throws IOException {
    // The directory and the analyzer are both closeable; release them on every path.
    try (Directory dir = FSDirectory.open(this.INDEX_LOCATION);
            Analyzer analyzer = new CodeAnalyzer()) {
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

        try (IndexWriter writer = new IndexWriter(dir, iwc)) {
            QueryParser parser = new QueryParser(Values.CONTENTS, analyzer);
            Query query = parser.parse(Values.CODEID + ":" + QueryParser.escape(codeId));
            writer.deleteDocuments(query);
        } catch (Exception ex) {
            this.logger.warning(
                    "ERROR - caught a " + ex.getClass() + " in CodeIndexer\n with message: " + ex.getMessage());
        }
    }
}
From source file:com.searchcode.app.service.IndexService.java
License:Open Source License
/** * Deletes all files that belong to a repository. * TODO I don't think this clears anything from the facets, which it should *///w w w . j ava2s . co m public synchronized void deleteByRepoName(String repoName) throws IOException { Directory dir = FSDirectory.open(this.INDEX_LOCATION); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); writer.deleteDocuments(new Term(Values.REPONAME, repoName)); writer.close(); }
From source file:com.serendio.lingo3g.CreateLuceneIndex.java
License:Open Source License
public static void main(String[] args) throws Exception { if (args.length != 1) { System.out.println("Args: index-dir"); System.exit(-1);//from w w w. ja va 2 s . c o m } File indexDir = new File(args[0]); if (indexDir.exists()) { System.out.println("Index directory already exists: " + indexDir.getAbsolutePath()); System.exit(-2); } Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig config = new IndexWriterConfig(analyzer); IndexWriter writer = new IndexWriter(FSDirectory.open(indexDir.toPath()), config); for (Document d : SampleDocumentData.DOCUMENTS_DATA_MINING) { final org.apache.lucene.document.Document doc = new org.apache.lucene.document.Document(); /* * We will create Lucene documents with searchable "fullContent" field and "title", * "url" and "snippet" fields for clustering. */ doc.add(new TextField("fullContent", d.getSummary(), Store.NO)); doc.add(new TextField("title", d.getTitle(), Store.YES)); doc.add(new TextField("snippet", d.getSummary(), Store.YES)); doc.add(new StringField("url", d.getContentUrl(), Store.YES)); writer.addDocument(doc); } writer.close(); }
From source file:com.shaie.annots.AnnotationSearchExample.java
License:Apache License
/**
 * Indexes one document whose "text" field carries the tokens and whose "annot"
 * field carries the color annotation stream, then prints the indexed terms, the
 * annotation payloads and the results of two span-within queries.
 *
 * @param args unused
 * @throws Exception if indexing or searching fails
 */
public static void main(String[] args) throws Exception {
    try (Directory dir = new RAMDirectory();
            WhitespaceAnalyzer analyzer = new WhitespaceAnalyzer()) {
        try (IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(analyzer))) {
            // we need to add the annotation as a TokenStream field, therefore cannot use an Analyzer passed in the
            // IndexWriterConfig.
            Tokenizer tokenizer = new WhitespaceTokenizer();
            tokenizer.setReader(new StringReader("quick brown fox ate the blue red chicken"));
            TeeSinkTokenFilter textStream = new TeeSinkTokenFilter(tokenizer);
            TokenStream colorAnnotationStream = new AnnotatingTokenFilter(
                    textStream.newSinkTokenStream(new ColorsSinkFilter()), COLOR_ANNOT_TERM);

            Document doc = new Document();
            doc.add(new TextField("text", textStream));
            doc.add(new TextField("annot", colorAnnotationStream));
            writer.addDocument(doc);
        }

        try (DirectoryReader reader = DirectoryReader.open(dir)) {
            LeafReader ar = reader.leaves().get(0).reader(); // we only have one segment
            printFieldTerms(ar, "text");
            System.out.println();

            final ByteArrayDataInput in = new ByteArrayDataInput();
            // Payloads are read below, so they must be requested explicitly; the
            // single-argument postings(Term) only asks for frequencies.
            PostingsEnum dape = ar.postings(new Term("annot", COLOR_ANNOT_TERM), PostingsEnum.PAYLOADS);
            int docID = dape.nextDoc();
            int freq = dape.freq();
            System.out.println("Color annotation spans: doc=" + docID + ", freq=" + freq);
            for (int i = 0; i < freq; i++) {
                dape.nextPosition();
                BytesRef payload = dape.getPayload();
                in.reset(payload.bytes, payload.offset, payload.length);
                System.out.println(" start=" + in.readVInt() + ", length=" + in.readVInt());
            }

            IndexSearcher searcher = new IndexSearcher(reader);

            System.out.println("\nsearching for 'red WITHIN color':");
            Query q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
                    new SpanInclusivePositionTermQuery(new Term("text", "red")));
            TopDocs td = searcher.search(q, 10);
            System.out.println(" num results: " + td.scoreDocs.length);

            System.out.println("\nsearching for 'ate WITHIN color':");
            q = new SpanWithinQuery(new SpanAnnotationTermQuery(new Term("annot", COLOR_ANNOT_TERM)),
                    new SpanInclusivePositionTermQuery(new Term("text", "ate")));
            td = searcher.search(q, 10);
            System.out.println(" num results: " + td.scoreDocs.length);
        }
    }
}
From source file:com.shaie.annots.example.AnnotatorAnyExample.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final Analyzer analyzer = new WhitespaceAnalyzer(); final IndexWriterConfig conf = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(dir, conf); addDocument(writer, "brown fox and a red dog"); addDocument(writer, "only red dog"); addDocument(writer, "no red animals here"); writer.close();//from w w w. j a va 2 s.c o m final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer); qp.setAllowLeadingWildcard(true); final DirectoryReader reader = DirectoryReader.open(dir); final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD); IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD); System.out.println(); final IndexSearcher searcher = new IndexSearcher(reader); search(searcher, qp.parse("animal:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM + " AND color:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM)); System.out.println(); search(searcher, qp.parse("animal:" + AnyAnnotationTokenFilter.ANY_ANNOTATION_TERM + " AND color:red")); System.out.println(); searchForRedAnimal(searcher); System.out.println(); reader.close(); }
From source file:com.shaie.annots.example.AnnotatorTeeSinkFilterExample.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final Analyzer analyzer = new WhitespaceAnalyzer(); final IndexWriterConfig conf = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(dir, conf); addDocument(writer, "brown fox and a red dog"); addDocument(writer, "only red dog"); addDocument(writer, "no red animals here"); writer.close();/*from w ww . j a v a2 s.co m*/ final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer); qp.setAllowLeadingWildcard(true); final DirectoryReader reader = DirectoryReader.open(dir); final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD); IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD); final IndexSearcher searcher = new IndexSearcher(reader); search(searcher, qp.parse("color:red")); System.out.println(); search(searcher, qp.parse("animal:fox")); System.out.println(); searchForBrownFox(searcher); System.out.println(); search(searcher, qp.parse("animal:* AND color:*")); System.out.println(); search(searcher, qp.parse("animal:* AND color:red")); System.out.println(); reader.close(); }
From source file:com.shaie.annots.example.AnnotatorTokenFilterExample.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final Analyzer analyzer = createAnalyzer(); final IndexWriterConfig conf = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(dir, conf); addDocument(writer, "brown fox and a red dog"); addDocument(writer, "only red dog"); addDocument(writer, "no red animals here"); writer.close();/*from ww w. ja v a 2 s . c om*/ final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer); qp.setAllowLeadingWildcard(true); final DirectoryReader reader = DirectoryReader.open(dir); final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD, ANIMAL_FIELD); IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD, ANIMAL_FIELD); final IndexSearcher searcher = new IndexSearcher(reader); search(searcher, qp.parse("color:red")); System.out.println(); search(searcher, qp.parse("animal:fox")); System.out.println(); searchForBrownFox(searcher); System.out.println(); search(searcher, qp.parse("animal:* AND color:*")); System.out.println(); search(searcher, qp.parse("animal:* AND color:red")); System.out.println(); reader.close(); }
From source file:com.shaie.annots.example.PreAnnotatedTokenFilterExample.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final Analyzer analyzer = new WhitespaceAnalyzer(); final IndexWriterConfig conf = new IndexWriterConfig(analyzer); final IndexWriter writer = new IndexWriter(dir, conf); addDocument(writer, "quick rosy brown fox and a pale violet red dog", 1, 2, 2, 1, 6, 3, 7, 1, 8, 1); addDocument(writer, "only red dog", 1, 1); addDocument(writer, "man with red pale face", 2, 1); writer.close();// www . jav a 2 s . c o m final QueryParser qp = new QueryParser(TEXT_FIELD, analyzer); qp.setAllowLeadingWildcard(true); final DirectoryReader reader = DirectoryReader.open(dir); final LeafReader leaf = reader.leaves().get(0).reader(); // We only have one segment IndexUtils.printFieldTerms(leaf, TEXT_FIELD, COLOR_FIELD); IndexUtils.printFieldTermsWithInfo(leaf, COLOR_FIELD); System.out.println(); final IndexSearcher searcher = new IndexSearcher(reader); search(searcher, qp.parse("color:" + ANY_ANNOTATION_TERM)); System.out.println(); search(searcher, qp.parse("color:pale")); System.out.println(); searchForColoredFox(searcher); System.out.println(); reader.close(); }
From source file:com.shaie.facet.NotDrillDownExample.java
License:Apache License
/**
 * Writes one document per author ("Bob" and "Lisa") to the index, each carrying
 * the author both as a facet field and as a stored field.
 *
 * @throws IOException if writing to the index or the taxonomy fails
 */
private static void createIndex() throws IOException {
    final String[] authors = { "Bob", "Lisa" };

    try (Analyzer whitespace = new WhitespaceAnalyzer();
            IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(whitespace));
            TaxonomyWriter taxonomy = new DirectoryTaxonomyWriter(taxoDir)) {
        for (int i = 0; i < authors.length; i++) {
            final String name = authors[i];
            final Document authorDoc = new Document();
            authorDoc.add(new FacetField(AUTHOR_FACET, name));
            authorDoc.add(new StoredField(AUTHOR_FACET, name));
            // config.build resolves the facet field against the taxonomy before the document is indexed.
            writer.addDocument(config.build(taxonomy, authorDoc));
        }
    }
}
From source file:com.shaie.PhraseVsSpanQuery.java
License:Apache License
@SuppressWarnings("resource") public static void main(String[] args) throws Exception { final Directory dir = new RAMDirectory(); final IndexWriterConfig conf = new IndexWriterConfig(new WhitespaceAnalyzer()); final IndexWriter writer = new IndexWriter(dir, conf); final Document doc = new Document(); doc.add(new TextField("f", new TokenStream() { final PositionIncrementAttribute pos = addAttribute(PositionIncrementAttribute.class); final CharTermAttribute term = addAttribute(CharTermAttribute.class); boolean first = true, done = false; @Override// w ww . jav a2s. com public boolean incrementToken() throws IOException { if (done) { return false; } if (first) { term.setEmpty().append("a"); pos.setPositionIncrement(1); first = false; } else { term.setEmpty().append("b"); pos.setPositionIncrement(0); done = true; } return true; } })); writer.addDocument(doc); writer.close(); final DirectoryReader reader = DirectoryReader.open(dir); final IndexSearcher searcher = new IndexSearcher(reader); final LeafReader ar = reader.leaves().get(0).reader(); final TermsEnum te = ar.terms("f").iterator(); BytesRef scratch = new BytesRef(); while ((scratch = te.next()) != null) { System.out.println(scratch.utf8ToString()); final PostingsEnum dape = ar.postings(new Term("f", scratch.utf8ToString())); System.out.println(" doc=" + dape.nextDoc() + ", pos=" + dape.nextPosition()); } System.out.println(); // try a phrase query with a slop final PhraseQuery pqNoSlop = buildPhraseQuery(0); System.out.println("searching for \"a b\"; num results = " + searcher.search(pqNoSlop, 10).totalHits); final PhraseQuery pqSlop1 = buildPhraseQuery(1); System.out.println("searching for \"a b\"~1; num results = " + searcher.search(pqSlop1, 10).totalHits); final PhraseQuery pqSlop3 = buildPhraseQuery(3); System.out.println("searching for \"a b\"~3; num results = " + searcher.search(pqSlop3, 10).totalHits); final SpanNearQuery snqUnOrdered = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new Term("f", 
"a")), new SpanTermQuery(new Term("f", "b")) }, 1, false); System.out.println("searching for SpanNearUnordered('a', 'b'), slop=1; num results = " + searcher.search(snqUnOrdered, 10).totalHits); final SpanNearQuery snqOrdered = new SpanNearQuery( new SpanQuery[] { new SpanTermQuery(new Term("f", "a")), new SpanTermQuery(new Term("f", "b")) }, 1, true); System.out.println("searching for SpanNearOrdered('a', 'b'), slop=1; num results = " + searcher.search(snqOrdered, 10).totalHits); reader.close(); }