List of usage examples for org.apache.lucene.index IndexWriter addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
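Before the project examples below, here is a minimal, self-contained sketch (not from any of the listed files) of the basic addDocument call against an in-memory index. It assumes a Lucene 7.x-era classpath, matching the long return type in the signature above; RAMDirectory is deprecated in later releases, so treat this purely as an illustration.

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class AddDocumentSketch {
        public static void main(String[] args) throws IOException {
            Directory dir = new RAMDirectory();
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                Document doc = new Document();
                // StringField: indexed as a single token (not analyzed), good for IDs
                doc.add(new StringField("id", "1", Field.Store.YES));
                // TextField: analyzed full-text content
                doc.add(new TextField("body", "Lucene in Action", Field.Store.YES));
                long seqNo = writer.addDocument(doc); // sequence number of this operation
                System.out.println("indexed, seqNo=" + seqNo);
            }
        }
    }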
From source file:com.lucene.index.test.IKAnalyzerdemo.java
License:Apache License
/**
 * Demo entry point: indexes one document into each of two in-memory
 * indexes, then searches both at once through a MultiReader.
 * @param args
 */
public static void main(String[] args) {
    // Field name for the Lucene Documents
    String fieldName = "text";
    String text1 = "oracle,?";
    String text2 = "?";
    String text3 = "?";
    // Instantiate the IKAnalyzer
    Analyzer analyzer = new IKAnalyzer();
    Directory directory1 = null;
    Directory directory2 = null;
    IndexWriter iwriter1 = null;
    IndexWriter iwriter2 = null;
    IndexReader ireader1 = null;
    IndexReader ireader2 = null;
    IndexSearcher isearcher = null;
    try {
        // Build two in-memory indexes
        directory1 = new RAMDirectory();
        directory2 = new RAMDirectory();
        // Configure the IndexWriterConfigs
        IndexWriterConfig iwConfig1 = new IndexWriterConfig(analyzer);
        iwConfig1.setOpenMode(OpenMode.CREATE);
        IndexWriterConfig iwConfig2 = new IndexWriterConfig(analyzer);
        iwConfig2.setOpenMode(OpenMode.CREATE);
        iwriter1 = new IndexWriter(directory1, iwConfig1);
        iwriter2 = new IndexWriter(directory2, iwConfig2);
        // Write one document into each index
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField("text1", text1, Field.Store.YES));
        iwriter1.addDocument(doc1);
        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10001", Field.Store.YES));
        doc2.add(new TextField("text2", text2, Field.Store.YES));
        iwriter2.addDocument(doc2);
        iwriter1.close();
        iwriter2.close();

        // ********************************** Search phase
        ireader1 = DirectoryReader.open(directory1);
        ireader2 = DirectoryReader.open(directory2);
        IndexReader[] mreader = { ireader1, ireader2 };
        MultiReader multiReader = new MultiReader(mreader);
        isearcher = new IndexSearcher(multiReader);

        String keyword = "?";
        // Build the Query with MultiFieldQueryParser, boosting text1 over text2
        String[] fields = { "text1", "text2" };
        Map<String, Float> boosts = new HashMap<String, Float>();
        boosts.put("text1", 5.0f);
        boosts.put("text2", 2.0f);
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer, boosts);
        Query query = parser.parse(keyword);
        System.out.println("Query = " + query);

        // Retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("hits: " + topDocs.totalHits);
        // Print the results
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader1 != null) {
            try {
                ireader1.close();
                ireader2.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory1 != null) {
            try {
                directory1.close();
                directory2.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.lucene.index.test.IKAnalyzerdemoMutilField.java
License:Apache License
/**
 * Demo entry point: indexes three documents into the same field of one
 * in-memory index, then searches it with an AND-default QueryParser.
 * @param args
 */
public static void main(String[] args) {
    // Field name for the Lucene Documents
    String fieldName = "text";
    String text1 = "oracle?";
    String text2 = "?";
    String text3 = "?";
    // Instantiate the IKAnalyzer
    Analyzer analyzer = new IKAnalyzer();
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Build an in-memory index
        directory = new RAMDirectory();
        // Configure the IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        // Write the documents
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField(fieldName, text1, Field.Store.YES));
        iwriter.addDocument(doc1);
        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10000", Field.Store.YES));
        doc2.add(new TextField(fieldName, text2, Field.Store.YES));
        iwriter.addDocument(doc2);
        Document doc3 = new Document();
        doc3.add(new StringField("ID", "10000", Field.Store.YES));
        doc3.add(new TextField(fieldName, text3, Field.Store.YES));
        iwriter.addDocument(doc3);
        iwriter.close();

        // ********************************** Search phase
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // Build the Query with QueryParser
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("hits: " + topDocs.totalHits);
        // Print the results
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
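A hedged sketch of how indexDoc might be driven, modeled on the indexDocs method of Lucene's own IndexFiles demo; the method name and the wiring here are assumptions, not part of the listing above:

    // Assumed imports: java.nio.file.*, java.nio.file.attribute.BasicFileAttributes
    static void indexDocs(final IndexWriter writer, Path path) throws IOException {
        if (Files.isDirectory(path)) {
            // Recurse into the directory and index every regular file found
            Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
        }
    }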
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator
                    + sidecarIndexLocation + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main);
        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            if (i == nextTarget) {
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null,
                    SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
private void addDummy(IndexWriter iw) throws IOException {
    // Placeholder document: keeps doc numbers in the sidecar index aligned
    // with the main index (see buildParallelReader above)
    Document dummy = new Document();
    Field f = new Field("_" + boostField, "d", StringField.TYPE_NOT_STORED);
    dummy.add(f);
    iw.addDocument(dummy);
}
From source file:com.m3958.apps.pcms.lucene.facet.MultiCategoryListsFacetsExample.java
License:Apache License
private void add(IndexWriter indexWriter, FacetFields facetFields, String... categoryPaths)
        throws IOException {
    Document doc = new Document();
    List<CategoryPath> paths = new ArrayList<CategoryPath>();
    for (String categoryPath : categoryPaths) {
        paths.add(new CategoryPath(categoryPath, '/'));
    }
    facetFields.addFields(doc, paths);
    indexWriter.addDocument(doc);
}
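This helper uses the pre-4.7 facet API, in which FacetFields is constructed over a TaxonomyWriter that assigns ordinals to each CategoryPath. A hedged usage sketch; the category strings are assumptions modeled on Lucene's facet examples, not taken from this file:

    add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");

Each string becomes one CategoryPath, split on '/' into a multi-level dimension.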
From source file:com.mathworks.xzheng.advsearching.MultiPhraseQueryTest.java
License:Apache License
protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc1 = new Document();
    doc1.add(new Field("field", "the quick brown fox jumped over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc1);

    Document doc2 = new Document();
    doc2.add(new Field("field", "the fast fox hopped over the hound",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc2);
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}
From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java
License:Apache License
public void setUp() throws Exception {
    String[] animals = { "aardvark", "beaver", "coati", "dog", "elephant", "frog",
            "gila monster", "horse", "iguana", "javelina", "kangaroo", "lemur", "moose",
            "nematode", "orca", "python", "quokka", "rat", "scorpion", "tarantula",
            "uromastyx", "vicuna", "walrus", "xiphias", "yak", "zebra" };

    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);

    Directory aTOmDirectory = new RAMDirectory(); // #1
    Directory nTOzDirectory = new RAMDirectory(); // #1

    // One config per writer: an IndexWriterConfig instance cannot be
    // shared across IndexWriters in Lucene 4.x
    IndexWriterConfig aTOmConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriterConfig nTOzConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriter aTOmWriter = new IndexWriter(aTOmDirectory, aTOmConfig);
    IndexWriter nTOzWriter = new IndexWriter(nTOzDirectory, nTOzConfig);

    for (int i = animals.length - 1; i >= 0; i--) {
        Document doc = new Document();
        String animal = animals[i];
        doc.add(new Field("animal", animal, Field.Store.YES, Field.Index.NOT_ANALYZED));
        if (animal.charAt(0) < 'n') {
            aTOmWriter.addDocument(doc); // #2
        } else {
            nTOzWriter.addDocument(doc); // #2
        }
    }

    aTOmWriter.close();
    nTOzWriter.close();

    searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(DirectoryReader.open(aTOmDirectory));
    searchers[1] = new IndexSearcher(DirectoryReader.open(nTOzDirectory));
}
From source file:com.mathworks.xzheng.advsearching.SecurityFilterTest.java
License:Apache License
protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(directory, config);

    Document document = new Document(); // 1
    document.add(new Field("owner", "elwood",
            Field.Store.YES, Field.Index.NOT_ANALYZED)); // 1
    document.add(new Field("keywords", "elwood's sensitive info",
            Field.Store.YES, Field.Index.ANALYZED)); // 1
    writer.addDocument(document);

    document = new Document(); // 2
    document.add(new Field("owner", "jake",
            Field.Store.YES, Field.Index.NOT_ANALYZED)); // 2
    document.add(new Field("keywords", "jake's sensitive info",
            Field.Store.YES, Field.Index.ANALYZED)); // 2
    writer.addDocument(document);
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}
From source file:com.mathworks.xzheng.advsearching.SpanQueryTest.java
License:Apache License
protected void setUp() throws Exception {
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("f", "the quick brown fox jumps over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("f", "the quick red fox jumps over the sleepy cat",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
    reader = searcher.getIndexReader();

    quick = new SpanTermQuery(new Term("f", "quick"));
    brown = new SpanTermQuery(new Term("f", "brown"));
    red = new SpanTermQuery(new Term("f", "red"));
    fox = new SpanTermQuery(new Term("f", "fox"));
    lazy = new SpanTermQuery(new Term("f", "lazy"));
    sleepy = new SpanTermQuery(new Term("f", "sleepy"));
    dog = new SpanTermQuery(new Term("f", "dog"));
    cat = new SpanTermQuery(new Term("f", "cat"));
}