Usage examples for `org.apache.lucene.index.IndexWriter#addDocument`, collected from several open-source projects (each example targets a different Lucene version).
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
From source file:de.hybris.platform.lucenesearch.jalo.LuceneTest.java
License:Open Source License
@Test public void testReindex() throws IOException { assertTermSearch(set(docA, docB, docC), "text"); final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_40, new StandardAnalyzer(Version.LUCENE_40)).setOpenMode(OpenMode.APPEND); final IndexWriter changewriter = new IndexWriter(directory, indexWriterConfig); changewriter.deleteDocuments(new Term("key", "b")); final Document docB2 = new Document(); docB2.add(new Field("key", "b", Field.Store.YES, Field.Index.NOT_ANALYZED)); docB2.add(new Field("text", "neuer texxxt zum zweiten document", Field.Store.YES, Field.Index.ANALYZED)); changewriter.addDocument(docB2); changewriter.close();//www. j a va 2 s . c o m assertTermSearch(set(docA, docB2, docC), "zum"); assertTermSearch(set(docA, docC), "text"); assertTermSearch(set(docB2), "texxxt"); }
From source file:de.ingrid.search.utils.facet.DummyIndex.java
License:EUPL
private static File createTestIndex() { File indexDirectory = new File("./test_index"); if (!indexDirectory.exists()) { try {/*from w w w. j ava 2 s . c o m*/ IndexWriter writer = new IndexWriter(FSDirectory.getDirectory(indexDirectory), new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED); for (Object[][] doc : IndexDef) { Document document = new Document(); for (Object[] fields : doc) { document.add(new Field((String) fields[0], (String) fields[1], (Field.Store) fields[2], (Field.Index) fields[3])); } writer.addDocument(document); } writer.close(); } catch (CorruptIndexException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (LockObtainFailedException e) { // TODO Auto-generated catch block e.printStackTrace(); } catch (IOException e) { // TODO Auto-generated catch block e.printStackTrace(); } } return indexDirectory; }
From source file:de.ingrid.upgrader.service.ManifestIndexer.java
License:EUPL
private void index(final List<File> files) throws Exception { // create tmp index folder LOG.debug(" create tmp index folder"); final File tmp = new File(_targetFolder, IKeys.TEMP_FOLDER); if (!tmp.exists()) { tmp.mkdirs();//from w w w.j a va 2 s . c o m } // indexer LOG.debug(" initialize index writer"); final StandardAnalyzer analyzer = new StandardAnalyzer(); final IndexWriter writer = new IndexWriter(tmp, analyzer, true); // add files to index LOG.debug(" adding documents"); for (final File file : files) { final Document doc = fileToDocument(file); if (doc != null) { writer.addDocument(doc); } } // optimize LOG.debug(" optimizing and closing writer"); writer.optimize(); writer.close(); // close searcher final LuceneSearcher searcher = LuceneSearcher.getInstance(); if (searcher != null) { searcher.closeReader(); } // rename index LOG.debug(" renaming tmp index folder"); final File folder = new File(_targetFolder, IKeys.INDEX_FOLDER); delete(folder); tmp.renameTo(folder); // open new searcher if (searcher == null) { LuceneSearcher.createInstance(folder); } else { searcher.openReader(folder); } }
From source file:de.jetsli.lumeo.util.LuceneHelperTest.java
License:Apache License
/**
 * Checks exact term matching on numeric (long) fields: an updated document
 * replaces the old one under the same id term, and a prefix-coded term query
 * finds exactly the updated version.
 */
@Test
public void testTermMatching() throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(RawLucene.VERSION, new KeywordAnalyzer()));
    IndexReader reader = null;
    try {
        FieldType ft = Mapping.getLongFieldType(true, true);

        Document d = new Document();
        d.add(new LongField("id", 1234, ft));
        d.add(new LongField("tmp", 1111, ft));
        w.addDocument(d);

        // replace the id=1234 document with one whose tmp value is 2222
        d = new Document();
        d.add(new LongField("id", 1234, ft));
        d.add(new LongField("tmp", 2222, ft));
        w.updateDocument(getTerm("id", 1234), d);

        d = new Document();
        d.add(new LongField("id", 0, ft));
        w.addDocument(d);
        w.commit();

        reader = DirectoryReader.open(w, true);
        IndexSearcher searcher = new IndexSearcher(reader);

        // long terms are indexed prefix-coded, so the query term must be encoded too
        BytesRef bytes = new BytesRef();
        NumericUtils.longToPrefixCoded(1234, 0, bytes);
        TopDocs td = searcher.search(new TermQuery(new Term("id", bytes)), 10);

        assertEquals(1, td.totalHits);
        assertEquals(1234L, searcher.doc(td.scoreDocs[0].doc).getField("id").numericValue());
        assertEquals(2222L, searcher.doc(td.scoreDocs[0].doc).getField("tmp").numericValue());
    } finally {
        // the original leaked the reader entirely and the writer on failure paths
        if (reader != null) {
            reader.close();
        }
        w.close();
    }
}
From source file:de.ks.flatadocdb.index.LuceneIndex.java
License:Apache License
protected void writeEntry(SessionEntry sessionEntry, IndexWriter writer) throws IOException { LuceneDocumentExtractor luceneExtractor = sessionEntry.getEntityDescriptor().getLuceneExtractor(); @SuppressWarnings("unchecked") Document document = luceneExtractor.createDocument(sessionEntry.getObject()); if (document == null) { document = new Document(); }//from w w w .j av a2s . c om String id = sessionEntry.getId(); String fileName = sessionEntry.getFileName(); NaturalId naturalId = sessionEntry.getNaturalId(); appendStandardFields(document, id, fileName, naturalId); if (log.isTraceEnabled()) { document.getFields().forEach(f -> log.trace("Extracted field {} from {}({}). Vaue={}", // f.name(), sessionEntry.getObject(), sessionEntry.getFileName(), // f.stringValue().length() > 70 ? f.stringValue().substring(0, 70) : f.stringValue())); } writer.addDocument(document); }
From source file:de.ks.lucene.LuceneTaggingTest.java
License:Apache License
@Test public void testTags() throws Exception { IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer)); List<String> allTags = Arrays.asList("Bla Blubb", "Blubb", "Blubber Huhu", "Bla Huhu", "Haha"); for (String tag : allTags) { Document doc = new Document(); doc.add(new TextField("tags", tag, Field.Store.YES)); writer.addDocument(doc); }//from www .j a v a 2s .c o m writer.close(); DirectoryReader directoryReader = DirectoryReader.open(directory); IndexSearcher searcher = new IndexSearcher(directoryReader); String term = "blubb"; TermQuery termQuery = new TermQuery(new Term("tags", term)); TopDocs search = searcher.search(termQuery, 50); log("TermQuery", searcher, search); FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("tags", term)); search = searcher.search(fuzzyQuery, 50); log("FuzzyQuery", searcher, search); BooleanQuery.Builder builder = new BooleanQuery.Builder(); builder.add(new TermQuery(new Term("tags", "blubb")), BooleanClause.Occur.SHOULD); builder.add(new TermQuery(new Term("tags", "bla")), BooleanClause.Occur.SHOULD); BooleanQuery query = builder.build(); search = searcher.search(query, 50); log("BooleanQuery", searcher, search); }
From source file:de.lmu.ifi.dbs.elki.application.lucene.SimpleTextLoader.java
License:Open Source License
@Override public void run() { try {//w w w. ja va 2 s . c om final Directory dir = FSDirectory.open(index); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36, new StandardAnalyzer(Version.LUCENE_36)); IndexWriter writer = new IndexWriter(dir, config); final URI suri = source.toURI(); for (File inf : source.listFiles()) { Document doc = new Document(); String id = suri.relativize(inf.toURI()).getPath(); String text = FileUtil.slurp(new FileInputStream(inf)); doc.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED)); doc.add(new Field("contents", text, Field.Store.YES, Field.Index.ANALYZED)); writer.addDocument(doc); } writer.close(); } catch (IOException e) { throw new AbortException("I/O error in lucene.", e); } }
From source file:de.nava.informa.search.ChannelIndexer.java
License:Open Source License
/**
 * Index all given news items.
 *
 * @param createNewIndex whether a new index should be generated or an
 *          existing one should be taken into account.
 * @param items a collection of ItemIF objects.
 */
public void indexItems(boolean createNewIndex, Collection<ItemIF> items) throws java.io.IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Start writing index.");
    }
    IndexWriter writer = new IndexWriter(indexDir, analyzer, createNewIndex);
    try {
        for (ItemIF item1 : items) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Add item " + item1 + " to index.");
            }
            writer.addDocument(ItemDocument.makeDocument(item1));
        }
        writer.optimize();
        nrOfIndexedItems = writer.docCount();
    } finally {
        writer.close();
    }
    if (LOG.isDebugEnabled()) {
        // was LOG.info inside a debug guard, which suppressed the message
        // whenever the logger ran at INFO level; use debug to match the guard
        LOG.debug("Finished writing index.");
    }
}
From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java
License:Apache License
/** * Adds the given collection of time series to the lucene index. * Converts the time series using the default object types of java and available lucene fields. * If an attribute of a time series is user defined data type then it is ignored. * <p>/*w ww. ja v a 2 s .c om*/ * Note: The add method do not commit the time series. * * @param converter the converter to converter the time series into a lucene document * @param timeSeries the collection with time series * @param indexWriter the lucene index writer * @return true if successful, otherwise false */ public static <T> boolean add(TimeSeriesConverter<T> converter, Collection<T> timeSeries, IndexWriter indexWriter) { if (timeSeries == null || timeSeries.isEmpty()) { LOGGER.debug("Collection is empty. Nothing to commit"); return true; } timeSeries.parallelStream().forEach(ts -> { try { indexWriter.addDocument(convert(ts, converter)); } catch (IOException e) { LOGGER.error("Could not add documents to lucene.", e); } }); return true; }
From source file:de.tuberlin.dima.cuttlefish.preprocessing.indexing.Indexer.java
License:Open Source License
/**
 * Extracts all news items and writes them as feature documents into a lucene
 * index at the given location, logging progress every 100 articles.
 *
 * @param indexDir directory the index is written to
 * @throws Exception if extraction or index creation fails
 */
public void index(File indexDir) throws Exception {
    Directory targetDirectory = new SimpleFSDirectory(indexDir);
    Analyzer usedAnalyzer = featureExtraction.analyzerToUse();
    final IndexWriter writer =
            new IndexWriter(targetDirectory, new IndexWriterConfig(Version.LUCENE_42, usedAnalyzer));
    final AtomicInteger indexedCount = new AtomicInteger(0);
    try {
        newsItemExtractor.extract(new NewsItemProcessor() {
            @Override
            public void process(NewsItem newsItem) {
                try {
                    writer.addDocument(featureExtraction.asDocument(newsItem));
                    int soFar = indexedCount.incrementAndGet();
                    if (soFar % 100 == 0) {
                        log.info("Indexed {} news articles", soFar);
                    }
                } catch (IOException e) {
                    log.error("Failed to index news item", e);
                }
            }
        });
    } finally {
        writer.close(true);
    }
    log.info("Indexed {} news articles", indexedCount.get());
}