Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usage of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Document

Adds a document to this index.
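
Below is a minimal, self-contained sketch of the call, assuming a Lucene 6.x-8.x API (where addDocument returns a sequence number); the in-memory RAMDirectory and the field name "contents" are illustrative only and do not come from the examples on this page:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws Exception {
        Directory dir = new RAMDirectory();                  // in-memory index, for illustration only
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(dir, config)) {
            Document doc = new Document();
            doc.add(new TextField("contents", "hello lucene", Field.Store.YES));
            long seqNo = writer.addDocument(doc);            // adds the document to this index
            writer.commit();                                 // addDocument alone does not commit
            System.out.println("Added document, sequence number " + seqNo);
        }
    }
}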

Usage

From source file:de.hybris.platform.lucenesearch.jalo.LuceneTest.java

License:Open Source License

@Test
public void testReindex() throws IOException {
    assertTermSearch(set(docA, docB, docC), "text");

    final IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_40,
            new StandardAnalyzer(Version.LUCENE_40)).setOpenMode(OpenMode.APPEND);
    final IndexWriter changewriter = new IndexWriter(directory, indexWriterConfig);

    changewriter.deleteDocuments(new Term("key", "b"));

    final Document docB2 = new Document();
    docB2.add(new Field("key", "b", Field.Store.YES, Field.Index.NOT_ANALYZED));
    docB2.add(new Field("text", "neuer texxxt zum zweiten document", Field.Store.YES, Field.Index.ANALYZED));
    changewriter.addDocument(docB2);
    changewriter.close();
    assertTermSearch(set(docA, docB2, docC), "zum");
    assertTermSearch(set(docA, docC), "text");
    assertTermSearch(set(docB2), "texxxt");
}
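
As a side note, the deleteDocuments/addDocument pair in this test could also be written as a single atomic call to IndexWriter.updateDocument(Term, Iterable), which exists in the same Lucene 4.0 API; a hedged one-line sketch, placed before changewriter.close():

changewriter.updateDocument(new Term("key", "b"), docB2); // atomically deletes the old "b" document and adds docB2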

From source file:de.ingrid.search.utils.facet.DummyIndex.java

License:EUPL

private static File createTestIndex() {
    File indexDirectory = new File("./test_index");
    if (!indexDirectory.exists()) {
        try {
            IndexWriter writer = new IndexWriter(FSDirectory.getDirectory(indexDirectory),
                    new StandardAnalyzer(), true, IndexWriter.MaxFieldLength.LIMITED);
            for (Object[][] doc : IndexDef) {
                Document document = new Document();
                for (Object[] fields : doc) {
                    document.add(new Field((String) fields[0], (String) fields[1], (Field.Store) fields[2],
                            (Field.Index) fields[3]));
                }
                writer.addDocument(document);
            }
            writer.close();
        } catch (CorruptIndexException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (LockObtainFailedException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        } catch (IOException e) {
            // TODO Auto-generated catch block
            e.printStackTrace();
        }
    }
    return indexDirectory;
}

From source file:de.ingrid.upgrader.service.ManifestIndexer.java

License:EUPL

private void index(final List<File> files) throws Exception {
    // create tmp index folder
    LOG.debug("  create tmp index folder");
    final File tmp = new File(_targetFolder, IKeys.TEMP_FOLDER);
    if (!tmp.exists()) {
        tmp.mkdirs();
    }

    // indexer
    LOG.debug("  initialize index writer");
    final StandardAnalyzer analyzer = new StandardAnalyzer();
    final IndexWriter writer = new IndexWriter(tmp, analyzer, true);

    // add files to index
    LOG.debug("  adding documents");
    for (final File file : files) {
        final Document doc = fileToDocument(file);
        if (doc != null) {
            writer.addDocument(doc);
        }
    }

    // optimize
    LOG.debug("  optimizing and closing writer");
    writer.optimize();
    writer.close();

    // close searcher
    final LuceneSearcher searcher = LuceneSearcher.getInstance();
    if (searcher != null) {
        searcher.closeReader();
    }

    // rename index
    LOG.debug("  renaming tmp index folder");
    final File folder = new File(_targetFolder, IKeys.INDEX_FOLDER);
    delete(folder);
    tmp.renameTo(folder);

    // open new searcher
    if (searcher == null) {
        LuceneSearcher.createInstance(folder);
    } else {
        searcher.openReader(folder);
    }
}

From source file:de.jetsli.lumeo.util.LuceneHelperTest.java

License:Apache License

@Test
public void testTermMatching() throws Exception {
    RAMDirectory dir = new RAMDirectory();
    IndexWriter w = new IndexWriter(dir, new IndexWriterConfig(RawLucene.VERSION, new KeywordAnalyzer()));
    Document d = new Document();

    FieldType ft = Mapping.getLongFieldType(true, true);
    d.add(new LongField("id", 1234, ft));
    d.add(new LongField("tmp", 1111, ft));
    w.addDocument(d);

    d = new Document();
    d.add(new LongField("id", 1234, ft));
    d.add(new LongField("tmp", 2222, ft));
    w.updateDocument(getTerm("id", 1234), d);

    d = new Document();
    d.add(new LongField("id", 0, ft));
    w.addDocument(d);
    w.commit();

    IndexReader reader = DirectoryReader.open(w, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    BytesRef bytes = new BytesRef();
    NumericUtils.longToPrefixCoded(1234, 0, bytes);
    TopDocs td = searcher.search(new TermQuery(new Term("id", bytes)), 10);
    assertEquals(1, td.totalHits);
    assertEquals(1234L, searcher.doc(td.scoreDocs[0].doc).getField("id").numericValue());
    assertEquals(2222L, searcher.doc(td.scoreDocs[0].doc).getField("tmp").numericValue());
    w.close();
}

From source file:de.ks.flatadocdb.index.LuceneIndex.java

License:Apache License

protected void writeEntry(SessionEntry sessionEntry, IndexWriter writer) throws IOException {
    LuceneDocumentExtractor luceneExtractor = sessionEntry.getEntityDescriptor().getLuceneExtractor();
    @SuppressWarnings("unchecked")
    Document document = luceneExtractor.createDocument(sessionEntry.getObject());
    if (document == null) {
        document = new Document();
    }
    String id = sessionEntry.getId();
    String fileName = sessionEntry.getFileName();
    NaturalId naturalId = sessionEntry.getNaturalId();
    appendStandardFields(document, id, fileName, naturalId);

    if (log.isTraceEnabled()) {
        document.getFields().forEach(f -> log.trace("Extracted field {} from {}({}). Value={}", //
                f.name(), sessionEntry.getObject(), sessionEntry.getFileName(), //
                f.stringValue().length() > 70 ? f.stringValue().substring(0, 70) : f.stringValue()));
    }
    writer.addDocument(document);
}

From source file:de.ks.lucene.LuceneTaggingTest.java

License:Apache License

@Test
public void testTags() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(analyzer));

    List<String> allTags = Arrays.asList("Bla Blubb", "Blubb", "Blubber Huhu", "Bla Huhu", "Haha");
    for (String tag : allTags) {
        Document doc = new Document();
        doc.add(new TextField("tags", tag, Field.Store.YES));
        writer.addDocument(doc);
    }
    writer.close();

    DirectoryReader directoryReader = DirectoryReader.open(directory);
    IndexSearcher searcher = new IndexSearcher(directoryReader);
    String term = "blubb";
    TermQuery termQuery = new TermQuery(new Term("tags", term));
    TopDocs search = searcher.search(termQuery, 50);
    log("TermQuery", searcher, search);

    FuzzyQuery fuzzyQuery = new FuzzyQuery(new Term("tags", term));
    search = searcher.search(fuzzyQuery, 50);
    log("FuzzyQuery", searcher, search);

    BooleanQuery.Builder builder = new BooleanQuery.Builder();
    builder.add(new TermQuery(new Term("tags", "blubb")), BooleanClause.Occur.SHOULD);
    builder.add(new TermQuery(new Term("tags", "bla")), BooleanClause.Occur.SHOULD);
    BooleanQuery query = builder.build();
    search = searcher.search(query, 50);
    log("BooleanQuery", searcher, search);
}

From source file:de.lmu.ifi.dbs.elki.application.lucene.SimpleTextLoader.java

License:Open Source License

@Override
public void run() {
    try {
        final Directory dir = FSDirectory.open(index);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_36,
                new StandardAnalyzer(Version.LUCENE_36));
        IndexWriter writer = new IndexWriter(dir, config);

        final URI suri = source.toURI();
        for (File inf : source.listFiles()) {
            Document doc = new Document();
            String id = suri.relativize(inf.toURI()).getPath();
            String text = FileUtil.slurp(new FileInputStream(inf));
            doc.add(new Field("id", id, Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("contents", text, Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
        writer.close();
    } catch (IOException e) {
        throw new AbortException("I/O error in lucene.", e);
    }
}

From source file:de.nava.informa.search.ChannelIndexer.java

License:Open Source License

/**
 * Index all given news items.
 *
 * @param createNewIndex - Whether a new index should be created or an
 *                       existing one should be appended to.
 * @param items          - A collection of ItemIF objects.
 */
public void indexItems(boolean createNewIndex, Collection<ItemIF> items) throws java.io.IOException {
    if (LOG.isDebugEnabled()) {
        LOG.debug("Start writing index.");
    }
    IndexWriter writer = new IndexWriter(indexDir, analyzer, createNewIndex);
    try {
        for (ItemIF item1 : items) {
            if (LOG.isDebugEnabled()) {
                LOG.debug("Add item " + item1 + " to index.");
            }
            writer.addDocument(ItemDocument.makeDocument(item1));
        }
        writer.optimize();
        nrOfIndexedItems = writer.docCount();
    } finally {
        writer.close();
    }
    if (LOG.isDebugEnabled()) {
        LOG.debug("Finished writing index.");
    }
}

From source file:de.qaware.chronix.lucene.client.add.LuceneAddingService.java

License:Apache License

/**
 * Adds the given collection of time series to the lucene index.
 * Converts the time series using the default Java object types and the available Lucene fields.
 * If an attribute of a time series is a user-defined data type, it is ignored.
 * <p>
 * Note: The add method does not commit the time series.
 *
 * @param converter   the converter used to convert a time series into a Lucene document
 * @param timeSeries  the collection with time series
 * @param indexWriter the lucene index writer
 * @return true if successful, otherwise false
 */
public static <T> boolean add(TimeSeriesConverter<T> converter, Collection<T> timeSeries,
        IndexWriter indexWriter) {

    if (timeSeries == null || timeSeries.isEmpty()) {
        LOGGER.debug("Collection is empty. Nothing to commit");
        return true;
    }

    timeSeries.parallelStream().forEach(ts -> {
        try {
            indexWriter.addDocument(convert(ts, converter));
        } catch (IOException e) {
            LOGGER.error("Could not add documents to lucene.", e);
        }
    });
    return true;
}
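
Since add(...) intentionally leaves committing to the caller, the calling code would normally commit (or close) the IndexWriter afterwards. The sketch below shows that calling pattern; the helper name addAndCommit and its surrounding setup are assumptions, only LuceneAddingService.add and IndexWriter.commit come from the code above (imports omitted because the Chronix package paths are not shown on this page):

// Hypothetical caller; converter, timeSeries and indexWriter are assumed to be
// created elsewhere by the application.
static <T> void addAndCommit(TimeSeriesConverter<T> converter, Collection<T> timeSeries,
        IndexWriter indexWriter) throws IOException {
    if (LuceneAddingService.add(converter, timeSeries, indexWriter)) {
        indexWriter.commit(); // add(...) does not commit, so flush the new documents here
    }
}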

From source file:de.tuberlin.dima.cuttlefish.preprocessing.indexing.Indexer.java

License:Open Source License

public void index(File indexDir) throws Exception {
    Directory index = new SimpleFSDirectory(indexDir);
    Analyzer analyzer = featureExtraction.analyzerToUse();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_42, analyzer);
    final IndexWriter writer = new IndexWriter(index, config);

    final AtomicInteger numDocsIndexed = new AtomicInteger(0);

    try {

        newsItemExtractor.extract(new NewsItemProcessor() {
            @Override
            public void process(NewsItem newsItem) {

                try {
                    writer.addDocument(featureExtraction.asDocument(newsItem));
                    int numDocs = numDocsIndexed.incrementAndGet();
                    if (numDocs % 100 == 0) {
                        log.info("Indexed {} news articles", numDocs);
                    }

                } catch (IOException e) {
                    log.error("Failed to index news item", e);
                }
            }
        });

    } finally {
        writer.close(true);
    }

    log.info("Indexed {} news articles", numDocsIndexed.get());
}