Example usage of org.apache.lucene.index.IndexWriter.addDocument

List of usage examples for org.apache.lucene.index.IndexWriter.addDocument

Introduction

On this page you can find usage examples for the addDocument method of org.apache.lucene.index.IndexWriter.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.mathworks.xzheng.analysis.codec.MetaphoneAnalyzerTest.java

License:Apache License

/**
 * Indexes the text "cool cat" using a {@code MetaphoneReplacementAnalyzer}
 * and verifies that the differently spelled query "kool kat", parsed with
 * the same analyzer, retrieves that document.
 */
public void testKoolKat() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();

    // Build a fresh index holding exactly one document.
    IndexWriterConfig writerConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    writerConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    IndexWriter indexWriter = new IndexWriter(directory, writerConfig);

    Document indexed = new Document();
    Field contents = new Field("contents", "cool cat", Field.Store.YES, Field.Index.ANALYZED);
    indexed.add(contents);
    indexWriter.addDocument(indexed);
    indexWriter.close();

    IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));

    // The query text goes through the same analyzer that was used for indexing.
    QueryParser queryParser = new QueryParser(Version.LUCENE_46, "contents", analyzer);
    Query query = queryParser.parse("kool kat");

    TopDocs hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits);

    // The stored value must be the original text.
    Document stored = searcher.doc(hits.scoreDocs[0].doc);
    assertEquals("cool cat", stored.get("contents"));
}

From source file:com.mathworks.xzheng.analysis.keyword.KeywordAnalyzerTest.java

License:Apache License

/**
 * Creates an in-memory index containing a single document and opens the
 * {@code searcher} field on it.
 */
public void setUp() throws Exception {
    Directory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_46, new SimpleAnalyzer(Version.LUCENE_46)));

    Document part = new Document();

    // "partnum" is indexed as-is (not analyzed, no norms) and not stored.
    Field partNumber = new Field("partnum", "Q36", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS);
    part.add(partNumber);

    // "description" is analyzed and stored.
    Field description = new Field("description", "Illidium Space Modulator", Field.Store.YES,
            Field.Index.ANALYZED);
    part.add(description);

    indexWriter.addDocument(part);
    indexWriter.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}

From source file:com.mathworks.xzheng.analysis.positional.PositionalPorterStopAnalyzerTest.java

License:Apache License

/**
 * Indexes one sentence into an in-memory directory with {@code porterAnalyzer}
 * and initialises the {@code searcher} and {@code parser} fields.
 */
public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_46, porterAnalyzer));

    Document sentence = new Document();
    Field contents = new Field("contents", "The quick brown fox jumps over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED);
    sentence.add(contents);

    indexWriter.addDocument(sentence);
    indexWriter.close();

    // The parser uses the same analyzer as the writer.
    searcher = new IndexSearcher(DirectoryReader.open(directory));
    parser = new QueryParser(Version.LUCENE_46, "contents", porterAnalyzer);
}

From source file:com.mathworks.xzheng.analysis.synonym.SynonymAnalyzerTest.java

License:Apache License

/**
 * Indexes one sentence into an in-memory directory with {@code synonymAnalyzer}
 * and opens the {@code searcher} field on the result.
 */
public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();

    // The writer analyzes documents with synonymAnalyzer.
    IndexWriter indexWriter = new IndexWriter(directory,
            new IndexWriterConfig(Version.LUCENE_46, synonymAnalyzer));

    // Single document with one analyzed, stored field.
    Document sentence = new Document();
    Field content = new Field("content", "The quick brown fox jumps over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED);
    sentence.add(content);

    indexWriter.addDocument(sentence);
    indexWriter.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}

From source file:com.mathworks.xzheng.analysis.UsingAnalyzersExample.java

License:Apache License

/**
 * This method doesn't do anything, except compile correctly.
 * This is used to show snippets of how Analyzers are used: when creating
 * an IndexWriter, when adding a single document, and when parsing a query.
 *
 * @throws IOException    if the index cannot be written
 * @throws ParseException if the query expression cannot be parsed
 */
public void someMethod() throws IOException, ParseException {
    RAMDirectory directory = new RAMDirectory();

    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_46);

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriter writer = new IndexWriter(directory, config);
    try {
        Document doc = new Document();
        doc.add(new Field("title", "This is the title", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("contents", "...document contents...", Field.Store.NO, Field.Index.ANALYZED));

        // Analyzed with the analyzer configured on the writer.
        writer.addDocument(doc);

        // Analyzed with the analyzer passed for this one call.
        writer.addDocument(doc, analyzer);
    } finally {
        // Always release the writer, even if adding a document fails.
        writer.close();
    }

    String expression = "some query";

    // Variant 1: create the parser and parse in one expression.
    Query query = new QueryParser(Version.LUCENE_46, "contents", analyzer).parse(expression);

    // Variant 2: keep the parser in a variable and call parse on it.
    QueryParser parser = new QueryParser(Version.LUCENE_46, "contents", analyzer);
    query = parser.parse(expression);
}

From source file:com.mathworks.xzheng.common.CreateTestIndex.java

License:Apache License

/**
 * Indexes every file found under a data directory into a freshly created
 * index.
 *
 * @param args {@code args[0]} is the data directory to scan,
 *             {@code args[1]} is the directory the index is written to
 * @throws IOException              if reading the data or writing the index fails
 * @throws IllegalArgumentException if fewer than two arguments are supplied
 */
public static void main(String[] args) throws IOException {
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
    if (args == null || args.length < 2) {
        throw new IllegalArgumentException("Usage: CreateTestIndex <data dir> <index dir>");
    }
    String dataDir = args[0];
    String indexDir = args[1];

    List<File> results = new ArrayList<File>();
    findFiles(results, new File(dataDir));
    System.out.println(results.size() + " books to index");

    Directory dir = FSDirectory.open(new File(indexDir));
    try {
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, new MyStandardAnalyzer());
        // CREATE replaces any index already present in the target directory.
        config.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
        IndexWriter w = new IndexWriter(dir, config);
        boolean success = false;
        try {
            for (File file : results) {
                Document doc = getDocument(dataDir, file);
                w.addDocument(doc);
            }
            success = true;
        } finally {
            if (success) {
                w.close();
            } else {
                // Discard the partially built index rather than committing it.
                w.rollback();
            }
        }
    } finally {
        dir.close();
    }
}

From source file:com.mathworks.xzheng.indexing.Fragments.java

License:Apache License

/**
 * Indexes one e-mail document whose fields are boosted according to the
 * sender's domain: 1.5 for an important domain, 0.1 for an unimportant one,
 * and no boost otherwise.
 *
 * <p>{@code Document.setBoost} no longer exists in Lucene 4.x, so the boost
 * is applied to each field of the document instead.
 *
 * @throws IOException if the index cannot be written
 */
public void docBoostMethod() throws IOException {

    Directory dir = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new StandardAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(dir, config);

    // START
    Document doc = new Document();
    String senderEmail = getSenderEmail();
    String senderName = getSenderName();
    String subject = getSubject();
    String body = getBody();
    Field[] fields = {
            new Field("senderEmail", senderEmail, Field.Store.YES, Field.Index.NOT_ANALYZED),
            new Field("senderName", senderName, Field.Store.YES, Field.Index.ANALYZED),
            new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED),
            new Field("body", body, Field.Store.NO, Field.Index.ANALYZED) };

    String lowerDomain = getSenderDomain().toLowerCase();
    float boost = 1.0F;
    if (isImportant(lowerDomain)) {
        boost = 1.5F; // good domain boost factor
    } else if (isUnimportant(lowerDomain)) {
        boost = 0.1F; // bad domain boost factor
    }

    for (Field field : fields) {
        // All four fields are indexed with norms, so an index-time boost is allowed.
        if (boost != 1.0F) {
            field.setBoost(boost);
        }
        doc.add(field);
    }
    writer.addDocument(doc);
    // END
    writer.close();
}

From source file:com.mathworks.xzheng.indexing.IndexingTest.java

License:Apache License

/**
 * Checks that a field-length limit stops later tokens from being indexed:
 * a document whose text contains "bridges" beyond the limit must not change
 * the hit count for "bridges".
 *
 * <p>NOTE(review): assumes {@code getHitCount} searches the current contents
 * of {@code directory} and that the fixture already holds exactly one
 * document matching "bridges" - confirm against the rest of the test class.
 *
 * @throws IOException if the index cannot be written
 */
public void testMaxFieldLength() throws IOException {

    // One match before anything is added.
    assertEquals(1, getHitCount("contents", "bridges"));

    // Only the first token of each field is indexed. Without this limit the
    // new document below would also match "bridges".
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new org.apache.lucene.analysis.miscellaneous.LimitTokenCountAnalyzer(
                    new WhitespaceAnalyzer(Version.LUCENE_46), 1));
    IndexWriter writer = new IndexWriter(directory, config);
    try {
        // "bridges" is the second token, so it falls beyond the limit.
        Document doc = new Document();
        doc.add(new Field("contents", "these bridges can't be found",
                Field.Store.NO, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        writer.close();
    }

    // Still one match: the new document's "bridges" token was never indexed.
    assertEquals(1, getHitCount("contents", "bridges"));
}

From source file:com.mathworks.xzheng.indexing.VerboseIndexing.java

License:Apache License

/**
 * Indexes 100 small documents into an in-memory directory while printing
 * the writer's diagnostic output to {@code System.out}, then merges the
 * index down to one segment.
 *
 * @throws IOException if the index cannot be written
 */
private void index() throws IOException {

    Directory dir = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    // Must be set before the writer is built: the writer takes its info
    // stream from the config at construction time.
    config.setInfoStream(System.out);

    IndexWriter writer = new IndexWriter(dir, config);
    try {
        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            doc.add(new Field("keyword", "goober", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        // Replacement for the old optimize() call. forceMerge(Integer.MAX_VALUE)
        // would never trigger a merge.
        writer.forceMerge(1);
    } finally {
        writer.close();
    }
}

From source file:com.mathworks.xzheng.searching.NearRealTimeTest.java

License:Apache License

/**
 * Exercises near-real-time search: readers are obtained from the open
 * {@code IndexWriter}, so added and deleted documents become searchable
 * without a commit.
 */
public void testNearRealTime() throws Exception {
    Directory dir = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new StandardAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(dir, config);
    for (int i = 0; i < 10; i++) {
        Document doc = new Document();
        doc.add(new Field("id", "" + i, Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
        doc.add(new Field("text", "aaa", Field.Store.NO, Field.Index.ANALYZED));
        writer.addDocument(doc);
    }

    // Open the reader from the writer, not the directory. Nothing has been
    // committed yet, so a directory-based reader could not see these documents.
    DirectoryReader reader = DirectoryReader.open(writer, true);
    IndexSearcher searcher = new IndexSearcher(reader);

    Query query = new TermQuery(new Term("text", "aaa"));
    TopDocs docs = searcher.search(query, 1);
    assertEquals(10, docs.totalHits); // all ten uncommitted documents are visible

    // Delete one document ...
    writer.deleteDocuments(new Term("id", "7"));

    // ... and add a new one with different text.
    Document doc = new Document();
    doc.add(new Field("id", "11", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS));
    doc.add(new Field("text", "bbb", Field.Store.NO, Field.Index.ANALYZED));
    writer.addDocument(doc);

    // Reopen against the writer (successor of the old reader.reopen()).
    // A new reader is returned because the writer has pending changes.
    DirectoryReader newReader = DirectoryReader.openIfChanged(reader, writer, true);
    assertFalse(reader == newReader);
    reader.close(); // the old reader is no longer needed
    searcher = new IndexSearcher(newReader);

    // The deletion is reflected: nine "aaa" documents remain.
    TopDocs hits = searcher.search(query, 10);
    assertEquals(9, hits.totalHits);

    // The newly added document is searchable.
    query = new TermQuery(new Term("text", "bbb"));
    hits = searcher.search(query, 1);
    assertEquals(1, hits.totalHits);

    newReader.close();
    writer.close();
}