Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file: action.indexing.Fragments.java

License: Apache License

/**
 * Demonstrates document-level boosting: mail from "important" sender domains
 * is boosted above the default factor of 1.0, mail from "unimportant"
 * domains below it, so scoring favors/penalizes whole documents.
 *
 * @throws IOException if the document cannot be added to the index
 */
public void docBoostMethod() throws IOException {

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        // START
        Document doc = new Document();
        String senderEmail = getSenderEmail();
        String senderName = getSenderName();
        String subject = getSubject();
        String body = getBody();
        doc.add(new Field("senderEmail", senderEmail, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("senderName", senderName, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("body", body, Field.Store.NO, Field.Index.ANALYZED));
        String lowerDomain = getSenderDomain().toLowerCase();
        if (isImportant(lowerDomain)) {
            doc.setBoost(1.5F); // #1 good domain boost factor: 1.5
        } else if (isUnimportant(lowerDomain)) {
            doc.setBoost(0.1F); // #2 bad domain boost factor: 0.1
        }
        writer.addDocument(doc);
        // END
    } finally {
        // FIX: close in finally so the writer (and the directory's write
        // lock) is released even when addDocument throws.
        writer.close();
    }
}

From source file: action.indexing.IndexingTest.java

License: Apache License

/**
 * Builds the test fixture: one document per entry of the parallel arrays
 * {@code ids}, {@code unindexed}, {@code unstored} and {@code text}.
 * NOTE(review): assumes all four arrays have the same length as {@code ids}
 * -- confirm against the field declarations.
 */
protected void setUp() throws Exception { //1
    directory = new RAMDirectory();

    IndexWriter writer = getWriter(); //2
    try {
        for (int i = 0; i < ids.length; i++) { //3
            Document doc = new Document();
            doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
            doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
            doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }
}

From source file: action.indexing.IndexingTest.java

License: Apache License

/**
 * Verifies that MaxFieldLength(1) truncates each field after its first
 * token: the new document's "bridges" (second token) is never indexed,
 * so the hit count for "bridges" stays at the baseline of 1.
 */
public void testMaxFieldLength() throws IOException {

    assertEquals(1, getHitCount("contents", "bridges")); //1 baseline: one existing match

    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), //2
            new IndexWriter.MaxFieldLength(1)); //2 index only the first token per field
    try {
        Document doc = new Document(); // 3
        doc.add(new Field("contents", "these bridges can't be found", // 3
                Field.Store.NO, Field.Index.ANALYZED)); // 3
        writer.addDocument(doc); // 3
    } finally {
        // FIX: close in finally so an exception cannot leak the writer/lock.
        writer.close(); // 3
    }

    assertEquals(1, getHitCount("contents", "bridges")); //4 still only the original hit
}

From source file: action.indexing.VerboseIndexing.java

License: Apache License

/**
 * Indexes 100 identical single-field documents into an in-memory index with
 * the writer's info stream pointed at stdout, so the writer's internal
 * diagnostics (segment flushes, merges) are printed while indexing runs.
 *
 * @throws IOException if indexing or optimizing fails
 */
private void index() throws IOException {

    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        writer.setInfoStream(System.out); // emit verbose indexing diagnostics

        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            doc.add(new Field("keyword", "goober", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
    } finally {
        // FIX: close in finally so a mid-loop failure does not leak the writer.
        writer.close();
    }
}

From source file: analysis.SynonymAnalyzerTest.java

License: Apache License

/**
 * Indexes a single sentence with the synonym analyzer under test, then
 * opens a read-only searcher over the resulting index for the test methods.
 */
public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, synonymAnalyzer, //#1 index with the analyzer under test
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("content", "The quick brown fox jumps over the lazy dog", Field.Store.YES,
                Field.Index.ANALYZED)); //#2
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    searcher = new IndexSearcher(directory, true);
}

From source file: antnlp.opie.indexsearch.IndexFiles.java

License: Apache License

/** Indexes a single document */
/**
 * Indexes one document per non-blank line of a tab-separated file.
 * Each line is expected to be "docid&lt;TAB&gt;contents"; malformed lines
 * (no tab) are skipped instead of crashing.
 *
 * @param writer       open index writer; its open mode decides add vs. update
 * @param file         UTF-8 text file, one document per line
 * @param lastModified file timestamp stored in the "modified" point field
 * @throws IOException if the file cannot be read or the index written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {

    // FIX: try-with-resources guarantees the reader chain is closed even when
    // an exception is thrown mid-file (the original only closed on success,
    // and closed the inner reader before the buffered one).
    try (BufferedReader bufReader = new BufferedReader(
            new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8))) {

        String docLine;
        while ((docLine = bufReader.readLine()) != null) {
            docLine = docLine.trim();
            if (docLine.length() == 0)
                continue;
            String[] column = docLine.split("\\t");
            // FIX: guard against lines without a tab, which previously threw
            // ArrayIndexOutOfBoundsException on column[1].
            if (column.length < 2)
                continue;
            System.out.println(column[0]);
            System.out.println(column[1]);

            // Make a new, empty document.
            Document doc = new Document();

            // "docid": indexed (searchable) but not tokenized, and stored,
            // so it can be used as the exact-match update key below.
            Field docidField = new StringField("docid", column[0], Field.Store.YES);
            doc.add(docidField);

            // "modified": a LongPoint, efficiently filterable with
            // PointRangeQuery. Millisecond resolution is often finer than
            // needed; a coarser encoding (e.g. 2011021714 for Feb 17 2011,
            // 2-3 PM) can be used instead if desired.
            doc.add(new LongPoint("modified", lastModified));

            // "contents": tokenized, indexed and stored text of the line.
            doc.add(new TextField("contents", column[1], Field.Store.YES));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there).
                System.out.println("adding " + column[0]);
                writer.addDocument(doc);
            } else {
                // Existing index: an old copy of this document may already be
                // indexed, so replace the one matching the exact docid, if present.
                System.out.println("updating " + column[0]);
                writer.updateDocument(new Term("docid", column[0]), doc);
            }
        }
    }
}

From source file: aos.lucene.analysis.codec.MetaphoneAnalyzerTest.java

License: Apache License

/**
 * Verifies that the metaphone-replacement analyzer makes the misspelled
 * query "kool kat" match the indexed text "cool cat" (both reduce to the
 * same phonetic codes).
 */
public void testKoolKat() throws Exception {

    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();

    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("contents", "cool cat", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    IndexSearcher searcher = new IndexSearcher(directory);
    try {
        Query query = new QueryParser(Version.LUCENE_46, "contents", analyzer).parse("kool kat");

        TopDocs hits = searcher.search(query, 1);
        assertEquals(1, hits.totalHits);
        int docID = hits.scoreDocs[0].doc;
        Document storedDoc = searcher.doc(docID);
        assertEquals("cool cat", storedDoc.get("contents"));
    } finally {
        // FIX: close in finally so a failed assertion does not leak the searcher.
        searcher.close();
    }
}

From source file: aos.lucene.analysis.Fragments.java

License: Apache License

/**
 * Adds a two-field document to a fresh in-memory index.
 */
public void frag2() throws Exception {
    // FIX: 'writer' was declared null and never initialized, so the
    // addDocument call below was a guaranteed NullPointerException.
    // Create a real writer over a RAMDirectory, as the sibling fragments do.
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_46),
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("title", "This is the title", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("contents", "...document contents...", Field.Store.NO, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        writer.close();
    }
}

From source file: aos.lucene.analysis.keyword.KeywordAnalyzerTest.java

License: Apache License

/**
 * Indexes a single product document (exact-match part number plus analyzed
 * description) and opens a searcher over it for the test methods.
 */
public void setUp() throws Exception {
    Directory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("partnum", "Q36", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); //A
        doc.add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    searcher = new IndexSearcher(directory);
}

From source file: aos.lucene.analysis.positional.PositionalPorterStopAnalyzerTest.java

License: Apache License

/**
 * Indexes one sentence with the porter/stop analyzer under test, then opens
 * a read-only searcher and a query parser bound to the same analyzer.
 */
public void setUp() throws Exception {

    RAMDirectory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, porterAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("contents", "The quick brown fox jumps over the lazy dog", Field.Store.YES,
                Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    searcher = new IndexSearcher(directory, true);
    parser = new QueryParser(Version.LUCENE_46, "contents", porterAnalyzer);
}