Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file: action.indexing.Fragments.java

License: Apache License

/**
 * Demonstrates document-level boosting: mail from "important" sender domains
 * is boosted above the default factor of 1.0, mail from "unimportant"
 * domains below it, so scoring favors/penalizes whole documents.
 *
 * @throws IOException if the document cannot be added to the index
 */
public void docBoostMethod() throws IOException {

    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_30),
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        // START
        Document doc = new Document();
        String senderEmail = getSenderEmail();
        String senderName = getSenderName();
        String subject = getSubject();
        String body = getBody();
        doc.add(new Field("senderEmail", senderEmail, Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field("senderName", senderName, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("subject", subject, Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("body", body, Field.Store.NO, Field.Index.ANALYZED));
        String lowerDomain = getSenderDomain().toLowerCase();
        if (isImportant(lowerDomain)) {
            doc.setBoost(1.5F); // #1 good domain boost factor: 1.5
        } else if (isUnimportant(lowerDomain)) {
            doc.setBoost(0.1F); // #2 bad domain boost factor: 0.1
        }
        writer.addDocument(doc);
        // END
    } finally {
        // FIX: close in finally so the writer (and the directory's write
        // lock) is released even when addDocument throws.
        writer.close();
    }
}

From source file: action.indexing.IndexingTest.java

License: Apache License

/**
 * Builds the test fixture: one document per entry of the parallel arrays
 * {@code ids}, {@code unindexed}, {@code unstored} and {@code text}.
 * NOTE(review): assumes all four arrays have the same length as {@code ids}
 * -- confirm against the field declarations.
 */
protected void setUp() throws Exception { //1
    directory = new RAMDirectory();

    IndexWriter writer = getWriter(); //2
    try {
        for (int i = 0; i < ids.length; i++) { //3
            Document doc = new Document();
            doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("country", unindexed[i], Field.Store.YES, Field.Index.NO));
            doc.add(new Field("contents", unstored[i], Field.Store.NO, Field.Index.ANALYZED));
            doc.add(new Field("city", text[i], Field.Store.YES, Field.Index.ANALYZED));
            writer.addDocument(doc);
        }
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }
}

From source file: action.indexing.IndexingTest.java

License: Apache License

/**
 * Verifies that MaxFieldLength(1) truncates each field after its first
 * token: the new document's "bridges" (second token) is never indexed,
 * so the hit count for "bridges" stays at the baseline of 1.
 */
public void testMaxFieldLength() throws IOException {

    assertEquals(1, getHitCount("contents", "bridges")); //1 baseline: one existing match

    IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), //2
            new IndexWriter.MaxFieldLength(1)); //2 index only the first token per field
    try {
        Document doc = new Document(); // 3
        doc.add(new Field("contents", "these bridges can't be found", // 3
                Field.Store.NO, Field.Index.ANALYZED)); // 3
        writer.addDocument(doc); // 3
    } finally {
        // FIX: close in finally so an exception cannot leak the writer/lock.
        writer.close(); // 3
    }

    assertEquals(1, getHitCount("contents", "bridges")); //4 still only the original hit
}

From source file: action.indexing.VerboseIndexing.java

License: Apache License

/**
 * Indexes 100 identical single-field documents into an in-memory index with
 * the writer's info stream pointed at stdout, so the writer's internal
 * diagnostics (segment flushes, merges) are printed while indexing runs.
 *
 * @throws IOException if indexing or optimizing fails
 */
private void index() throws IOException {

    Directory dir = new RAMDirectory();

    IndexWriter writer = new IndexWriter(dir, new WhitespaceAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        writer.setInfoStream(System.out); // emit verbose indexing diagnostics

        for (int i = 0; i < 100; i++) {
            Document doc = new Document();
            doc.add(new Field("keyword", "goober", Field.Store.YES, Field.Index.NOT_ANALYZED));
            writer.addDocument(doc);
        }
        writer.optimize();
    } finally {
        // FIX: close in finally so a mid-loop failure does not leak the writer.
        writer.close();
    }
}

From source file: analysis.SynonymAnalyzerTest.java

License: Apache License

/**
 * Indexes a single sentence with the synonym analyzer under test, then
 * opens a read-only searcher over the resulting index for the test methods.
 */
public void setUp() throws Exception {
    RAMDirectory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, synonymAnalyzer, //#1 index with the analyzer under test
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("content", "The quick brown fox jumps over the lazy dog", Field.Store.YES,
                Field.Index.ANALYZED)); //#2
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    searcher = new IndexSearcher(directory, true);
}

From source file: antnlp.opie.indexsearch.IndexFiles.java

License: Apache License

/** Indexes a single document */
/**
 * Indexes one document per non-blank line of a tab-separated file.
 * Each line is expected to be "docid&lt;TAB&gt;contents"; malformed lines
 * (no tab) are skipped instead of crashing.
 *
 * @param writer       open index writer; its open mode decides add vs. update
 * @param file         UTF-8 text file, one document per line
 * @param lastModified file timestamp stored in the "modified" point field
 * @throws IOException if the file cannot be read or the index written
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {

    // FIX: try-with-resources guarantees the reader chain is closed even when
    // an exception is thrown mid-file (the original only closed on success,
    // and closed the inner reader before the buffered one).
    try (BufferedReader bufReader = new BufferedReader(
            new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8))) {

        String docLine;
        while ((docLine = bufReader.readLine()) != null) {
            docLine = docLine.trim();
            if (docLine.length() == 0)
                continue;
            String[] column = docLine.split("\\t");
            // FIX: guard against lines without a tab, which previously threw
            // ArrayIndexOutOfBoundsException on column[1].
            if (column.length < 2)
                continue;
            System.out.println(column[0]);
            System.out.println(column[1]);

            // Make a new, empty document.
            Document doc = new Document();

            // "docid": indexed (searchable) but not tokenized, and stored,
            // so it can be used as the exact-match update key below.
            Field docidField = new StringField("docid", column[0], Field.Store.YES);
            doc.add(docidField);

            // "modified": a LongPoint, efficiently filterable with
            // PointRangeQuery. Millisecond resolution is often finer than
            // needed; a coarser encoding (e.g. 2011021714 for Feb 17 2011,
            // 2-3 PM) can be used instead if desired.
            doc.add(new LongPoint("modified", lastModified));

            // "contents": tokenized, indexed and stored text of the line.
            doc.add(new TextField("contents", column[1], Field.Store.YES));

            if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                // New index, so we just add the document (no old document can be there).
                System.out.println("adding " + column[0]);
                writer.addDocument(doc);
            } else {
                // Existing index: an old copy of this document may already be
                // indexed, so replace the one matching the exact docid, if present.
                System.out.println("updating " + column[0]);
                writer.updateDocument(new Term("docid", column[0]), doc);
            }
        }
    }
}

From source file: aos.lucene.analysis.codec.MetaphoneAnalyzerTest.java

License: Apache License

/**
 * Verifies that the metaphone-replacement analyzer makes the misspelled
 * query "kool kat" match the indexed text "cool cat" (both reduce to the
 * same phonetic codes).
 */
public void testKoolKat() throws Exception {

    RAMDirectory directory = new RAMDirectory();
    Analyzer analyzer = new MetaphoneReplacementAnalyzer();

    IndexWriter writer = new IndexWriter(directory, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("contents", "cool cat", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    IndexSearcher searcher = new IndexSearcher(directory);
    try {
        Query query = new QueryParser(Version.LUCENE_46, "contents", analyzer).parse("kool kat");

        TopDocs hits = searcher.search(query, 1);
        assertEquals(1, hits.totalHits);
        int docID = hits.scoreDocs[0].doc;
        Document storedDoc = searcher.doc(docID);
        assertEquals("cool cat", storedDoc.get("contents"));
    } finally {
        // FIX: close in finally so a failed assertion does not leak the searcher.
        searcher.close();
    }
}

From source file: aos.lucene.analysis.Fragments.java

License: Apache License

/**
 * Adds a two-field document to a fresh in-memory index.
 */
public void frag2() throws Exception {
    // FIX: 'writer' was declared null and never initialized, so the
    // addDocument call below was a guaranteed NullPointerException.
    // Create a real writer over a RAMDirectory, as the sibling fragments do.
    Directory dir = new RAMDirectory();
    IndexWriter writer = new IndexWriter(dir, new StandardAnalyzer(Version.LUCENE_46),
            IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("title", "This is the title", Field.Store.YES, Field.Index.ANALYZED));
        doc.add(new Field("contents", "...document contents...", Field.Store.NO, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        writer.close();
    }
}

From source file: aos.lucene.analysis.keyword.KeywordAnalyzerTest.java

License: Apache License

/**
 * Indexes a single product document (exact-match part number plus analyzed
 * description) and opens a searcher over it for the test methods.
 */
public void setUp() throws Exception {
    Directory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("partnum", "Q36", Field.Store.NO, Field.Index.NOT_ANALYZED_NO_NORMS)); //A
        doc.add(new Field("description", "Illidium Space Modulator", Field.Store.YES, Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    searcher = new IndexSearcher(directory);
}

From source file: aos.lucene.analysis.positional.PositionalPorterStopAnalyzerTest.java

License: Apache License

/**
 * Indexes one sentence with the porter/stop analyzer under test, then opens
 * a read-only searcher and a query parser bound to the same analyzer.
 */
public void setUp() throws Exception {

    RAMDirectory directory = new RAMDirectory();

    IndexWriter writer = new IndexWriter(directory, porterAnalyzer, IndexWriter.MaxFieldLength.UNLIMITED);
    try {
        Document doc = new Document();
        doc.add(new Field("contents", "The quick brown fox jumps over the lazy dog", Field.Store.YES,
                Field.Index.ANALYZED));
        writer.addDocument(doc);
    } finally {
        // FIX: close in finally so a failed addDocument does not leak the writer.
        writer.close();
    }

    searcher = new IndexSearcher(directory, true);
    parser = new QueryParser(Version.LUCENE_46, "contents", porterAnalyzer);
}