Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.redhat.satellite.search.index.ngram.tests.NGramTestSetup.java

License:Open Source License

/**
 * Builds two in-memory indexes over the same test items — one tokenized with
 * the StandardAnalyzer, the other with the NGramAnalyzer — so test cases can
 * compare the two tokenization strategies.
 */
public void setUp() throws Exception {
    super.setUp();
    initItems();

    this.stanDir = new RAMDirectory();
    this.ngramDir = new RAMDirectory();

    IndexWriter standardWriter = new IndexWriter(this.stanDir, new StandardAnalyzer(), true);
    IndexWriter ngramIndexWriter = new IndexWriter(this.ngramDir, new NGramAnalyzer(min_ngram, max_ngram), true);

    // Index every item identically into both writers: stored + tokenized
    // "name" and "description" fields.
    for (Map<String, String> item : items) {
        Document document = new Document();
        document.add(new Field("name", item.get("name"), Field.Store.YES, Field.Index.TOKENIZED));
        document.add(new Field("description", item.get("description"), Field.Store.YES, Field.Index.TOKENIZED));
        standardWriter.addDocument(document);
        ngramIndexWriter.addDocument(document);
    }

    standardWriter.close();
    ngramIndexWriter.close();
}

From source file:com.redsqirl.SimpleFileIndexer.java

License:Open Source License

/**
 * Indexes a single file: the body is indexed (unstored) under "contents" and
 * the canonical path is stored under "filename". Hidden files, directories,
 * and unreadable or missing files are silently skipped, as are files whose
 * name does not end with {@code suffix} (when suffix is non-null).
 */
public void indexFileWithIndexWriter(IndexWriter indexWriter, File f, String suffix) throws IOException {
    boolean unreadable = f.isHidden() || f.isDirectory() || !f.canRead() || !f.exists();
    if (unreadable) {
        return;
    }
    boolean suffixMismatch = suffix != null && !f.getName().endsWith(suffix);
    if (suffixMismatch) {
        return;
    }

    logger.info("Indexing file " + f.getCanonicalPath());

    Document document = new Document();
    // Reader-valued field: tokenized and indexed but not stored.
    // NOTE(review): FileReader uses the platform default charset — presumably
    // the indexed files are in the platform encoding; confirm, or switch to an
    // InputStreamReader with an explicit charset.
    document.add(new Field("contents", new FileReader(f)));
    document.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));

    indexWriter.addDocument(document);
}

From source file:com.revorg.goat.IndexManager.java

License:Open Source License

/**
 * Creates documents inside of the Lucene Index
 *
 * @param writer            Index Writer Class
 * @param rs                Result Set for the row of data
 * @param columnNamesArray  The array of column names to be added to the document
 * @param indexTypeArray    The array of column types to be added to the document
 * @param tempHTMLDir       The temporary HTML directory for HTML publishing
 * @throws Exception/*from www . j  a  va 2  s.  co  m*/
 * @return ActionResult
 */
private static String createDocument(IndexWriter writer, ResultSet rs, String columnNamesArray[],
        String indexTypeArray[], String tempHTMLDir) {

    try {
        final Document doc = new Document();
        int columns = columnNamesArray.length;

        /*
        public Field(String name, String value, Field.Store store, Field.Index index)
        Store:
               COMPRESS - Store the original field value in the index in a compressed form. This is useful for long documents and for binary valued fields. 
               YES -Store the original field value in the index. This is useful for short texts like a document's title which should be displayed with the results. 
        The value is stored in its original form, i.e. no analyzer is used before it is stored. 
               NO - Do not store the field value in the index. 
                                                           
        Index:
               ANALYZED -  Index the tokens produced by running the field's value through an Analyzer. This is useful for common text
               NOT_ANALYZED - Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used the value will be stored as a single term. 
        This is useful for unique Ids like product numbers.
               NO - Do not index the field value. This field can thus not be searched, but one can still access its contents provided it is stored. 
        */

        for (int i = 0; i < columns; i++) {
            String columnName = columnNamesArray[i].trim().toLowerCase();
            String columnIndexType = indexTypeArray[i]; //Map Column Type To Array
            String columnValue = rs.getString(columnName); //Get Value But Result Sets are at 1 Not 0
            if (columnValue == null) { //Lucene Does Not Like Nulls
                columnValue = "";
            }
            //System.out.println("   Values: " + columnName +  " " + columnIndexType + " " + columnValue + " " + columnValue.length());
            //Can't Add Triggers
            if (columnIndexType.equalsIgnoreCase("TriggerUpdate") == false
                    || columnIndexType.equalsIgnoreCase("TriggerDelete") == false) {
                if (columnIndexType.equalsIgnoreCase("PrimaryKey")
                        || columnIndexType.equalsIgnoreCase("Keyword")
                        || columnIndexType.equalsIgnoreCase("Date")) {
                    //Format Dates to Correct for Sorting
                    if (columnIndexType.equalsIgnoreCase("Date")) {
                        columnValue = columnValue.replace("/", "");
                    }

                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
                }
                //UnIndexed of UnIndexed
                else if (columnIndexType.equalsIgnoreCase("UnIndexed")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NO));
                } else if (columnIndexType.equalsIgnoreCase("Text")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.ANALYZED));
                } else if (columnIndexType.equalsIgnoreCase("UnStored")
                        || columnIndexType.equalsIgnoreCase("HTML")) {
                    if (columnIndexType.equalsIgnoreCase("HTML") && columnValue.length() != 0) {
                        String htmlString = tempHTMLDir + Utilities.CreateUUID() + ".html";
                        File htmlFile = new File(htmlString);
                        BufferedWriter out = new BufferedWriter(new FileWriter(htmlString));
                        out.write(columnValue);
                        out.close();

                        //Parse Document              
                        FileInputStream fis = new FileInputStream(htmlFile);
                        HTMLParser parser = new HTMLParser(fis);
                        // Add the tag-stripped contents as a Reader-valued Text field so it will
                        // get tokenized and indexed.
                        doc.add(new Field(columnName, parser.getReader()));

                        //Parse HTML
                    }
                    //UnStored Field
                    else {
                        doc.add(new Field(columnName, columnValue, Field.Store.NO, Field.Index.ANALYZED));
                    }

                } else if (columnIndexType.equalsIgnoreCase("Binary")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.COMPRESS, Field.Index.NO));
                }
            }
        }

        //Add Document Here
        //System.out.println(doc); 
        writer.addDocument(doc);
        ActionResult = "Success";
        return ActionResult;

    }

    catch (Exception e) {
        ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage();
        //System.out.println("Failure of DbSchema File: " + xmlFile);
    }
    ActionResult = "Failure";
    return ActionResult + ActionResultError;
}

From source file:com.ricky.codelab.lucene.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * /*w w w.  j  a  v  a2s.c o m*/
 * ???
 * @param args
 */
public static void main(String[] args) {
    //Lucene Document??
    String fieldName = "text";
    //
    String text = "IK Analyzer???????";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //?**********************************
        //?   
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //QueryParser?Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat.java

License:Apache License

/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    // Index 25 docs, each with a single-letter term (a..y) and a matching z-prefixed term.
    for (int i = 0; i < 25; i++) {
        String letter = Character.toString((char) (97 + i));
        Document doc = new Document();
        doc.add(newStringField("field", letter, Field.Store.NO));
        doc.add(newStringField("field", "z" + letter, Field.Store.NO));
        writer.addDocument(doc);
    }
    writer.forceMerge(1);

    DirectoryReader reader = DirectoryReader.open(writer);
    assertEquals(1, reader.leaves().size());
    RocanaFieldReader fieldReader = (RocanaFieldReader) reader.leaves().get(0).reader().fields().terms("field");

    // We should see exactly two blocks: one root block (empty-string prefix)
    // and one block for the z* terms (prefix z):
    RocanaStats stats = fieldReader.getStats();
    assertEquals(0, stats.floorBlockCount);
    assertEquals(2, stats.nonFloorBlockCount);

    reader.close();
    writer.close();
    dir.close();
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/** Adds {@code numDocs} documents, each with an unstored "content" text field holding "aaa". */
private void addDocs(IndexWriter writer, int numDocs) throws IOException {
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        Document document = new Document();
        document.add(newTextField("content", "aaa", Field.Store.NO));
        writer.addDocument(document);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/** Adds {@code numDocs} documents, each with an unstored "content" text field holding "bbb". */
private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        Document document = new Document();
        document.add(newTextField("content", "bbb", Field.Store.NO));
        writer.addDocument(document);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/**
 * Adds {@code numDocs} documents, each with an unstored "content" text field
 * holding "ccc" and a stored "id" field holding the loop index.
 */
private void addDocs3(IndexWriter writer, int numDocs) throws IOException {
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        Document document = new Document();
        document.add(newTextField("content", "ccc", Field.Store.NO));
        document.add(newStringField("id", "" + docIndex, Field.Store.YES));
        writer.addDocument(document);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

@Test
public void testStressPerFieldCodec() throws IOException {
    Directory dir = newDirectory(random());
    final int docsPerRound = 97;
    int numRounds = atLeast(1);

    // Each round appends docsPerRound documents, where every document carries
    // a random number of randomly-configured fields, then verifies the running
    // document count.
    for (int round = 0; round < numRounds; round++) {
        int fieldsPerDoc = TestUtil.nextInt(random(), 30, 60);
        IndexWriterConfig config = newIndexWriterConfig(random(), new MockAnalyzer(random()));
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = newWriter(dir, config);

        for (int docIndex = 0; docIndex < docsPerRound; docIndex++) {
            final Document doc = new Document();
            for (int fieldIndex = 0; fieldIndex < fieldsPerDoc; fieldIndex++) {
                // Randomize tokenization and norms per field to stress the codec.
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.setTokenized(random().nextBoolean());
                customType.setOmitNorms(random().nextBoolean());
                String randomText = TestUtil.randomRealisticUnicodeString(random(), 128);
                doc.add(newField("" + fieldIndex, randomText, customType));
            }
            writer.addDocument(doc);
        }

        if (random().nextBoolean()) {
            writer.forceMerge(1);
        }
        writer.commit();
        assertEquals((round + 1) * docsPerRound, writer.maxDoc());
        writer.close();
    }

    dir.close();
}

From source file:com.search.lucene.demo.facet.DistanceFacetsExample.java

License:Apache License

/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location. Each coordinate is indexed
    // both as a DoubleField (for bounding box/range queries) and as a
    // NumericDocValuesField (for distance scoring).
    addLocationDoc(writer, 40.759011, -73.9844722);
    addLocationDoc(writer, 40.718266, -74.007819);
    addLocationDoc(writer, 40.7051157, -74.0088305);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.close();
}

/**
 * Adds one document for a latitude/longitude point, indexed both for range
 * queries (DoubleField) and for scoring (NumericDocValuesField).
 *
 * @param writer    destination index writer
 * @param latitude  latitude in degrees
 * @param longitude longitude in degrees
 */
private void addLocationDoc(IndexWriter writer, double latitude, double longitude) throws IOException {
    Document doc = new Document();
    doc.add(new DoubleField("latitude", latitude, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(latitude)));
    doc.add(new DoubleField("longitude", longitude, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(longitude)));
    writer.addDocument(doc);
}