Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.redhat.satellite.search.index.ngram.tests.NGramTestSetup.java

License:Open Source License

/**
 * Builds two in-memory indexes over the same test items — one tokenized with
 * the StandardAnalyzer, the other with the NGramAnalyzer — so test cases can
 * compare the two tokenization strategies.
 */
public void setUp() throws Exception {
    super.setUp();
    initItems();

    this.stanDir = new RAMDirectory();
    this.ngramDir = new RAMDirectory();

    IndexWriter standardWriter = new IndexWriter(this.stanDir, new StandardAnalyzer(), true);
    IndexWriter ngramIndexWriter = new IndexWriter(this.ngramDir, new NGramAnalyzer(min_ngram, max_ngram), true);

    // Index every item identically into both writers: stored + tokenized
    // "name" and "description" fields.
    for (Map<String, String> item : items) {
        Document document = new Document();
        document.add(new Field("name", item.get("name"), Field.Store.YES, Field.Index.TOKENIZED));
        document.add(new Field("description", item.get("description"), Field.Store.YES, Field.Index.TOKENIZED));
        standardWriter.addDocument(document);
        ngramIndexWriter.addDocument(document);
    }

    standardWriter.close();
    ngramIndexWriter.close();
}

From source file:com.redsqirl.SimpleFileIndexer.java

License:Open Source License

/**
 * Indexes a single file: the body is indexed (unstored) under "contents" and
 * the canonical path is stored under "filename". Hidden files, directories,
 * and unreadable or missing files are silently skipped, as are files whose
 * name does not end with {@code suffix} (when suffix is non-null).
 */
public void indexFileWithIndexWriter(IndexWriter indexWriter, File f, String suffix) throws IOException {
    boolean unreadable = f.isHidden() || f.isDirectory() || !f.canRead() || !f.exists();
    if (unreadable) {
        return;
    }
    boolean suffixMismatch = suffix != null && !f.getName().endsWith(suffix);
    if (suffixMismatch) {
        return;
    }

    logger.info("Indexing file " + f.getCanonicalPath());

    Document document = new Document();
    // Reader-valued field: tokenized and indexed but not stored.
    // NOTE(review): FileReader uses the platform default charset — presumably
    // the indexed files are in the platform encoding; confirm, or switch to an
    // InputStreamReader with an explicit charset.
    document.add(new Field("contents", new FileReader(f)));
    document.add(new Field("filename", f.getCanonicalPath(), Field.Store.YES, Field.Index.ANALYZED));

    indexWriter.addDocument(document);
}

From source file:com.revorg.goat.IndexManager.java

License:Open Source License

/**
 * Creates documents inside of the Lucene Index
 *
 * @param writer            Index Writer Class
 * @param rs                Result Set for the row of data
 * @param columnNamesArray  The array of column names to be added to the document
 * @param indexTypeArray    The array of column types to be added to the document
 * @param tempHTMLDir       The temporary HTML directory for HTML publishing
 * @throws Exception/*from www . j  a  va 2  s.  co  m*/
 * @return ActionResult
 */
private static String createDocument(IndexWriter writer, ResultSet rs, String columnNamesArray[],
        String indexTypeArray[], String tempHTMLDir) {

    try {
        final Document doc = new Document();
        int columns = columnNamesArray.length;

        /*
        public Field(String name, String value, Field.Store store, Field.Index index)
        Store:
               COMPRESS - Store the original field value in the index in a compressed form. This is useful for long documents and for binary valued fields. 
               YES -Store the original field value in the index. This is useful for short texts like a document's title which should be displayed with the results. 
        The value is stored in its original form, i.e. no analyzer is used before it is stored. 
               NO - Do not store the field value in the index. 
                                                           
        Index:
               ANALYZED -  Index the tokens produced by running the field's value through an Analyzer. This is useful for common text
               NOT_ANALYZED - Index the field's value without using an Analyzer, so it can be searched. As no analyzer is used the value will be stored as a single term. 
        This is useful for unique Ids like product numbers.
               NO - Do not index the field value. This field can thus not be searched, but one can still access its contents provided it is stored. 
        */

        for (int i = 0; i < columns; i++) {
            String columnName = columnNamesArray[i].trim().toLowerCase();
            String columnIndexType = indexTypeArray[i]; //Map Column Type To Array
            String columnValue = rs.getString(columnName); //Get Value But Result Sets are at 1 Not 0
            if (columnValue == null) { //Lucene Does Not Like Nulls
                columnValue = "";
            }
            //System.out.println("   Values: " + columnName +  " " + columnIndexType + " " + columnValue + " " + columnValue.length());
            //Can't Add Triggers
            if (columnIndexType.equalsIgnoreCase("TriggerUpdate") == false
                    || columnIndexType.equalsIgnoreCase("TriggerDelete") == false) {
                if (columnIndexType.equalsIgnoreCase("PrimaryKey")
                        || columnIndexType.equalsIgnoreCase("Keyword")
                        || columnIndexType.equalsIgnoreCase("Date")) {
                    //Format Dates to Correct for Sorting
                    if (columnIndexType.equalsIgnoreCase("Date")) {
                        columnValue = columnValue.replace("/", "");
                    }

                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NOT_ANALYZED));
                }
                //UnIndexed of UnIndexed
                else if (columnIndexType.equalsIgnoreCase("UnIndexed")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.NO));
                } else if (columnIndexType.equalsIgnoreCase("Text")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.YES, Field.Index.ANALYZED));
                } else if (columnIndexType.equalsIgnoreCase("UnStored")
                        || columnIndexType.equalsIgnoreCase("HTML")) {
                    if (columnIndexType.equalsIgnoreCase("HTML") && columnValue.length() != 0) {
                        String htmlString = tempHTMLDir + Utilities.CreateUUID() + ".html";
                        File htmlFile = new File(htmlString);
                        BufferedWriter out = new BufferedWriter(new FileWriter(htmlString));
                        out.write(columnValue);
                        out.close();

                        //Parse Document              
                        FileInputStream fis = new FileInputStream(htmlFile);
                        HTMLParser parser = new HTMLParser(fis);
                        // Add the tag-stripped contents as a Reader-valued Text field so it will
                        // get tokenized and indexed.
                        doc.add(new Field(columnName, parser.getReader()));

                        //Parse HTML
                    }
                    //UnStored Field
                    else {
                        doc.add(new Field(columnName, columnValue, Field.Store.NO, Field.Index.ANALYZED));
                    }

                } else if (columnIndexType.equalsIgnoreCase("Binary")) {
                    doc.add(new Field(columnName, columnValue, Field.Store.COMPRESS, Field.Index.NO));
                }
            }
        }

        //Add Document Here
        //System.out.println(doc); 
        writer.addDocument(doc);
        ActionResult = "Success";
        return ActionResult;

    }

    catch (Exception e) {
        ActionResultError = " caught a " + e.getClass() + " with message: " + e.getMessage();
        //System.out.println("Failure of DbSchema File: " + xmlFile);
    }
    ActionResult = "Failure";
    return ActionResult + ActionResultError;
}

From source file:com.ricky.codelab.lucene.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 * /*w w w.  j  a  v  a2s.c o m*/
 * ???
 * @param args
 */
public static void main(String[] args) {
    //Lucene Document??
    String fieldName = "text";
    //
    String text = "IK Analyzer???????";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        //?**********************************
        //?   
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //QueryParser?Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.rocana.lucene.codec.v1.TestBlockPostingsFormat.java

License:Apache License

/** Make sure the final sub-block(s) are not skipped. */
public void testFinalBlock() throws Exception {
    Directory dir = newDirectory();
    IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new MockAnalyzer(random())));

    // Index 25 docs, each with a single-letter term (a..y) and a matching z-prefixed term.
    for (int i = 0; i < 25; i++) {
        String letter = Character.toString((char) (97 + i));
        Document doc = new Document();
        doc.add(newStringField("field", letter, Field.Store.NO));
        doc.add(newStringField("field", "z" + letter, Field.Store.NO));
        writer.addDocument(doc);
    }
    writer.forceMerge(1);

    DirectoryReader reader = DirectoryReader.open(writer);
    assertEquals(1, reader.leaves().size());
    RocanaFieldReader fieldReader = (RocanaFieldReader) reader.leaves().get(0).reader().fields().terms("field");

    // We should see exactly two blocks: one root block (empty-string prefix)
    // and one block for the z* terms (prefix z):
    RocanaStats stats = fieldReader.getStats();
    assertEquals(0, stats.floorBlockCount);
    assertEquals(2, stats.nonFloorBlockCount);

    reader.close();
    writer.close();
    dir.close();
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/** Adds {@code numDocs} documents, each with an unstored "content" text field holding "aaa". */
private void addDocs(IndexWriter writer, int numDocs) throws IOException {
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        Document document = new Document();
        document.add(newTextField("content", "aaa", Field.Store.NO));
        writer.addDocument(document);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/** Adds {@code numDocs} documents, each with an unstored "content" text field holding "bbb". */
private void addDocs2(IndexWriter writer, int numDocs) throws IOException {
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        Document document = new Document();
        document.add(newTextField("content", "bbb", Field.Store.NO));
        writer.addDocument(document);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

/**
 * Adds {@code numDocs} documents, each with an unstored "content" text field
 * holding "ccc" and a stored "id" field holding the loop index.
 */
private void addDocs3(IndexWriter writer, int numDocs) throws IOException {
    for (int docIndex = 0; docIndex < numDocs; docIndex++) {
        Document document = new Document();
        document.add(newTextField("content", "ccc", Field.Store.NO));
        document.add(newStringField("id", "" + docIndex, Field.Store.YES));
        writer.addDocument(document);
    }
}

From source file:com.rocana.lucene.codec.v1.TestRocanaPerFieldPostingsFormat2.java

License:Apache License

@Test
public void testStressPerFieldCodec() throws IOException {
    Directory dir = newDirectory(random());
    final int docsPerRound = 97;
    int numRounds = atLeast(1);

    // Each round appends docsPerRound documents, where every document carries
    // a random number of randomly-configured fields, then verifies the running
    // document count.
    for (int round = 0; round < numRounds; round++) {
        int fieldsPerDoc = TestUtil.nextInt(random(), 30, 60);
        IndexWriterConfig config = newIndexWriterConfig(random(), new MockAnalyzer(random()));
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        IndexWriter writer = newWriter(dir, config);

        for (int docIndex = 0; docIndex < docsPerRound; docIndex++) {
            final Document doc = new Document();
            for (int fieldIndex = 0; fieldIndex < fieldsPerDoc; fieldIndex++) {
                // Randomize tokenization and norms per field to stress the codec.
                FieldType customType = new FieldType(TextField.TYPE_NOT_STORED);
                customType.setTokenized(random().nextBoolean());
                customType.setOmitNorms(random().nextBoolean());
                String randomText = TestUtil.randomRealisticUnicodeString(random(), 128);
                doc.add(newField("" + fieldIndex, randomText, customType));
            }
            writer.addDocument(doc);
        }

        if (random().nextBoolean()) {
            writer.forceMerge(1);
        }
        writer.commit();
        assertEquals((round + 1) * docsPerRound, writer.maxDoc());
        writer.close();
    }

    dir.close();
}

From source file:com.search.lucene.demo.facet.DistanceFacetsExample.java

License:Apache License

/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(new WhitespaceAnalyzer()).setOpenMode(OpenMode.CREATE));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location. Each coordinate is indexed
    // both as a DoubleField (for bounding box/range queries) and as a
    // NumericDocValuesField (for distance scoring).
    addLocationDoc(writer, 40.759011, -73.9844722);
    addLocationDoc(writer, 40.718266, -74.007819);
    addLocationDoc(writer, 40.7051157, -74.0088305);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.close();
}

/**
 * Adds one document for a latitude/longitude point, indexed both for range
 * queries (DoubleField) and for scoring (NumericDocValuesField).
 *
 * @param writer    destination index writer
 * @param latitude  latitude in degrees
 * @param longitude longitude in degrees
 */
private void addLocationDoc(IndexWriter writer, double latitude, double longitude) throws IOException {
    Document doc = new Document();
    doc.add(new DoubleField("latitude", latitude, Field.Store.NO));
    doc.add(new NumericDocValuesField("latitude", Double.doubleToRawLongBits(latitude)));
    doc.add(new DoubleField("longitude", longitude, Field.Store.NO));
    doc.add(new NumericDocValuesField("longitude", Double.doubleToRawLongBits(longitude)));
    writer.addDocument(doc);
}