List of usage examples for org.apache.lucene.index IndexWriter addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
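Before the project examples below, here is a minimal, self-contained sketch (not from any of the listed files) of the basic addDocument call against an in-memory index. It assumes a Lucene 7.x-era classpath, matching the long return type in the signature above; RAMDirectory is deprecated in later releases, so treat this purely as an illustration.

    import java.io.IOException;

    import org.apache.lucene.analysis.standard.StandardAnalyzer;
    import org.apache.lucene.document.Document;
    import org.apache.lucene.document.Field;
    import org.apache.lucene.document.StringField;
    import org.apache.lucene.document.TextField;
    import org.apache.lucene.index.IndexWriter;
    import org.apache.lucene.index.IndexWriterConfig;
    import org.apache.lucene.store.Directory;
    import org.apache.lucene.store.RAMDirectory;

    public class AddDocumentSketch {
        public static void main(String[] args) throws IOException {
            Directory dir = new RAMDirectory();
            IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
            try (IndexWriter writer = new IndexWriter(dir, config)) {
                Document doc = new Document();
                // StringField: indexed as a single token (not analyzed), good for IDs
                doc.add(new StringField("id", "1", Field.Store.YES));
                // TextField: analyzed full-text content
                doc.add(new TextField("body", "Lucene in Action", Field.Store.YES));
                long seqNo = writer.addDocument(doc); // sequence number of this operation
                System.out.println("indexed, seqNo=" + seqNo);
            }
        }
    }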
From source file:com.lucene.index.test.IKAnalyzerdemo.java
License:Apache License
/**
 * Demo entry point: indexes one document into each of two in-memory
 * indexes, then searches both at once through a MultiReader.
 * @param args
 */
public static void main(String[] args) {
    // Field name for the Lucene Documents
    String fieldName = "text";
    String text1 = "oracle,?";
    String text2 = "?";
    String text3 = "?";
    // Instantiate the IKAnalyzer
    Analyzer analyzer = new IKAnalyzer();
    Directory directory1 = null;
    Directory directory2 = null;
    IndexWriter iwriter1 = null;
    IndexWriter iwriter2 = null;
    IndexReader ireader1 = null;
    IndexReader ireader2 = null;
    IndexSearcher isearcher = null;
    try {
        // Build two in-memory indexes
        directory1 = new RAMDirectory();
        directory2 = new RAMDirectory();
        // Configure the IndexWriterConfigs
        IndexWriterConfig iwConfig1 = new IndexWriterConfig(analyzer);
        iwConfig1.setOpenMode(OpenMode.CREATE);
        IndexWriterConfig iwConfig2 = new IndexWriterConfig(analyzer);
        iwConfig2.setOpenMode(OpenMode.CREATE);
        iwriter1 = new IndexWriter(directory1, iwConfig1);
        iwriter2 = new IndexWriter(directory2, iwConfig2);
        // Write one document into each index
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField("text1", text1, Field.Store.YES));
        iwriter1.addDocument(doc1);
        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10001", Field.Store.YES));
        doc2.add(new TextField("text2", text2, Field.Store.YES));
        iwriter2.addDocument(doc2);
        iwriter1.close();
        iwriter2.close();

        // ********************************** Search phase
        ireader1 = DirectoryReader.open(directory1);
        ireader2 = DirectoryReader.open(directory2);
        IndexReader[] mreader = { ireader1, ireader2 };
        MultiReader multiReader = new MultiReader(mreader);
        isearcher = new IndexSearcher(multiReader);

        String keyword = "?";
        // Build the Query with MultiFieldQueryParser, boosting text1 over text2
        String[] fields = { "text1", "text2" };
        Map<String, Float> boosts = new HashMap<String, Float>();
        boosts.put("text1", 5.0f);
        boosts.put("text2", 2.0f);
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer, boosts);
        Query query = parser.parse(keyword);
        System.out.println("Query = " + query);

        // Retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("hits: " + topDocs.totalHits);
        // Print the results
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader1 != null) {
            try {
                ireader1.close();
                ireader2.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory1 != null) {
            try {
                directory1.close();
                directory2.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.lucene.index.test.IKAnalyzerdemoMutilField.java
License:Apache License
/**
 * Demo entry point: indexes three documents into the same field of one
 * in-memory index, then searches it with an AND-default QueryParser.
 * @param args
 */
public static void main(String[] args) {
    // Field name for the Lucene Documents
    String fieldName = "text";
    String text1 = "oracle?";
    String text2 = "?";
    String text3 = "?";
    // Instantiate the IKAnalyzer
    Analyzer analyzer = new IKAnalyzer();
    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Build an in-memory index
        directory = new RAMDirectory();
        // Configure the IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        // Write the documents
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField(fieldName, text1, Field.Store.YES));
        iwriter.addDocument(doc1);
        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10000", Field.Store.YES));
        doc2.add(new TextField(fieldName, text2, Field.Store.YES));
        iwriter.addDocument(doc2);
        Document doc3 = new Document();
        doc3.add(new StringField("ID", "10000", Field.Store.YES));
        doc3.add(new TextField(fieldName, text3, Field.Store.YES));
        iwriter.addDocument(doc3);
        iwriter.close();

        // ********************************** Search phase
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // Build the Query with QueryParser
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Retrieve the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("hits: " + topDocs.totalHits);
        // Print the results
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
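A hedged sketch of how indexDoc might be driven, modeled on the indexDocs method of Lucene's own IndexFiles demo; the method name and the wiring here are assumptions, not part of the listing above:

    // Assumed imports: java.nio.file.*, java.nio.file.attribute.BasicFileAttributes
    static void indexDocs(final IndexWriter writer, Path path) throws IOException {
        if (Files.isDirectory(path)) {
            // Recurse into the directory and index every regular file found
            Files.walkFileTree(path, new SimpleFileVisitor<Path>() {
                @Override
                public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) throws IOException {
                    indexDoc(writer, file, attrs.lastModifiedTime().toMillis());
                    return FileVisitResult.CONTINUE;
                }
            });
        } else {
            indexDoc(writer, path, Files.getLastModifiedTime(path).toMillis());
        }
    }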
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator
                    + sidecarIndexLocation + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main);
        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            if (i == nextTarget) {
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null,
                    SidecarIndexReader.getSequentialSubReaders(main), sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}
From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java
License:Apache License
private void addDummy(IndexWriter iw) throws IOException {
    // Placeholder document: keeps doc numbers in the sidecar index aligned
    // with the main index (see buildParallelReader above)
    Document dummy = new Document();
    Field f = new Field("_" + boostField, "d", StringField.TYPE_NOT_STORED);
    dummy.add(f);
    iw.addDocument(dummy);
}
From source file:com.m3958.apps.pcms.lucene.facet.MultiCategoryListsFacetsExample.java
License:Apache License
private void add(IndexWriter indexWriter, FacetFields facetFields, String... categoryPaths)
        throws IOException {
    Document doc = new Document();
    List<CategoryPath> paths = new ArrayList<CategoryPath>();
    for (String categoryPath : categoryPaths) {
        paths.add(new CategoryPath(categoryPath, '/'));
    }
    facetFields.addFields(doc, paths);
    indexWriter.addDocument(doc);
}
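This helper uses the pre-4.7 facet API, in which FacetFields is constructed over a TaxonomyWriter that assigns ordinals to each CategoryPath. A hedged usage sketch; the category strings are assumptions modeled on Lucene's facet examples, not taken from this file:

    add(indexWriter, facetFields, "Author/Bob", "Publish Date/2010/10/15");

Each string becomes one CategoryPath, split on '/' into a multi-level dimension.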
From source file:com.mathworks.xzheng.advsearching.MultiPhraseQueryTest.java
License:Apache License
protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc1 = new Document();
    doc1.add(new Field("field", "the quick brown fox jumped over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc1);

    Document doc2 = new Document();
    doc2.add(new Field("field", "the fast fox hopped over the hound",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc2);
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}
From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java
License:Apache License
public void setUp() throws Exception {
    String[] animals = { "aardvark", "beaver", "coati", "dog", "elephant", "frog",
            "gila monster", "horse", "iguana", "javelina", "kangaroo", "lemur", "moose",
            "nematode", "orca", "python", "quokka", "rat", "scorpion", "tarantula",
            "uromastyx", "vicuna", "walrus", "xiphias", "yak", "zebra" };

    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);

    Directory aTOmDirectory = new RAMDirectory(); // #1
    Directory nTOzDirectory = new RAMDirectory(); // #1

    // One config per writer: an IndexWriterConfig instance cannot be
    // shared across IndexWriters in Lucene 4.x
    IndexWriterConfig aTOmConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriterConfig nTOzConfig = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriter aTOmWriter = new IndexWriter(aTOmDirectory, aTOmConfig);
    IndexWriter nTOzWriter = new IndexWriter(nTOzDirectory, nTOzConfig);

    for (int i = animals.length - 1; i >= 0; i--) {
        Document doc = new Document();
        String animal = animals[i];
        doc.add(new Field("animal", animal, Field.Store.YES, Field.Index.NOT_ANALYZED));
        if (animal.charAt(0) < 'n') {
            aTOmWriter.addDocument(doc); // #2
        } else {
            nTOzWriter.addDocument(doc); // #2
        }
    }

    aTOmWriter.close();
    nTOzWriter.close();

    searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(DirectoryReader.open(aTOmDirectory));
    searchers[1] = new IndexSearcher(DirectoryReader.open(nTOzDirectory));
}
From source file:com.mathworks.xzheng.advsearching.SecurityFilterTest.java
License:Apache License
protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(directory, config);

    Document document = new Document(); // 1
    document.add(new Field("owner", "elwood",
            Field.Store.YES, Field.Index.NOT_ANALYZED)); // 1
    document.add(new Field("keywords", "elwood's sensitive info",
            Field.Store.YES, Field.Index.ANALYZED)); // 1
    writer.addDocument(document);

    document = new Document(); // 2
    document.add(new Field("owner", "jake",
            Field.Store.YES, Field.Index.NOT_ANALYZED)); // 2
    document.add(new Field("keywords", "jake's sensitive info",
            Field.Store.YES, Field.Index.ANALYZED)); // 2
    writer.addDocument(document);
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}
From source file:com.mathworks.xzheng.advsearching.SpanQueryTest.java
License:Apache License
protected void setUp() throws Exception {
    directory = new RAMDirectory();
    analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);
    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("f", "the quick brown fox jumps over the lazy dog",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("f", "the quick red fox jumps over the sleepy cat",
            Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc);
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
    reader = searcher.getIndexReader();

    quick = new SpanTermQuery(new Term("f", "quick"));
    brown = new SpanTermQuery(new Term("f", "brown"));
    red = new SpanTermQuery(new Term("f", "red"));
    fox = new SpanTermQuery(new Term("f", "fox"));
    lazy = new SpanTermQuery(new Term("f", "lazy"));
    sleepy = new SpanTermQuery(new Term("f", "sleepy"));
    dog = new SpanTermQuery(new Term("f", "dog"));
    cat = new SpanTermQuery(new Term("f", "cat"));
}