Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.lucene.index.test.IKAnalyzerdemo.java

License:Apache License

/**
 * //from w ww  . j av a2s . c o  m
 * ???
 * @param args
 */
public static void main(String[] args) {
    //Lucene Document??
    String fieldName = "text";
    //
    String text1 = "oracle,?";
    String text2 = "?";
    String text3 = "?";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer();

    Directory directory1 = null;
    Directory directory2 = null;
    IndexWriter iwriter1 = null;
    IndexWriter iwriter2 = null;
    IndexReader ireader1 = null;
    IndexReader ireader2 = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory1 = new RAMDirectory();
        directory2 = new RAMDirectory();

        //?IndexWriterConfig

        IndexWriterConfig iwConfig1 = new IndexWriterConfig(analyzer);
        iwConfig1.setOpenMode(OpenMode.CREATE);

        IndexWriterConfig iwConfig2 = new IndexWriterConfig(analyzer);
        iwConfig2.setOpenMode(OpenMode.CREATE);
        iwriter1 = new IndexWriter(directory1, iwConfig1);
        iwriter2 = new IndexWriter(directory2, iwConfig2);

        //
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField("text1", text1, Field.Store.YES));
        iwriter1.addDocument(doc1);

        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10001", Field.Store.YES));
        doc2.add(new TextField("text2", text2, Field.Store.YES));
        iwriter2.addDocument(doc2);

        iwriter1.close();
        iwriter2.close();

        //?**********************************
        //?   
        ireader1 = DirectoryReader.open(directory1);
        ireader2 = DirectoryReader.open(directory2);

        IndexReader[] mreader = { ireader1, ireader2 };

        MultiReader multiReader = new MultiReader(mreader);

        isearcher = new IndexSearcher(multiReader);

        String keyword = "?";
        //QueryParser?Query
        String[] fields = { "text1", "text2" };

        Map<String, Float> boosts = new HashMap<String, Float>();
        boosts.put("text1", 5.0f);
        boosts.put("text2", 2.0f);
        /**MultiFieldQueryParser??? 
         * */
        MultiFieldQueryParser parser = new MultiFieldQueryParser(fields, analyzer, boosts);
        Query query = parser.parse(keyword);

        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader1 != null) {
            try {
                ireader1.close();
                ireader2.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory1 != null) {
            try {
                directory1.close();
                directory2.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.lucene.index.test.IKAnalyzerdemoMutilField.java

License:Apache License

/**
 * /*w  w w .  j a v a  2 s  . c o m*/
 * ???
 * @param args
 */
public static void main(String[] args) {
    //Lucene Document??
    String fieldName = "text";
    //
    String text1 = "oracle?";
    String text2 = "?";
    String text3 = "?";

    //IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        //
        directory = new RAMDirectory();

        //?IndexWriterConfig

        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        //
        Document doc1 = new Document();
        doc1.add(new StringField("ID", "10000", Field.Store.YES));
        doc1.add(new TextField(fieldName, text1, Field.Store.YES));
        iwriter.addDocument(doc1);

        Document doc2 = new Document();
        doc2.add(new StringField("ID", "10000", Field.Store.YES));
        doc2.add(new TextField(fieldName, text2, Field.Store.YES));
        iwriter.addDocument(doc2);

        Document doc3 = new Document();
        doc3.add(new StringField("ID", "10000", Field.Store.YES));
        doc3.add(new TextField(fieldName, text3, Field.Store.YES));
        iwriter.addDocument(doc3);
        iwriter.close();

        //?**********************************
        //?   
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //QueryParser?Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        //?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        //
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.lucene.index.test.IndexFiles.java

License:Apache License

/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize 
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);/*  w  w w.  j  a  v a 2  s.  co  m*/

        // Add the last modified date of the file a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so 
            // we use updateDocument instead to replace the old one matching the exact 
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

DirectoryReader buildParallelReader(DirectoryReader main, SolrIndexSearcher source, boolean rebuild) {
    try {//from   ww  w  .  j a  va  2  s .  co m
        if (source == null) {
            throw new Exception("Source collection is missing.");
        }
        // create as a sibling path of the main index
        Directory d = main.directory();
        File primaryDir = null;
        if (d instanceof FSDirectory) {
            String path = ((FSDirectory) d).getDirectory().getPath();
            primaryDir = new File(path);
            sidecarIndex = new File(primaryDir.getParentFile(), sidecarIndexLocation);
        } else {
            String secondaryPath = System.getProperty("java.io.tmpdir") + File.separator + sidecarIndexLocation
                    + "-" + System.currentTimeMillis();
            sidecarIndex = new File(secondaryPath);
        }
        // create a new tmp dir for the secondary indexes
        File secondaryIndex = new File(sidecarIndex, System.currentTimeMillis() + "-index");
        if (rebuild) {
            safeDelete(sidecarIndex);
        }
        parallelFields.addAll(source.getFieldNames());
        parallelFields.remove("id");
        LOG.debug("building a new index");
        Directory dir = FSDirectory.open(secondaryIndex);
        if (IndexWriter.isLocked(dir)) {
            // try forcing unlock
            try {
                IndexWriter.unlock(dir);
            } catch (Exception e) {
                LOG.warn("Failed to unlock " + secondaryIndex);
            }
        }
        int[] mergeTargets;
        AtomicReader[] subReaders = SidecarIndexReader.getSequentialSubReaders(main);
        if (subReaders == null || subReaders.length == 0) {
            mergeTargets = new int[] { main.maxDoc() };
        } else {
            mergeTargets = new int[subReaders.length];
            for (int i = 0; i < subReaders.length; i++) {
                mergeTargets[i] = subReaders[i].maxDoc();
            }
        }
        Version ver = currentCore.getLatestSchema().getDefaultLuceneMatchVersion();
        IndexWriterConfig cfg = new IndexWriterConfig(ver, currentCore.getLatestSchema().getAnalyzer());
        //cfg.setInfoStream(System.err);
        cfg.setMergeScheduler(new SerialMergeScheduler());
        cfg.setMergePolicy(new SidecarMergePolicy(mergeTargets, false));
        IndexWriter iw = new IndexWriter(dir, cfg);
        LOG.info("processing " + main.maxDoc() + " docs / " + main.numDeletedDocs() + " dels in main index");
        int boostedDocs = 0;
        Bits live = MultiFields.getLiveDocs(main);

        int targetPos = 0;
        int nextTarget = mergeTargets[targetPos];
        BytesRef idRef = new BytesRef();
        for (int i = 0; i < main.maxDoc(); i++) {
            if (i == nextTarget) {
                iw.commit();
                nextTarget = nextTarget + mergeTargets[++targetPos];
            }
            if (live != null && !live.get(i)) {
                addDummy(iw); // this is required to preserve doc numbers.
                continue;
            } else {
                DocumentStoredFieldVisitor visitor = new DocumentStoredFieldVisitor(docIdField);
                main.document(i, visitor);
                Document doc = visitor.getDocument();
                // get docId
                String id = doc.get(docIdField);
                if (id == null) {
                    LOG.debug("missing id, docNo=" + i);
                    addDummy(iw);
                    continue;
                } else {
                    // find the data, if any
                    doc = lookup(source, id, idRef, parallelFields);
                    if (doc == null) {
                        LOG.debug("missing boost data, docId=" + id);
                        addDummy(iw);
                        continue;
                    } else {
                        LOG.debug("adding boost data, docId=" + id + ", b=" + doc);
                        iw.addDocument(doc);
                        boostedDocs++;
                    }
                }
            }
        }
        iw.close();
        DirectoryReader other = DirectoryReader.open(dir);
        LOG.info("SidecarIndexReader with " + boostedDocs + " boosted documents.");
        SidecarIndexReader pr = createSidecarIndexReader(main, other, sourceCollection, secondaryIndex);
        return pr;
    } catch (Exception e) {
        LOG.warn("Unable to build parallel index: " + e.toString(), e);
        LOG.warn("Proceeding with single main index.");
        try {
            return new SidecarIndexReader(this, main, null, SidecarIndexReader.getSequentialSubReaders(main),
                    sourceCollection, null);
        } catch (Exception e1) {
            LOG.warn("Unexpected exception, returning single main index", e1);
            return main;
        }
    }
}

From source file:com.lucid.solr.sidecar.SidecarIndexReaderFactory.java

License:Apache License

private void addDummy(IndexWriter iw) throws IOException {
    Document dummy = new Document();
    Field f = new Field("_" + boostField, "d", StringField.TYPE_NOT_STORED);
    dummy.add(f);//from  w ww  .ja v  a2  s. com
    iw.addDocument(dummy);
}

From source file:com.m3958.apps.pcms.lucene.facet.MultiCategoryListsFacetsExample.java

License:Apache License

private void add(IndexWriter indexWriter, FacetFields facetFields, String... categoryPaths) throws IOException {
    Document doc = new Document();

    List<CategoryPath> paths = new ArrayList<CategoryPath>();
    for (String categoryPath : categoryPaths) {
        paths.add(new CategoryPath(categoryPath, '/'));
    }//from  ww  w .  j a  v  a  2s . co m
    facetFields.addFields(doc, paths);
    indexWriter.addDocument(doc);
}

From source file:com.mathworks.xzheng.advsearching.MultiPhraseQueryTest.java

License:Apache License

protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(directory, config);
    Document doc1 = new Document();
    doc1.add(new Field("field", "the quick brown fox jumped over the lazy dog", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc1);
    Document doc2 = new Document();
    doc2.add(new Field("field", "the fast fox hopped over the hound", Field.Store.YES, Field.Index.ANALYZED));
    writer.addDocument(doc2);//w  ww.j a  v  a2  s .  c om
    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
}

From source file:com.mathworks.xzheng.advsearching.MultiSearcherTest.java

License:Apache License

public void setUp() throws Exception {
    String[] animals = { "aardvark", "beaver", "coati", "dog", "elephant", "frog", "gila monster", "horse",
            "iguana", "javelina", "kangaroo", "lemur", "moose", "nematode", "orca", "python", "quokka", "rat",
            "scorpion", "tarantula", "uromastyx", "vicuna", "walrus", "xiphias", "yak", "zebra" };

    Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);

    Directory aTOmDirectory = new RAMDirectory(); // #1
    Directory nTOzDirectory = new RAMDirectory(); // #1

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);

    IndexWriter aTOmWriter = new IndexWriter(aTOmDirectory, config);
    IndexWriter nTOzWriter = new IndexWriter(nTOzDirectory, config);

    for (int i = animals.length - 1; i >= 0; i--) {
        Document doc = new Document();
        String animal = animals[i];
        doc.add(new Field("animal", animal, Field.Store.YES, Field.Index.NOT_ANALYZED));
        if (animal.charAt(0) < 'n') {
            aTOmWriter.addDocument(doc); // #2
        } else {/*from w ww.j a  va  2  s.  c  o  m*/
            nTOzWriter.addDocument(doc); // #2
        }
    }

    aTOmWriter.close();
    nTOzWriter.close();

    searchers = new IndexSearcher[2];
    searchers[0] = new IndexSearcher(DirectoryReader.open(aTOmDirectory));
    searchers[1] = new IndexSearcher(DirectoryReader.open(nTOzDirectory));
}

From source file:com.mathworks.xzheng.advsearching.SecurityFilterTest.java

License:Apache License

protected void setUp() throws Exception {
    Directory directory = new RAMDirectory();

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46,
            new WhitespaceAnalyzer(Version.LUCENE_46));
    IndexWriter writer = new IndexWriter(directory, config);

    Document document = new Document(); // 1
    document.add(new Field("owner", // 1
            "elwood", // 1
            Field.Store.YES, // 1
            Field.Index.NOT_ANALYZED)); // 1
    document.add(new Field("keywords", // 1
            "elwood's sensitive info", // 1
            Field.Store.YES, // 1
            Field.Index.ANALYZED)); // 1
    writer.addDocument(document);

    document = new Document(); // 2
    document.add(new Field("owner", // 2
            "jake", // 2
            Field.Store.YES, // 2
            Field.Index.NOT_ANALYZED)); // 2
    document.add(new Field("keywords", // 2
            "jake's sensitive info", // 2
            Field.Store.YES, // 2
            Field.Index.ANALYZED)); // 2
    writer.addDocument(document);//from ww w .j a v  a  2 s  .co  m

    writer.close();
    searcher = new IndexSearcher(DirectoryReader.open(directory));
}

From source file:com.mathworks.xzheng.advsearching.SpanQueryTest.java

License:Apache License

protected void setUp() throws Exception {
    directory = new RAMDirectory();

    analyzer = new WhitespaceAnalyzer(Version.LUCENE_46);

    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_46, analyzer);

    IndexWriter writer = new IndexWriter(directory, config);

    Document doc = new Document();
    doc.add(new Field("f", "the quick brown fox jumps over the lazy dog", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new Field("f", "the quick red fox jumps over the sleepy cat", Field.Store.YES,
            Field.Index.ANALYZED));
    writer.addDocument(doc);//from ww w . j a v a2 s . c  o m

    writer.close();

    searcher = new IndexSearcher(DirectoryReader.open(directory));
    reader = searcher.getIndexReader();

    quick = new SpanTermQuery(new Term("f", "quick"));
    brown = new SpanTermQuery(new Term("f", "brown"));
    red = new SpanTermQuery(new Term("f", "red"));
    fox = new SpanTermQuery(new Term("f", "fox"));
    lazy = new SpanTermQuery(new Term("f", "lazy"));
    sleepy = new SpanTermQuery(new Term("f", "sleepy"));
    dog = new SpanTermQuery(new Term("f", "dog"));
    cat = new SpanTermQuery(new Term("f", "cat"));
}