Example usage for org.apache.lucene.index IndexWriter addDocument

List of usage examples for org.apache.lucene.index IndexWriter addDocument

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriter addDocument.

Prototype

public long addDocument(Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Document

Adds a document to this index.

Usage

From source file:com.vnet.demo.service.lucene.LuceneService.java

License:Apache License

/**
 * Indexes a single {@code DocumentData} record.
 *
 * <p>A new {@link IndexWriter} is opened on {@code index} for every call and closed again
 * before returning. The writer is managed by try-with-resources, so an
 * {@link IOException} raised while it is being closed reaches the catch block below
 * instead of being silently discarded.
 *
 * @param documentData source of the "id", "title", "summary", "context" and "createDate"
 *                     field values
 */
public void addDoc(DocumentData documentData) {
    IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
    try (IndexWriter writer = new IndexWriter(index, config)) {
        Document doc = new Document();
        doc.add(new LongField("id", documentData.getId(), Field.Store.YES));
        doc.add(new TextField("title", documentData.getTitle(), Field.Store.YES));
        doc.add(new TextField("summary", documentData.getSummary(), Field.Store.YES));
        doc.add(new TextField("context", documentData.getContext(), Field.Store.YES));
        doc.add(new LongField("createDate", documentData.getCreateDate(), Field.Store.YES));
        writer.addDocument(doc);
    } catch (IOException e) {
        // The signature declares no checked exception, so failures (including a failed
        // close) are reported here exactly as add failures were before.
        e.printStackTrace();
    }
}

From source file:com.weasel.lucene.ik.sample.IKAnalyzerDemo.java

License:Apache License

/**
 * Demo entry point: indexes one document into an in-memory directory using
 * {@code IKAnalyzer}, then searches the index and prints the total hit count followed by
 * each returned document.
 *
 * @param args unused
 */
public static void main(String[] args) {
    // Name of the field that holds the analyzed text
    String fieldName = "text";
    // Text to index.
    // NOTE(review): the '?' runs in the string literals look like mis-encoded non-ASCII
    // text; they are left untouched because they are runtime data.
    String text = "IK Analyzer???????";

    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    try {
        // In-memory index
        directory = new RAMDirectory();

        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // Write the single demo document
        Document doc = new Document();
        doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
        iwriter.close();
        // Closed successfully: clear the reference so the finally block does not close it again.
        iwriter = null;

        // Search phase
        ireader = IndexReader.open(directory);
        IndexSearcher isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // Build the Query from the keyword with QueryParser
        QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);

        // Fetch the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);

        // scoreDocs contains at most the 5 requested hits while totalHits counts every
        // match, so iterate over scoreDocs itself to stay within the array bounds.
        for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
            Document targetDoc = isearcher.doc(scoreDoc.doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // Only non-null when indexing failed before the explicit close above.
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:com.work.IndexFiles.java

License:Apache License

/**
 * Indexes one file as a single document with "path", "modified" and "contents" fields.
 *
 * @param writer       index writer that receives the document
 * @param file         file whose contents are indexed
 * @param lastModified value stored, as text, in the "modified" field
 * @throws IOException if the file cannot be opened or the writer fails
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        final String pathText = file.toString();
        Document document = new Document();

        // "path": indexed (searchable) but not tokenized into separate words, and
        // without term frequency or positional information.
        document.add(new StringField("path", pathText, Field.Store.YES));

        // "modified": the raw value kept as a stored string field (a LongPoint is not
        // used here).
        document.add(new StringField("modified", String.valueOf(lastModified), Field.Store.YES));

        // "contents": supplied as a Reader so the text is tokenized and indexed but not
        // stored. The stream is decoded as UTF-8; files in another encoding will not
        // match searches for special characters.
        Reader contents = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8));
        document.add(new TextField("contents", contents));

        boolean freshIndex = writer.getConfig().getOpenMode() == OpenMode.CREATE;
        if (!freshIndex) {
            // An older copy of this file may already be indexed: replace whatever
            // matches the exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", pathText), document);
        } else {
            // Brand-new index, so no previous copy can exist: plain add.
            System.out.println("adding " + file);
            writer.addDocument(document);
        }
    }
}

From source file:com.wrmsr.search.dsl.SearchServiceImpl.java

License:Apache License

/**
 * Adds {@code doc} to the index as a document with "title" and "isbn" fields.
 * The {@link IndexWriter} is created on the first call and kept in {@code this.indexWriter}
 * for later calls.
 *
 * @param doc source of the title and ISBN values
 * @throws IOException if the writer cannot be created or the document cannot be added
 */
@Override
public synchronized void addDoc(Doc doc) throws IOException {
    // Lazily create the writer the first time a document is added.
    if (!this.indexWriter.isPresent()) {
        this.indexWriter = Optional
                .of(new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_4_9, analyzer)));
    }

    Document luceneDoc = new Document();
    luceneDoc.add(new Field("title", doc.getTitle(), FIELD_TYPE));
    luceneDoc.add(new Field("isbn", doc.getIsbn(), FIELD_TYPE));
    this.indexWriter.get().addDocument(luceneDoc);
}

From source file:com.yangxu.searchengine.index.IndexFiles.java

License:Apache License

/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory./* w  w w.j a  v a  2  s . c o m*/
 * 
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 * 
 * @param writer
 *            Writer to the index where the given file/dir info will be
 *            stored
 * @param file
 *            The file to index, or the directory to recurse into to find
 *            files to index
 * @throws IOException
 */
private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    // title
    String titleValue = null;
    // content
    String contentValue = null;
    String urlValue = null;
    String indextimeValue = null;
    String uploadtimeValue = null;
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
                LineNumberReader reader = new LineNumberReader(new InputStreamReader(fis, "UTF-8"));
                String line = null;
                StringBuilder sb = new StringBuilder();
                while ((line = reader.readLine()) != null) {
                    // int lineNumber = reader.getLineNumber();
                    switch (reader.getLineNumber()) {
                    case 1:
                        urlValue = line;
                        break;
                    case 2:
                        uploadtimeValue = line;
                        break;
                    case 3:
                        titleValue = line.split(":")[1];
                        break;
                    case 4:
                        break;
                    default:
                        sb.append(line);
                        break;

                    }
                    /*
                     * if (reader.getLineNumber() == 1) { urlValue = line; }
                     * 
                     * if (reader.getLineNumber() == 3) { titleValue =
                     * line.split(":")[1]; } else if (reader.getLineNumber()
                     * > 4) { sb.append(line); }
                     */
                }
                contentValue = sb.toString();
                reader.close();

            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }

            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't
                // tokenize
                // the field into separate words and don't index term
                // frequency
                // or positional information:

                Field urlField = new Field("url", urlValue, Field.Store.YES, Field.Index.NOT_ANALYZED);
                urlField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(urlField);

                Field titleField = new Field("title", titleValue, Field.Store.YES, Field.Index.ANALYZED);
                titleField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(titleField);

                Field contentField = new Field("content", contentValue, Field.Store.YES, Field.Index.ANALYZED);
                contentField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(contentField);
                // Add the last modified date of the file a field named
                // "modified".
                // Use a NumericField that is indexed (i.e. efficiently
                // filterable with
                // NumericRangeFilter). This indexes to milli-second
                // resolution, which
                // is often too fine. You could instead create a number
                // based on
                // year/month/day/hour/minutes/seconds, down the resolution
                // you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.               
                SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd HH:mm:ss");
                //Calendar cal = Calendar.getInstance(); // 
                //timeValue = formatter.format(cal.getTime());
                Date now = new Date();
                indextimeValue = formatter.format(now);

                Field indextimeField = new Field("indextime", indextimeValue, Field.Store.YES,
                        Field.Index.NOT_ANALYZED);
                titleField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(indextimeField);

                Field uploadtimeField = new Field("uploadtime", uploadtimeValue, Field.Store.YES,
                        Field.Index.NOT_ANALYZED);
                titleField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(uploadtimeField);

                // Add the contents of the file to a field named "contents".
                // Specify a Reader,
                // so that the text of the file is tokenized and indexed,
                // but not stored.
                // Note that FileReader expects the file to be in UTF-8
                // encoding.
                // If that's not the case searching for special characters
                // will fail.
                // doc.add(new Field("contents", new BufferedReader(
                // new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old
                    // document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have
                    // been indexed) so
                    // we use updateDocument instead to replace the old one
                    // matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("url", urlValue), doc);
                    writer.updateDocument(new Term("title", titleValue), doc);
                    writer.updateDocument(new Term("content", contentValue), doc);
                    writer.updateDocument(new Term("indextime", String.valueOf(indextimeValue)), doc);
                    writer.updateDocument(new Term("uploadtime", String.valueOf(uploadtimeValue)), doc);
                }

            } finally {
                fis.close();
            }
        }
    }
}

From source file:com.Yasna.forum.database.DbSearchIndexer.java

License:Open Source License

/**
 * Adds one forum message to the search index as a new document. The call is a
 * no-op when the writer, the subject or the body is null. The writer is not
 * closed by this method.
 *
 * @param writer       index writer that receives the document; may be null
 * @param messageID    value of the "messageID" field
 * @param subject      value of the "subject" field; may be null
 * @param body         value of the "body" field; may be null
 * @param userID       value of the "userID" field
 * @param threadID     value of the "threadID" field
 * @param forumID      value of the "forumID" field
 * @param creationDate date converted with {@code DateField.dateToString} for the
 *                     "creationDate" field
 * @throws IOException if the writer fails to add the document
 */
protected final void addMessageToIndex(IndexWriter writer, int messageID, String subject, String body,
        int userID, int threadID, int forumID, java.util.Date creationDate) throws IOException {
    // Nothing to index without a writer; messages lacking a subject or body are ignored.
    if (writer == null || subject == null || body == null) {
        return;
    }

    String userText = String.valueOf(userID);
    String threadText = String.valueOf(threadID);
    String forumText = String.valueOf(forumID);

    Document messageDoc = new Document();
    messageDoc.add(Field.Keyword("messageID", String.valueOf(messageID)));
    messageDoc.add(new Field("userID", userText, false, true, false));
    messageDoc.add(new Field("threadID", threadText, false, true, false));
    messageDoc.add(new Field("forumID", forumText, false, true, false));
    messageDoc.add(Field.UnStored("subject", subject));
    messageDoc.add(Field.UnStored("body", body));
    messageDoc.add(new Field("creationDate", DateField.dateToString(creationDate), false, true, false));

    writer.addDocument(messageDoc);
}

From source file:com.yida.framework.lucene5.facet.DistanceFacetsExample.java

License:Creative Commons License

/**
 * /*  www.  j  a va2s .  c o  m*/
 */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()));

    // ??(?FacetField)
    Document doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.759011));
    doc.add(new DoubleDocValuesField("longitude", -73.9844722));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.718266));
    doc.add(new DoubleDocValuesField("longitude", -74.007819));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.7051157));
    doc.add(new DoubleDocValuesField("longitude", -74.0088305));
    writer.addDocument(doc);

    /*doc.add(new DoubleField("latitude", 40.759011, Field.Store.YES));
       doc.add(new DoubleField("longitude", -73.9844722, Field.Store.YES));
       writer.addDocument(doc);
               
       doc = new Document();
       doc.add(new DoubleField("latitude", 40.718266, Field.Store.YES));
       doc.add(new DoubleField("longitude", -74.007819, Field.Store.YES));
       writer.addDocument(doc);
               
       doc = new Document();
       doc.add(new DoubleField("latitude", 40.7051157, Field.Store.YES));
       doc.add(new DoubleField("longitude", -74.0088305, Field.Store.YES));
    writer.addDocument(doc);
    */

    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.commit();
    writer.close();
}

From source file:com.zghw.lucene.demo.AssociationsFacetsExample.java

License:Apache License

/**
 * Builds the example index: two documents carrying int ("tags") and float ("genre")
 * association facet fields.
 *
 * @throws IOException if either writer fails
 */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // Facet ordinals go to their own directory, separate from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    // First document: tag 'lucene' x3, genre 'computing' at 87% confidence
    Document first = new Document();
    first.add(new IntAssociationFacetField(3, "tags", "lucene"));
    first.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, first));

    // Second document: tag 'lucene' x1, tag 'solr' x2,
    // genre 'computing' at 75% and genre 'software' at 34% confidence
    Document second = new Document();
    second.add(new IntAssociationFacetField(1, "tags", "lucene"));
    second.add(new IntAssociationFacetField(2, "tags", "solr"));
    second.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    second.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, second));

    indexWriter.close();
    taxoWriter.close();
}

From source file:com.zghw.lucene.demo.DistanceFacetsExample.java

License:Apache License

/**
 * Builds the example index: three documents, each with unstored "latitude" and
 * "longitude" double fields, then opens {@code searcher} on the writer before closing it.
 *
 * @throws IOException if indexing or opening the reader fails
 */
public void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
            new WhitespaceAnalyzer());
    IndexWriter writer = new IndexWriter(indexDir, writerConfig);

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // {latitude, longitude} of each location, in insertion order
    final double[][] locations = {
        { 40.759011, -73.9844722 },
        { 40.718266, -74.007819 },
        { 40.7051157, -74.0088305 },
    };

    for (double[] location : locations) {
        Document locationDoc = new Document();
        locationDoc.add(new DoubleField("latitude", location[0], Field.Store.NO));
        locationDoc.add(new DoubleField("longitude", location[1], Field.Store.NO));
        writer.addDocument(locationDoc);
    }

    // Open the searcher on a reader obtained from the still-open writer
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.close();
}

From source file:com.zghw.lucene.demo.ExpressionAggregationFacetsExample.java

License:Apache License

/**
 * Builds the example index: two documents, each with a text field "c", a numeric
 * doc-values field "popularity" and a facet under dimension "A".
 *
 * @throws IOException if either writer fails
 */
private void index() throws IOException {
    IndexWriterConfig writerConfig = new IndexWriterConfig(FacetExamples.EXAMPLES_VER,
            new WhitespaceAnalyzer());
    IndexWriter indexWriter = new IndexWriter(indexDir, writerConfig);

    // Facet ordinals go to their own directory, separate from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document first = new Document();
    first.add(new TextField("c", "foo bar", Store.NO));
    first.add(new NumericDocValuesField("popularity", 5L));
    first.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, first));

    Document second = new Document();
    second.add(new TextField("c", "foo foo bar", Store.NO));
    second.add(new NumericDocValuesField("popularity", 3L));
    second.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, second));

    indexWriter.close();
    taxoWriter.close();
}