List of usage examples for org.apache.lucene.index.IndexWriter#addDocument
public long addDocument(Iterable<? extends IndexableField> doc) throws IOException
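Before the individual examples, here is a minimal, self-contained sketch of the pattern they all follow: open a Directory, create an IndexWriter, build a Document from Fields, and call addDocument. It assumes a recent Lucene release (single-argument IndexWriterConfig constructor, StringField/TextField); the class name, index path, and field names are illustrative assumptions and are not taken from the examples below.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class AddDocumentSketch {
    public static void main(String[] args) throws IOException {
        // Illustrative index location; any writable path works.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
             IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            // StringField: indexed as a single token, useful as an exact-match key.
            doc.add(new StringField("id", "doc-1", Field.Store.YES));
            // TextField: tokenized full-text content.
            doc.add(new TextField("body", "hello lucene", Field.Store.NO));
            writer.addDocument(doc);
            writer.commit();
        }
    }
}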
From source file:com.vnet.demo.service.lucene.LuceneService.java
License:Apache License
public void addDoc(DocumentData documentData) {
    IndexWriterConfig config = new IndexWriterConfig(version, analyzer);
    IndexWriter write = null;
    try {
        write = new IndexWriter(index, config);
        Document doc = new Document();
        doc.add(new LongField("id", documentData.getId(), Field.Store.YES));
        doc.add(new TextField("title", documentData.getTitle(), Field.Store.YES));
        doc.add(new TextField("summary", documentData.getSummary(), Field.Store.YES));
        doc.add(new TextField("context", documentData.getContext(), Field.Store.YES));
        doc.add(new LongField("createDate", documentData.getCreateDate(), Field.Store.YES));
        write.addDocument(doc);
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        IOUtils.closeQuietly(write);
    }
}
From source file:com.weasel.lucene.ik.sample.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) {
    // Field name and sample text for the Lucene Document
    String fieldName = "text";
    String text = "IK Analyzer???????";

    // Build the IK Analyzer
    Analyzer analyzer = new IKAnalyzer();

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // Index in memory
        directory = new RAMDirectory();

        // Configure the IndexWriter
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_34, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);

        // Build and add a single document
        Document doc = new Document();
        doc.add(new Field("ID", "10000", Field.Store.YES, Field.Index.NOT_ANALYZED));
        doc.add(new Field(fieldName, text, Field.Store.YES, Field.Index.ANALYZED));
        iwriter.addDocument(doc);
        iwriter.close();

        // Search phase
        ireader = IndexReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";

        // Parse the keyword into a Query with QueryParser
        QueryParser qp = new QueryParser(Version.LUCENE_34, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);

        // Retrieve at most the top 5 hits
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("Total hits: " + topDocs.totalHits);

        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < scoreDocs.length; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println(targetDoc.toString());
        }
    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
From source file:com.work.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // A LongPoint would be indexed (i.e. efficiently filterable with
        // PointRangeQuery) at millisecond resolution, which is often too fine.
        // You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        // doc.add(new LongPoint("modified", lastModified));
        doc.add(new StringField("modified", lastModified + "", Field.Store.YES));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that the reader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.wrmsr.search.dsl.SearchServiceImpl.java
License:Apache License
@Override
public synchronized void addDoc(Doc doc) throws IOException {
    if (!this.indexWriter.isPresent()) {
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_4_9, analyzer);
        IndexWriter indexWriter = new IndexWriter(directory, indexWriterConfig);
        this.indexWriter = Optional.of(indexWriter);
    }
    IndexWriter indexWriter = this.indexWriter.get();
    Document document = new Document();
    document.add(new Field("title", doc.getTitle(), FIELD_TYPE));
    document.add(new Field("isbn", doc.getIsbn(), FIELD_TYPE));
    indexWriter.addDocument(document);
}
From source file:com.yangxu.searchengine.index.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is
 * given, recurses over files and directories found under the given
 * directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For
 * good throughput, put multiple documents into your input file(s). An
 * example of this is in the benchmark module, which can create "line doc"
 * files, one document per line, using the <a href=
 * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer
 *            Writer to the index where the given file/dir info will be stored
 * @param file
 *            The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
private void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    String titleValue = null;      // title
    String contentValue = null;    // content
    String urlValue = null;
    String indextimeValue = null;
    String uploadtimeValue = null;
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
                LineNumberReader reader = new LineNumberReader(new InputStreamReader(fis, "UTF-8"));
                String line = null;
                StringBuilder sb = new StringBuilder();
                // Input file layout: line 1 = url, line 2 = upload time,
                // line 3 = "xxx:title", line 4 = skipped, rest = content.
                while ((line = reader.readLine()) != null) {
                    switch (reader.getLineNumber()) {
                    case 1:
                        urlValue = line;
                        break;
                    case 2:
                        uploadtimeValue = line;
                        break;
                    case 3:
                        titleValue = line.split(":")[1];
                        break;
                    case 4:
                        break;
                    default:
                        sb.append(line);
                        break;
                    }
                }
                contentValue = sb.toString();
                reader.close();
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this
                // exception with an "access denied" message;
                // checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the URL as a field named "url". Use a field that is
                // indexed (i.e. searchable), but don't tokenize the field
                // into separate words and don't index term frequency or
                // positional information:
                Field urlField = new Field("url", urlValue, Field.Store.YES, Field.Index.NOT_ANALYZED);
                urlField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(urlField);

                Field titleField = new Field("title", titleValue, Field.Store.YES, Field.Index.ANALYZED);
                titleField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(titleField);

                Field contentField = new Field("content", contentValue, Field.Store.YES, Field.Index.ANALYZED);
                contentField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(contentField);

                // Record the index time as "indextime". A NumericField indexed at
                // millisecond resolution is often too fine; here the date is stored
                // as a formatted string instead.
                SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd HH:mm:ss");
                Date now = new Date();
                indextimeValue = formatter.format(now);
                Field indextimeField = new Field("indextime", indextimeValue, Field.Store.YES,
                        Field.Index.NOT_ANALYZED);
                indextimeField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(indextimeField);

                Field uploadtimeField = new Field("uploadtime", uploadtimeValue, Field.Store.YES,
                        Field.Index.NOT_ANALYZED);
                uploadtimeField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(uploadtimeField);

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been
                    // indexed), so we use updateDocument instead to replace any old
                    // document matching the same URL, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("url", urlValue), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.Yasna.forum.database.DbSearchIndexer.java
License:Open Source License
/**
 * Indexes an individual message. The connection is assumed to be open when
 * passed in and will remain open after the method is done executing.
 */
protected final void addMessageToIndex(IndexWriter writer, int messageID, String subject, String body,
        int userID, int threadID, int forumID, java.util.Date creationDate) throws IOException {
    if (writer == null) {
        return;
    }
    // Ignore messages with a null subject or body.
    if (subject == null || body == null) {
        return;
    }
    Document doc = new Document();
    doc.add(Field.Keyword("messageID", Integer.toString(messageID)));
    doc.add(new Field("userID", Integer.toString(userID), false, true, false));
    doc.add(new Field("threadID", Integer.toString(threadID), false, true, false));
    doc.add(new Field("forumID", Integer.toString(forumID), false, true, false));
    doc.add(Field.UnStored("subject", subject));
    doc.add(Field.UnStored("body", body));
    doc.add(new Field("creationDate", DateField.dateToString(creationDate), false, true, false));
    writer.addDocument(doc);
}
From source file:com.yida.framework.lucene5.facet.DistanceFacetsExample.java
License:Creative Commons License
/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir, new IndexWriterConfig(new WhitespaceAnalyzer()));

    // Add documents with latitude/longitude locations as doc values
    // (no FacetField is needed for distance faceting)
    Document doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.759011));
    doc.add(new DoubleDocValuesField("longitude", -73.9844722));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.718266));
    doc.add(new DoubleDocValuesField("longitude", -74.007819));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleDocValuesField("latitude", 40.7051157));
    doc.add(new DoubleDocValuesField("longitude", -74.0088305));
    writer.addDocument(doc);

    /*
    doc.add(new DoubleField("latitude", 40.759011, Field.Store.YES));
    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.YES));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new DoubleField("latitude", 40.718266, Field.Store.YES));
    doc.add(new DoubleField("longitude", -74.007819, Field.Store.YES));
    writer.addDocument(doc);
    doc = new Document();
    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.YES));
    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.YES));
    writer.addDocument(doc);
    */

    // Open a near-real-time searcher over the writer, then commit and close
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));
    writer.commit();
    writer.close();
}
From source file:com.zghw.lucene.demo.AssociationsFacetsExample.java
License:Apache License
/** Build the example index. */
private void index() throws IOException {
    IndexWriterConfig iwc = new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer());
    IndexWriter indexWriter = new IndexWriter(indexDir, iwc);

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    // 3 occurrences for tag 'lucene'
    doc.add(new IntAssociationFacetField(3, "tags", "lucene"));
    // 87% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.87f, "genre", "computing"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    // 1 occurrence for tag 'lucene'
    doc.add(new IntAssociationFacetField(1, "tags", "lucene"));
    // 2 occurrences for tag 'solr'
    doc.add(new IntAssociationFacetField(2, "tags", "solr"));
    // 75% confidence level of genre 'computing'
    doc.add(new FloatAssociationFacetField(0.75f, "genre", "computing"));
    // 34% confidence level of genre 'software'
    doc.add(new FloatAssociationFacetField(0.34f, "genre", "software"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}
From source file:com.zghw.lucene.demo.DistanceFacetsExample.java
License:Apache License
/** Build the example index. */
public void index() throws IOException {
    IndexWriter writer = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // TODO: we could index in radians instead ... saves all the conversions in getBoundingBoxFilter

    // Add documents with latitude/longitude location:
    Document doc = new Document();
    doc.add(new DoubleField("latitude", 40.759011, Field.Store.NO));
    doc.add(new DoubleField("longitude", -73.9844722, Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.718266, Field.Store.NO));
    doc.add(new DoubleField("longitude", -74.007819, Field.Store.NO));
    writer.addDocument(doc);

    doc = new Document();
    doc.add(new DoubleField("latitude", 40.7051157, Field.Store.NO));
    doc.add(new DoubleField("longitude", -74.0088305, Field.Store.NO));
    writer.addDocument(doc);

    // Open near-real-time searcher
    searcher = new IndexSearcher(DirectoryReader.open(writer, true));

    writer.close();
}
From source file:com.zghw.lucene.demo.ExpressionAggregationFacetsExample.java
License:Apache License
/** Build the example index. */
private void index() throws IOException {
    IndexWriter indexWriter = new IndexWriter(indexDir,
            new IndexWriterConfig(FacetExamples.EXAMPLES_VER, new WhitespaceAnalyzer()));

    // Writes facet ords to a separate directory from the main index
    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(taxoDir);

    Document doc = new Document();
    doc.add(new TextField("c", "foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 5L));
    doc.add(new FacetField("A", "B"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    doc = new Document();
    doc.add(new TextField("c", "foo foo bar", Store.NO));
    doc.add(new NumericDocValuesField("popularity", 3L));
    doc.add(new FacetField("A", "C"));
    indexWriter.addDocument(config.build(taxoWriter, doc));

    indexWriter.close();
    taxoWriter.close();
}