List of usage examples for org.apache.lucene.index IndexWriter updateDocument
public long updateDocument(Term term, Iterable<? extends IndexableField> doc) throws IOException
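All of the examples below follow the same core call: build a Document, then pass it to IndexWriter.updateDocument(Term, doc), which atomically deletes any previously indexed document matching the term and then adds the new one (behaving like addDocument when no match exists). Here is a minimal, self-contained sketch of that pattern; the index directory name ("index"), the sample path value, and the class name UpdateDocumentSketch are illustrative assumptions rather than details taken from the sources below.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Open (or create) an index in the local "index" directory -- an assumed location.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("index")), config)) {
            Document doc = new Document();
            // "path" is the key field used by most of the examples below; the value here is made up.
            doc.add(new StringField("path", "/tmp/example.txt", Field.Store.YES));
            doc.add(new TextField("contents", "replacement text", Field.Store.NO));

            // Deletes any document whose "path" term matches, then adds the new document atomically.
            writer.updateDocument(new Term("path", "/tmp/example.txt"), doc);
            writer.commit();
        }
    }
}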
From source file:com.heejong.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.icdd.lucence.IndexFiles.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new SortedNumericDocValuesField("modified", lastModified));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.icdd.lucene.CreateIndex.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    // filter non-xml files
    if (filter.accept(file.toFile())) {
        System.out.println("num: " + num);
        num++;
        if (num < endset && num >= offset) {
            try (InputStream stream = Files.newInputStream(file)) {
                // make a new, empty document
                Document doc = new Document();
                Field pathField = new StringField("path", file.toString(), Field.Store.YES);

                // derive the title from the file name: strip everything up to the
                // first '_' and the 4-character extension
                String filename = file.getFileName().toString();
                int post = filename.indexOf('_');
                if (post > 0) {
                    filename = filename.substring(post + 1, filename.length() - 4);
                }

                doc.add(pathField);
                doc.add(new StringField("title", filename, Field.Store.YES));
                doc.add(new SortedNumericDocValuesField("modified", lastModified));
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    logger.info("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    logger.info("updating " + file);
                    writer.updateDocument(new Term("path", file.toString()), doc);
                }
            }
        }
    }
}
From source file:com.impetus.kundera.index.LuceneIndexer.java
License:Apache License
/**
 * Updates an indexed document in the file system using Lucene.
 *
 * @param id the entity id used as the update term
 * @param document the document
 * @param EmbeddedEntityFieldName the embedded entity field name, or null to use the entity id field
 */
public void updateDocument(String id, Document document, String EmbeddedEntityFieldName) {
    if (log.isDebugEnabled()) {
        log.debug("Updating indexed document: {} in file system using Lucene", document);
    }
    IndexWriter w = getIndexWriter();
    try {
        Term term = null;
        if (EmbeddedEntityFieldName == null) {
            term = new Term(IndexingConstants.ENTITY_ID_FIELD, id);
        } else {
            term = new Term(EmbeddedEntityFieldName, id);
        }
        w.updateDocument(term, document);
    } catch (LuceneIndexingException lie) {
        log.error("Error while updating LuceneIndexer, caused by:", lie);
        throw new LuceneIndexingException(lie);
    } catch (IOException ioe) {
        log.error("Error while reading Lucene indexes, caused by:", ioe);
    }
}
From source file:com.javapr.plaintextindex.search.Index.java
License:Apache License
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException {
    // only process readable files
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                return;
            }
            try {
                // parse Word documents with Tika
                ContentHandler contenthandler = new BodyContentHandler();
                Metadata metadata = new Metadata();
                metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
                Parser parser = new AutoDetectParser();
                parser.parse(fis, contenthandler, metadata, new ParseContext());

                // create a Lucene document and store the content parsed by Tika
                Document doc = new Document();
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);
                Field filename = new StringField("filename", file.getName(), Field.Store.YES);
                doc.add(filename);
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
                doc.add(new TextField("contents", contenthandler.toString(), Field.Store.NO));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // new index, so just add the document
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    long size = file.length() / 1024;
                    list.add(file + ", " + size + "kb");
                    // update the index if an older index document already exists
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.lin.studytest.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    InputStream stream = Files.newInputStream(file);
    try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    } finally {
        // close the input stream once indexing is done
        stream.close();
    }
}
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.m3958.apps.pcms.lucene.IndexFiles.java
License:Apache License
static void indexCusValues(IndexWriter writer) throws IOException {
    // make a new, empty document
    Document doc = new Document();
    Field custidField = new StringField("custid", "fhsites-6376743", Field.Store.YES);
    doc.add(custidField);
    Field emailField = new StringField("email", "jianglibo@gmail.com", Field.Store.YES);
    doc.add(emailField);
    Field idField = new StringField("_id", "iooweokodkkkkosdodosdoods", Field.Store.YES);
    doc.add(idField);
    Field pathField = new StringField("_sn", "2013-05-0001", Field.Store.YES);
    doc.add(pathField);
    Calendar c = Calendar.getInstance();
    doc.add(new LongField("_createdAt", c.getTimeInMillis(), Field.Store.NO));
    doc.add(new LongField("_updatedAt", c.getTimeInMillis(), Field.Store.NO));
    doc.add(new TextField("contents", new StringReader("hello cusvalue")));

    // replace any existing document matching the "_id" term, then delete it again by the same term
    writer.updateDocument(new Term("_id", "iooweokodkkkkosdodosdoods"), doc);
    writer.deleteDocuments(new Term("_id", "iooweokodkkkkosdodosdoods"));
}
From source file:com.main.Indexer.java
static void indexDoc(IndexWriter writer, Path file, long lastModified)
        throws IOException, SAXException, TikaException {
    try (InputStream stream = Files.newInputStream(file)) {
        // parse the file content and metadata with Tika
        BodyContentHandler contenthandler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, file.getFileName().toString());
        Parser parser = new AutoDetectParser();
        parser.parse(stream, contenthandler, metadata, new ParseContext());

        String[] metadataNames = metadata.names();
        String fileName = file.getFileName().toString();

        // Create the Lucene document
        Document doc = new Document();
        for (String key : metadataNames) {
            //String name = key.toLowerCase();
            String value = metadata.get(key);
            if (StringUtils.isBlank(value)) {
                continue;
            }
            if ("keywords".equalsIgnoreCase(key)) {
                for (String keyword : value.split(",?(\\s+)")) {
                    doc.add(new StringField("name", keyword, Field.Store.YES));
                }
            } else if ("title".equalsIgnoreCase(key)) {
                doc.add(new StringField("name", value, Field.Store.YES));
            } else {
                doc.add(new StringField("name", fileName, Field.Store.YES));
            }
        }
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents", contenthandler.toString(), Field.Store.YES));

        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}
From source file:com.mycompany.lucenedemo.IndexFiles.java
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}