Example usage for org.apache.lucene.index IndexWriter updateDocument

List of usage examples for org.apache.lucene.index IndexWriter updateDocument

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter updateDocument.

Prototype

public long updateDocument(Term term,
            Iterable<? extends IndexableField> doc) throws IOException 

Source Link

Usage

From source file:com.heejong.lucene.IndexFiles.java

License:Apache License

/** Adds or replaces the index entry for a single file. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        // Fresh document to hold this file's fields.
        Document doc = new Document();

        // "path": searchable as one exact token (not tokenized, no term
        // frequencies or positions) and stored, so it can be shown in results
        // and doubles as the update key below.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));

        // "modified": a LongPoint is efficiently filterable with
        // PointRangeQuery. Millisecond resolution is often finer than needed;
        // a composed value (year/month/day/hour...) can be used instead.
        doc.add(new LongPoint("modified", lastModified));

        // "contents": tokenized and indexed from a Reader, not stored. The
        // stream is decoded as UTF-8; files in another encoding will make
        // searches for special characters fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Brand-new index: nothing to replace, a plain add suffices.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // The index may already hold this file: updateDocument deletes any
            // document whose "path" term matches, then adds the new one.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.icdd.lucence.IndexFiles.java

License:Apache License

/** Indexes one file, adding it to a fresh index or replacing its old entry. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        Document doc = new Document();

        // Exact-match, stored path field — also the key used by updateDocument.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        // Last-modified timestamp kept as a sorted numeric doc value.
        doc.add(new SortedNumericDocValuesField("modified", lastModified));
        // File body: tokenized and indexed from a UTF-8 reader, not stored.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Fresh index: no previous copy can exist, so a simple add is enough.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Possibly pre-existing index: replace any document whose "path"
            // term matches this exact path.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.icdd.lucene.CreateIndex.java

License:Apache License

/**
 * Indexes one XML file: only files accepted by {@code filter} and whose
 * sequence number (tracked in the static {@code num} counter) falls inside
 * the [offset, endset) window are processed.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    // Ignore anything the configured filter rejects (non-XML files).
    if (!filter.accept(file.toFile())) {
        return;
    }

    System.out.println("num: " + num);
    num++;
    // Outside the configured slice of the corpus — nothing to do.
    if (num >= endset || num < offset) {
        return;
    }

    try (InputStream in = Files.newInputStream(file)) {
        Document doc = new Document();

        // Derive a title from the file name: drop everything up to the first
        // '_' and strip the 4-character suffix (e.g. ".xml").
        String title = file.getFileName().toString();
        int sep = title.indexOf('_');
        if (sep > 0) {
            title = title.substring(sep + 1, title.length() - 4);
        }

        // Stored identifying fields plus a numeric doc value for the mtime.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new StringField("title", title, Field.Store.YES));
        doc.add(new SortedNumericDocValuesField("modified", lastModified));
        // Body text: tokenized from a UTF-8 reader, indexed but not stored.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Fresh index — no prior copy to replace.
            logger.info("adding " + file);
            writer.addDocument(doc);
        } else {
            // Replace any existing document keyed by this exact path.
            logger.info("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.impetus.kundera.index.LuceneIndexer.java

License:Apache License

/**
 * Indexes document in file system using lucene.
 * /*from w  w  w  .jav a 2s.  c  o m*/
 * @param metadata
 *            the metadata
 * @param document
 *            the document
 */
public void updateDocument(String id, Document document, String EmbeddedEntityFieldName) {
    if (log.isDebugEnabled()) {
        log.debug("Updateing indexed document: {} for in file system using Lucene", document);
    }

    IndexWriter w = getIndexWriter();
    try {
        Term term = null;
        if (EmbeddedEntityFieldName == null) {
            term = new Term(IndexingConstants.ENTITY_ID_FIELD, id);
        } else {
            term = new Term(EmbeddedEntityFieldName, id);
        }
        w.updateDocument(term, document);

    } catch (LuceneIndexingException lie) {
        log.error("Error while updating LuceneIndexer, Caused by :.", lie);
        throw new LuceneIndexingException(lie);
    } catch (IOException ioe) {
        log.error("Error while reading Lucene indexes, Caused by :.", ioe);

    }
}

From source file:com.javapr.plaintextindex.search.Index.java

License:Apache License

/**
 * Recursively indexes readable files: directories are walked, regular files
 * are parsed with Tika and then added to, or updated in, the Lucene index.
 */
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException {

    // Only readable entries are considered at all.
    if (!file.canRead()) {
        return;
    }

    if (file.isDirectory()) {
        // Recurse into every child entry (list() may return null on I/O error).
        String[] children = file.list();
        if (children != null) {
            for (String child : children) {
                indexDocs(writer, new File(file, child));
            }
        }
        return;
    }

    FileInputStream fis;
    try {
        fis = new FileInputStream(file);
    } catch (FileNotFoundException fnfe) {
        // File vanished (or became unreadable) since listing: skip it silently.
        return;
    }

    try {
        // Parse the document with Tika (format is auto-detected).
        ContentHandler handler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
        Parser parser = new AutoDetectParser();
        parser.parse(fis, handler, metadata, new ParseContext());

        // Build the Lucene document from the parsed Tika content.
        Document doc = new Document();
        doc.add(new StringField("path", file.getPath(), Field.Store.YES));
        doc.add(new StringField("filename", file.getName(), Field.Store.YES));
        doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
        doc.add(new TextField("contents", handler.toString(), Field.Store.NO));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index: simply add, nothing older can exist.
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            long size = file.length() / 1024;
            list.add(file + ", " + size + "kb");
            // Replace the previously indexed copy keyed by the path term.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.getPath()), doc);
        }

    } finally {
        fis.close();
    }
}

From source file:com.lin.studytest.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes a single document.
 *
 * <p>Fix: the original opened the {@code InputStream} inside a plain
 * {@code try} whose {@code finally} block was empty, so the file handle was
 * never closed. The stream is now managed by try-with-resources.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path".  Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter).  This indexes to milli-second resolution, which
        // is often too fine.  You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents".  Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // The stream is decoded as UTF-8; if the file uses another encoding,
        // searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.lucene.index.test.IndexFiles.java

License:Apache License

/** Adds one file to the index, or replaces its previous entry. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream input = Files.newInputStream(file)) {
        Document doc = new Document();

        // Store the exact path as a single untokenized token; it doubles as
        // the key used by updateDocument below.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));

        // Millisecond last-modified time, indexed for numeric range filtering
        // (a coarser composed value like 2011021714 = Feb 17 2011, 2-3 PM is
        // often preferable to millisecond resolution).
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Tokenize and index the body from a UTF-8 reader without storing it;
        // non-UTF-8 files will index special characters incorrectly.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(input, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Brand-new index: nothing old to replace (logging disabled here).
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Replace any existing document carrying the same "path" term.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}

From source file:com.m3958.apps.pcms.lucene.IndexFiles.java

License:Apache License

/**
 * Indexes a hard-coded sample "customer value" document, then immediately
 * deletes it again by its "_id" term — a round-trip demo of
 * updateDocument/deleteDocuments that leaves the index without the document.
 */
static void indexCusValues(IndexWriter writer) throws IOException {
    Document doc = new Document();

    // Stored, untokenized identifying fields.
    doc.add(new StringField("custid", "fhsites-6376743", Field.Store.YES));
    doc.add(new StringField("email", "jianglibo@gmail.com", Field.Store.YES));
    doc.add(new StringField("_id", "iooweokodkkkkosdodosdoods", Field.Store.YES));
    doc.add(new StringField("_sn", "2013-05-0001", Field.Store.YES));

    // Creation/update timestamps, indexed but not stored.
    Calendar now = Calendar.getInstance();
    doc.add(new LongField("_createdAt", now.getTimeInMillis(), Field.Store.NO));
    doc.add(new LongField("_updatedAt", now.getTimeInMillis(), Field.Store.NO));

    // Tokenized, unstored body text.
    doc.add(new TextField("contents", new StringReader("hello cusvalue")));

    // Replace any existing document with this "_id"...
    writer.updateDocument(new Term("_id", "iooweokodkkkkosdodosdoods"), doc);
    // ...then delete it again by the same term.
    writer.deleteDocuments(new Term("_id", "iooweokodkkkkosdodosdoods"));
}

From source file:com.main.Indexer.java

/**
 * Parses a file with Tika and upserts it into the index, keyed by its path.
 */
static void indexDoc(IndexWriter writer, Path file, long lastModified)
        throws IOException, SAXException, TikaException {
    try (InputStream stream = Files.newInputStream(file)) {

        // Let Tika auto-detect the format and extract body text + metadata.
        BodyContentHandler handler = new BodyContentHandler();
        Metadata meta = new Metadata();
        meta.set(Metadata.RESOURCE_NAME_KEY, file.getFileName().toString());
        Parser parser = new AutoDetectParser();
        parser.parse(stream, handler, meta, new ParseContext());

        String fileName = file.getFileName().toString();
        Document doc = new Document();

        // Populate "name" from the metadata: keyword lists are split into
        // separate values, a title is used verbatim, and every other key
        // contributes the file name. Blank metadata values are skipped.
        for (String key : meta.names()) {
            String value = meta.get(key);
            if (StringUtils.isBlank(value)) {
                continue;
            }

            if ("keywords".equalsIgnoreCase(key)) {
                for (String keyword : value.split(",?(\\s+)")) {
                    doc.add(new StringField("name", keyword, Field.Store.YES));
                }
            } else if ("title".equalsIgnoreCase(key)) {
                doc.add(new StringField("name", value, Field.Store.YES));
            } else {
                doc.add(new StringField("name", fileName, Field.Store.YES));
            }
        }

        // Path (stored, also the update key), mtime point, and stored body text.
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents", handler.toString(), Field.Store.YES));

        // Delete-then-add keyed on the exact path term.
        writer.updateDocument(new Term("path", file.toString()), doc);

    }
}

From source file:com.mycompany.lucenedemo.IndexFiles.java

/** Indexes one file, creating a new entry or replacing the existing one. */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream in = Files.newInputStream(file)) {
        Document document = new Document();

        // Exact-token stored path field; also the key for updateDocument.
        document.add(new StringField("path", file.toString(), Field.Store.YES));

        // LongPoint enables efficient PointRangeQuery filtering. Millisecond
        // resolution is often finer than needed; a composed value such as
        // 2011021714 (February 17, 2011, 2-3 PM) is a common alternative.
        document.add(new LongPoint("modified", lastModified));

        // Tokenized, unstored body text decoded as UTF-8; searching for
        // special characters fails when the file uses another encoding.
        document.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // Fresh index: nothing old can exist, so a plain add is enough.
            System.out.println("adding " + file);
            writer.addDocument(document);
        } else {
            // Possibly existing index: delete-then-add keyed on the path term.
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), document);
        }
    }
}