List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
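Note: the signature above is IndexWriter's private internal overload. The public entry point is updateDocument(Term term, Iterable<? extends IndexableField> doc), which atomically deletes every document containing the given term and then adds the new document, as the examples below show. A minimal, self-contained sketch of that call pattern, assuming Lucene 8+ (the field names, analyzer, and in-memory directory here are illustrative choices, not taken from any example below):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // First version of the document, keyed by an untokenized "id" field:
            Document doc = new Document();
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("body", "first version", Field.Store.NO));
            writer.addDocument(doc);

            // Replacement: updateDocument deletes every document containing
            // the term, then adds the new document, as one atomic operation.
            Document newDoc = new Document();
            newDoc.add(new StringField("id", "42", Field.Store.YES));
            newDoc.add(new TextField("body", "second version", Field.Store.NO));
            writer.updateDocument(new Term("id", "42"), newDoc);
            writer.commit();
        }
    }
}

For the replacement to work, the term's field must be indexed without tokenization (e.g. a StringField), so that new Term("id", "42") matches the stored identifier exactly.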
From source file:com.example.search.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            PageProcess pageProcessor;
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
                pageProcessor = new PageProcess(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            WebInfo webInfo;
            try {
                while ((webInfo = pageProcessor.next()) != null) {
                    // make a new, empty document and process the page
                    Document doc = new Document();

                    // Disabled leftover from the stock Lucene demo: a "path" field
                    // indexed as a single untokenized token.
                    // Field pathField = new Field("path", file.getPath(),
                    //         Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                    // pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                    // doc.add(pathField);

                    // Store page metadata without indexing it; only "content" is
                    // analyzed and therefore searchable:
                    Field urlField = new Field("url", webInfo.url, Field.Store.YES, Field.Index.NO);
                    doc.add(urlField);
                    Field publishidField = new Field("publishid", webInfo.publishid, Field.Store.YES,
                            Field.Index.NO);
                    doc.add(publishidField);
                    Field subjectidField = new Field("subjectid", webInfo.subjectid, Field.Store.YES,
                            Field.Index.NO);
                    doc.add(subjectidField);
                    Field titleField = new Field("title", webInfo.title, Field.Store.YES, Field.Index.NO);
                    doc.add(titleField);
                    doc.add(new Field("keywords", webInfo.keywords, Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("description", webInfo.description, Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("content", webInfo.content, Field.Store.YES, Field.Index.ANALYZED));

                    // Also disabled demo leftovers: a NumericField with the file's
                    // last-modified time, and the raw file contents via a Reader.
                    // NumericField modifiedField = new NumericField("modified");
                    // modifiedField.setLongValue(file.lastModified());
                    // doc.add(modifiedField);
                    // doc.add(new Field("contents",
                    //         new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                        // New index, so we just add the document (no old document can be there):
                        System.out.println("adding " + file);
                        writer.addDocument(doc);
                    } else {
                        // Existing index (an old copy of this document may have been indexed) so
                        // we use updateDocument instead to replace the old one matching the exact
                        // url, if present:
                        System.out.println("updating " + file);
                        writer.updateDocument(new Term("url", webInfo.url), doc);
                    }
                } // end while
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** Indexes a single document */
private static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file, a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.fun.sb.demo.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file, a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM. (Disabled in this variant.)
        // doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public void saveBookmark(String id, String indexName) {
    try {
        IndexWriter mainIndexWriter = new IndexWriter(
                FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        Document doc = new Document();
        doc.add(new Field("qcId", id, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("qcIndexName", indexName, Store.YES, Index.NOT_ANALYZED));
        mainIndexWriter.updateDocument(new Term("id", id), doc);
        mainIndexWriter.prepareCommit();
        mainIndexWriter.commit();
        mainIndexWriter.close();
        mainIndexWriter = null;
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
From source file:com.github.alvanson.xltsearch.IndexTask.java
License:Apache License
@Override
protected Boolean call() {
    IndexWriter iwriter = null;
    boolean result = false;
    updateMessage("started");
    try {
        int count = 0;
        Docket docket;
        IndexWriterConfig iwconfig = new IndexWriterConfig(config.getVersion(), config.getAnalyzer());
        iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        iwconfig.setSimilarity(config.getSimilarity());
        iwriter = new IndexWriter(config.getDirectory(), iwconfig);
        while ((docket = inQueue.take()) != Docket.DONE) {
            count++;
            updateMessage(docket.relPath);
            switch (docket.status) {
            case PARSED:
                // index parsed file
                Document doc = new Document();
                // store relative path ** must be indexed for updateDocument
                doc.add(new StringField(config.pathField, docket.relPath, Field.Store.YES));
                // index content
                doc.add(new TextField(config.contentField, docket.content.toString(), Field.Store.NO));
                // index standard metadata
                for (Map.Entry<String, Property> e : config.metadataFields.entrySet()) {
                    for (String value : docket.metadata.getValues(e.getValue())) {
                        doc.add(new TextField(e.getKey(), value, Field.Store.YES));
                    }
                }
                // store hashsum
                doc.add(new StringField(config.hashSumField, docket.hashSum, Field.Store.YES));
                // add/update document
                iwriter.updateDocument(new Term(config.pathField, docket.relPath), doc);
                // fall through
            case PASS:
                break;
            case DELETE:
                iwriter.deleteDocuments(new Term(config.pathField, docket.relPath));
                break;
            default:
                logger.error("Unexpected docket state while processing {}: {}", docket.relPath,
                        docket.status.toString());
                cancel(true); // cancel task
            }
            updateProgress(count, count + docket.workLeft);
        } // end of queue
        updateMessage("complete");
        updateProgress(count, count + docket.workLeft);
        result = true;
    } catch (IOException ex) {
        updateMessage("I/O exception");
        logger.error("I/O exception while writing to index", ex);
    } catch (InterruptedException ex) {
        if (isCancelled()) {
            updateMessage("cancelled");
        } else {
            updateMessage("interrupted");
            logger.error("Interrupted", ex);
        }
    }
    // close iwriter
    if (iwriter != null) {
        try {
            iwriter.close();
        } catch (IOException ex) {
            logger.warn("I/O exception while closing index writer", ex);
        }
    }
    return result;
}
From source file:com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java
License:Apache License
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;
    Directory indexDir = null;
    IndexWriter writer = null;
    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.updateDocument(new Term("id", document.get("id")), document);
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", -1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }
    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}
From source file:com.github.rnewson.couchdb.lucene.Progress.java
License:Apache License
public void save(final IndexWriter writer) throws IOException {
    writer.updateDocument(PROGRESS_TERM, progress);
}
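PROGRESS_TERM and progress are fields defined elsewhere in couchdb-lucene's Progress class and are not shown here. A hedged sketch of the underlying pattern — a single bookkeeping document kept fresh under a constant term — with field names that are assumptions, not the project's actual constants:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

class ProgressSketch {
    // Hypothetical field/value; couchdb-lucene's actual constants are not shown.
    private static final Term PROGRESS_TERM = new Term("_id", "_progress");
    private final Document progress = new Document();

    ProgressSketch() {
        // The document must carry the term's field/value so the next
        // updateDocument call can find and replace it:
        progress.add(new StringField("_id", "_progress", Field.Store.NO));
    }

    void save(IndexWriter writer) throws IOException {
        // Because the term is constant, at most one progress document ever
        // exists in the index; updateDocument replaces it atomically.
        writer.updateDocument(PROGRESS_TERM, progress);
    }
}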
From source file:com.github.tenorviol.gitsearch.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // TODO: make these exclusions configurable
    String fileName = file.getName();
    if (fileName.charAt(0) == '.') {
        return;
    }
    int dotLoc = fileName.lastIndexOf('.');
    String extension = fileName.substring(dotLoc + 1);
    // known binary extensions
    if (extension.equals("jpg") || extension.equals("png") || extension.equals("gif")
            || extension.equals("pdf") || extension.equals("fla") || extension.equals("flv")
            || extension.equals("swf") || extension.equals("swz")) {
        return;
    }
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(pathField);

                // Add the last modified date of the file, a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
public void update(IndexObject indexObject) {
    IndexWriter indexWriter = null;
    try {
        Term term = new Term("id", indexObject.getId().toString());
        IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
        indexWriter = new IndexWriter(this.getDirectory(), config);
        indexWriter.updateDocument(term, DocumentUtil.IndexObject2Document(indexObject));
    } catch (Exception e) {
        e.printStackTrace();
        try {
            // guard against an NPE when IndexWriter construction itself failed
            if (indexWriter != null) {
                indexWriter.rollback();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }
}
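Since IndexWriter implements Closeable, the same update can be written with try-with-resources, which removes the explicit close and the null handling. One caveat: close() commits buffered changes by default, whereas the version above rolls back on failure. A sketch reusing the same assumed helpers (getAnalyzer, getDirectory, DocumentUtil.IndexObject2Document):

public void update(IndexObject indexObject) {
    IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
    // try-with-resources closes the writer on every path; by default close()
    // commits whatever was successfully buffered (unlike rollback() above).
    try (IndexWriter indexWriter = new IndexWriter(this.getDirectory(), config)) {
        Term term = new Term("id", indexObject.getId().toString());
        indexWriter.updateDocument(term, DocumentUtil.IndexObject2Document(indexObject));
    } catch (Exception e) {
        e.printStackTrace();
    }
}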
From source file:com.gmail.mosoft521.luceneDemo.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * <p/>
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file, a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}