List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
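Note: the signature above is IndexWriter's private internal overload. The public entry point is updateDocument(Term term, Iterable<? extends IndexableField> doc), which atomically deletes every document containing the given term and then adds the new document, as the examples below show. A minimal, self-contained sketch of that call pattern, assuming Lucene 8+ (the field names, analyzer, and in-memory directory here are illustrative choices, not taken from any example below):

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        try (Directory dir = new ByteBuffersDirectory();
                IndexWriter writer = new IndexWriter(dir, new IndexWriterConfig(new StandardAnalyzer()))) {
            // First version of the document, keyed by an untokenized "id" field:
            Document doc = new Document();
            doc.add(new StringField("id", "42", Field.Store.YES));
            doc.add(new TextField("body", "first version", Field.Store.NO));
            writer.addDocument(doc);

            // Replacement: updateDocument deletes every document containing
            // the term, then adds the new document, as one atomic operation.
            Document newDoc = new Document();
            newDoc.add(new StringField("id", "42", Field.Store.YES));
            newDoc.add(new TextField("body", "second version", Field.Store.NO));
            writer.updateDocument(new Term("id", "42"), newDoc);
            writer.commit();
        }
    }
}

For the replacement to work, the term's field must be indexed without tokenization (e.g. a StringField), so that new Term("id", "42") matches the stored identifier exactly.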
From source file:com.example.search.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            PageProcess pageProcessor;
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
                pageProcessor = new PageProcess(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            WebInfo webInfo;
            try {
                while ((webInfo = pageProcessor.next()) != null) {
                    // make a new, empty document and process the page
                    Document doc = new Document();

                    // Disabled leftover from the stock Lucene demo: a "path" field
                    // indexed as a single untokenized token.
                    // Field pathField = new Field("path", file.getPath(),
                    //         Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS);
                    // pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                    // doc.add(pathField);

                    // Store page metadata without indexing it; only "content" is
                    // analyzed and therefore searchable:
                    Field urlField = new Field("url", webInfo.url, Field.Store.YES, Field.Index.NO);
                    doc.add(urlField);
                    Field publishidField = new Field("publishid", webInfo.publishid, Field.Store.YES,
                            Field.Index.NO);
                    doc.add(publishidField);
                    Field subjectidField = new Field("subjectid", webInfo.subjectid, Field.Store.YES,
                            Field.Index.NO);
                    doc.add(subjectidField);
                    Field titleField = new Field("title", webInfo.title, Field.Store.YES, Field.Index.NO);
                    doc.add(titleField);
                    doc.add(new Field("keywords", webInfo.keywords, Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("description", webInfo.description, Field.Store.YES, Field.Index.NO));
                    doc.add(new Field("content", webInfo.content, Field.Store.YES, Field.Index.ANALYZED));

                    // Also disabled demo leftovers: a NumericField with the file's
                    // last-modified time, and the raw file contents via a Reader.
                    // NumericField modifiedField = new NumericField("modified");
                    // modifiedField.setLongValue(file.lastModified());
                    // doc.add(modifiedField);
                    // doc.add(new Field("contents",
                    //         new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                    if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                        // New index, so we just add the document (no old document can be there):
                        System.out.println("adding " + file);
                        writer.addDocument(doc);
                    } else {
                        // Existing index (an old copy of this document may have been indexed) so
                        // we use updateDocument instead to replace the old one matching the exact
                        // url, if present:
                        System.out.println("updating " + file);
                        writer.updateDocument(new Term("url", webInfo.url), doc);
                    }
                } // end while
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** Indexes a single document */
private static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file, a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.fun.sb.demo.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file, a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM. (Disabled in this variant.)
        // doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.gauronit.tagmata.core.Indexer.java
License:Open Source License
public void saveBookmark(String id, String indexName) {
    try {
        IndexWriter mainIndexWriter = new IndexWriter(
                FSDirectory.open(new File(indexDir + File.separator + MAIN_INDEX)),
                new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
        Document doc = new Document();
        doc.add(new Field("qcId", id, Store.YES, Index.NOT_ANALYZED));
        doc.add(new Field("qcIndexName", indexName, Store.YES, Index.NOT_ANALYZED));
        mainIndexWriter.updateDocument(new Term("id", id), doc);
        mainIndexWriter.prepareCommit();
        mainIndexWriter.commit();
        mainIndexWriter.close();
        mainIndexWriter = null;
    } catch (Exception ex) {
        ex.printStackTrace();
    }
}
From source file:com.github.alvanson.xltsearch.IndexTask.java
License:Apache License
@Override
protected Boolean call() {
    IndexWriter iwriter = null;
    boolean result = false;
    updateMessage("started");
    try {
        int count = 0;
        Docket docket;
        IndexWriterConfig iwconfig = new IndexWriterConfig(config.getVersion(), config.getAnalyzer());
        iwconfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        iwconfig.setSimilarity(config.getSimilarity());
        iwriter = new IndexWriter(config.getDirectory(), iwconfig);
        while ((docket = inQueue.take()) != Docket.DONE) {
            count++;
            updateMessage(docket.relPath);
            switch (docket.status) {
            case PARSED:
                // index parsed file
                Document doc = new Document();
                // store relative path ** must be indexed for updateDocument
                doc.add(new StringField(config.pathField, docket.relPath, Field.Store.YES));
                // index content
                doc.add(new TextField(config.contentField, docket.content.toString(), Field.Store.NO));
                // index standard metadata
                for (Map.Entry<String, Property> e : config.metadataFields.entrySet()) {
                    for (String value : docket.metadata.getValues(e.getValue())) {
                        doc.add(new TextField(e.getKey(), value, Field.Store.YES));
                    }
                }
                // store hashsum
                doc.add(new StringField(config.hashSumField, docket.hashSum, Field.Store.YES));
                // add/update document
                iwriter.updateDocument(new Term(config.pathField, docket.relPath), doc);
                // fall through
            case PASS:
                break;
            case DELETE:
                iwriter.deleteDocuments(new Term(config.pathField, docket.relPath));
                break;
            default:
                logger.error("Unexpected docket state while processing {}: {}", docket.relPath,
                        docket.status.toString());
                cancel(true); // cancel task
            }
            updateProgress(count, count + docket.workLeft);
        } // end of queue
        updateMessage("complete");
        updateProgress(count, count + docket.workLeft);
        result = true;
    } catch (IOException ex) {
        updateMessage("I/O exception");
        logger.error("I/O exception while writing to index", ex);
    } catch (InterruptedException ex) {
        if (isCancelled()) {
            updateMessage("cancelled");
        } else {
            updateMessage("interrupted");
            logger.error("Interrupted", ex);
        }
    }
    // close iwriter
    if (iwriter != null) {
        try {
            iwriter.close();
        } catch (IOException ex) {
            logger.warn("I/O exception while closing index writer", ex);
        }
    }
    return result;
}
From source file:com.github.mosuka.apache.lucene.example.cmd.UpdateCommand.java
License:Apache License
@Override
public void execute(Map<String, Object> attrs) {
    Map<String, Object> responseMap = new LinkedHashMap<String, Object>();
    String responseJSON = null;
    Directory indexDir = null;
    IndexWriter writer = null;
    try {
        String index = (String) attrs.get("index");
        String uniqueId = (String) attrs.get("unique_id");
        String text = (String) attrs.get("text");

        indexDir = FSDirectory.open(new File(index).toPath());

        Document document = LuceneExampleUtil.createDocument(uniqueId, text);

        IndexWriterConfig config = new IndexWriterConfig(LuceneExampleUtil.createAnalyzerWrapper());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        writer = new IndexWriter(indexDir, config);
        writer.updateDocument(new Term("id", document.get("id")), document);
        writer.commit();

        responseMap.put("status", 0);
        responseMap.put("message", "OK");
    } catch (IOException e) {
        responseMap.put("status", -1);
        responseMap.put("message", e.getMessage());
    } finally {
        try {
            if (writer != null) {
                writer.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
        try {
            if (indexDir != null) {
                indexDir.close();
            }
        } catch (IOException e) {
            responseMap.put("status", 1);
            responseMap.put("message", e.getMessage());
        }
    }
    try {
        ObjectMapper mapper = new ObjectMapper();
        responseJSON = mapper.writeValueAsString(responseMap);
    } catch (IOException e) {
        responseJSON = String.format("{\"status\":1, \"message\":\"%s\"}", e.getMessage());
    }
    System.out.println(responseJSON);
}
From source file:com.github.rnewson.couchdb.lucene.Progress.java
License:Apache License
public void save(final IndexWriter writer) throws IOException {
    writer.updateDocument(PROGRESS_TERM, progress);
}
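PROGRESS_TERM and progress are fields defined elsewhere in couchdb-lucene's Progress class and are not shown here. A hedged sketch of the underlying pattern — a single bookkeeping document kept fresh under a constant term — with field names that are assumptions, not the project's actual constants:

import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;

class ProgressSketch {
    // Hypothetical field/value; couchdb-lucene's actual constants are not shown.
    private static final Term PROGRESS_TERM = new Term("_id", "_progress");
    private final Document progress = new Document();

    ProgressSketch() {
        // The document must carry the term's field/value so the next
        // updateDocument call can find and replace it:
        progress.add(new StringField("_id", "_progress", Field.Store.NO));
    }

    void save(IndexWriter writer) throws IOException {
        // Because the term is constant, at most one progress document ever
        // exists in the index; updateDocument replaces it atomically.
        writer.updateDocument(PROGRESS_TERM, progress);
    }
}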
From source file:com.github.tenorviol.gitsearch.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // TODO: make these exclusions configurable
    String fileName = file.getName();
    if (fileName.charAt(0) == '.') {
        return;
    }
    int dotLoc = fileName.lastIndexOf('.');
    String extension = fileName.substring(dotLoc + 1);
    // known binary extensions
    if (extension.equals("jpg") || extension.equals("png") || extension.equals("gif")
            || extension.equals("pdf") || extension.equals("fla") || extension.equals("flv")
            || extension.equals("swf") || extension.equals("swz")) {
        return;
    }
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new Field("path", file.getPath(), Field.Store.YES,
                        Field.Index.NOT_ANALYZED_NO_NORMS);
                pathField.setIndexOptions(IndexOptions.DOCS_ONLY);
                doc.add(pathField);

                // Add the last modified date of the file, a field named "modified".
                // Use a NumericField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                NumericField modifiedField = new NumericField("modified");
                modifiedField.setLongValue(file.lastModified());
                doc.add(modifiedField);

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8"))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.github.wxiaoqi.search.lucene.LuceneDao.java
License:Open Source License
public void update(IndexObject indexObject) {
    IndexWriter indexWriter = null;
    try {
        Term term = new Term("id", indexObject.getId().toString());
        IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
        indexWriter = new IndexWriter(this.getDirectory(), config);
        indexWriter.updateDocument(term, DocumentUtil.IndexObject2Document(indexObject));
    } catch (Exception e) {
        e.printStackTrace();
        try {
            // guard against an NPE when IndexWriter construction itself failed
            if (indexWriter != null) {
                indexWriter.rollback();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    } finally {
        try {
            if (indexWriter != null) {
                indexWriter.close();
            }
        } catch (IOException e1) {
            e1.printStackTrace();
        }
    }
}
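Since IndexWriter implements Closeable, the same update can be written with try-with-resources, which removes the explicit close and the null handling. One caveat: close() commits buffered changes by default, whereas the version above rolls back on failure. A sketch reusing the same assumed helpers (getAnalyzer, getDirectory, DocumentUtil.IndexObject2Document):

public void update(IndexObject indexObject) {
    IndexWriterConfig config = new IndexWriterConfig(this.getAnalyzer());
    // try-with-resources closes the writer on every path; by default close()
    // commits whatever was successfully buffered (unlike rollback() above).
    try (IndexWriter indexWriter = new IndexWriter(this.getDirectory(), config)) {
        Term term = new Term("id", indexObject.getId().toString());
        indexWriter.updateDocument(term, DocumentUtil.IndexObject2Document(indexObject));
    } catch (Exception e) {
        e.printStackTrace();
    }
}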
From source file:com.gmail.mosoft521.luceneDemo.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 * <p/>
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on Windows, some temporary files raise this exception with an
                // "access denied" message; checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file, a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to millisecond resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down to the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}