List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
From source file:com.tistory.devyongsik.demo.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from ww w . jav a2 s .c o m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); //10. . } } } else { FileInputStream fis; try { fis = new FileInputStream(file); //11. Stream . } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document //12. . Document Row. Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: //13. Document, Document . // . path path . // . Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setOmitTermFreqAndPositions(true); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. //14. Field . // , . NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. //15. path, modified, contents Document . // , String, Numeric, Reader // . doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //16. add... // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); //17. Create or Update update . // 3.X API . writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.vnet.demo.service.lucene.LuceneService.java
License:Apache License
public void updateDoc(DocumentData documentData) { IndexWriterConfig config = new IndexWriterConfig(version, analyzer); IndexWriter write = null; try {//from w ww . j av a 2 s .c om write = new IndexWriter(index, config); Document doc = new Document(); doc.add(new LongField("id", documentData.getId(), Field.Store.YES)); doc.add(new TextField("title", documentData.getTitle(), Field.Store.YES)); doc.add(new TextField("summary", documentData.getSummary(), Field.Store.YES)); doc.add(new TextField("context", documentData.getContext(), Field.Store.YES)); doc.add(new LongField("createDate", documentData.getCreateDate(), Field.Store.YES)); write.updateDocument(new Term("id", documentData.getId().toString()), doc); } catch (IOException e) { e.printStackTrace(); } finally { IOUtils.closeQuietly(write); } }
From source file:com.work.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/* w w w . j av a 2 s.c om*/ // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. // doc.add(new LongPoint("modified", lastModified)); doc.add(new StringField("modified", lastModified + "", Field.Store.YES)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.xpn.xwiki.plugin.lucene.internal.IndexUpdater.java
License:Open Source License
private void addToIndex(IndexWriter writer, AbstractIndexData data, XWikiContext context) throws IOException, XWikiException { LOGGER.debug("addToIndex: [{}]", data); Document luceneDoc = new Document(); data.addDataToLuceneDocument(luceneDoc, context); // collecting all the fields for using up in search for (IndexableField field : luceneDoc.getFields()) { if (!fields.contains(field.name())) { fields.add(field.name());//from w w w. j a va 2 s .co m } } writer.updateDocument(data.getTerm(), luceneDoc); }
From source file:com.yahoo.bard.webservice.data.dimension.impl.LuceneSearchProvider.java
License:Apache License
/** * Update the dimension row in the index. * * @param luceneDimensionRowDoc Document to use for doing the update * @param fieldMap Mapping of DimensionFields to the Document's fields * @param writer Lucene IndexWriter to update the indexes of * @param newRow Row to update/*from www.ja va2s . c o m*/ * * @throws IOException if there is a problem updating the document */ private void updateDimensionRow(Document luceneDimensionRowDoc, Map<DimensionField, Field> fieldMap, IndexWriter writer, DimensionRow newRow) throws IOException { // Update the document fields with each field from the new dimension row for (DimensionField field : dimension.getDimensionFields()) { // Get the field to update from the lookup map Field fieldToUpdate = fieldMap.get(field); // Set field value to updated value fieldToUpdate.setStringValue(newRow.getOrDefault(field, "")); } // Build the term to delete the old document by the key value (which should be unique) Term keyTerm = new Term(fieldMap.get(dimension.getKey()).name(), newRow.getOrDefault(dimension.getKey(), "")); // Update the document by the key term writer.updateDocument(keyTerm, luceneDimensionRowDoc); }
From source file:com.yangxu.searchengine.index.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is * given, recurses over files and directories found under the given * directory.// w ww. j av a 2 s.co m * * NOTE: This method indexes one document per input file. This is slow. For * good throughput, put multiple documents into your input file(s). An * example of this is in the benchmark module, which can create "line doc" * files, one document per line, using the <a href= * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be * stored * @param file * The file to index, or the directory to recurse into to find * files to index * @throws IOException */ private void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read // title String titleValue = null; // content String contentValue = null; String urlValue = null; String indextimeValue = null; String uploadtimeValue = null; if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); LineNumberReader reader = new LineNumberReader(new InputStreamReader(fis, "UTF-8")); String line = null; StringBuilder sb = new StringBuilder(); while ((line = reader.readLine()) != null) { // int lineNumber = reader.getLineNumber(); switch (reader.getLineNumber()) { case 1: urlValue = line; break; case 2: uploadtimeValue = line; break; case 3: titleValue = line.split(":")[1]; break; case 4: break; default: sb.append(line); break; } /* * if (reader.getLineNumber() == 1) { urlValue = line; } * * if (reader.getLineNumber() == 3) { titleValue = * line.split(":")[1]; } else if (reader.getLineNumber() * > 4) { sb.append(line); } */ } contentValue = sb.toString(); reader.close(); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field urlField = new Field("url", urlValue, Field.Store.YES, Field.Index.NOT_ANALYZED); urlField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(urlField); Field titleField = new Field("title", titleValue, Field.Store.YES, Field.Index.ANALYZED); titleField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(titleField); Field contentField = new Field("content", contentValue, Field.Store.YES, Field.Index.ANALYZED); contentField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(contentField); // Add the last modified date of the file a field named // "modified". // Use a NumericField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. SimpleDateFormat formatter = new SimpleDateFormat("yyyyMMdd HH:mm:ss"); //Calendar cal = Calendar.getInstance(); // //timeValue = formatter.format(cal.getTime()); Date now = new Date(); indextimeValue = formatter.format(now); Field indextimeField = new Field("indextime", indextimeValue, Field.Store.YES, Field.Index.NOT_ANALYZED); titleField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(indextimeField); Field uploadtimeField = new Field("uploadtime", uploadtimeValue, Field.Store.YES, Field.Index.NOT_ANALYZED); titleField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(uploadtimeField); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. // doc.add(new Field("contents", new BufferedReader( // new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("url", urlValue), doc); writer.updateDocument(new Term("title", titleValue), doc); writer.updateDocument(new Term("content", contentValue), doc); writer.updateDocument(new Term("indextime", String.valueOf(indextimeValue)), doc); writer.updateDocument(new Term("uploadtime", String.valueOf(uploadtimeValue)), doc); } } finally { fis.close(); } } } }
From source file:com.zghw.lucene.demo.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * /*w w w . j a va2s. com*/ * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.zsq.lucene.chapter1.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w . j a v a 2s .co m*/ // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:concurrency.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// w w w . j a va2 s .com // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, // which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you // require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify // a Reader, // so that the text of the file is tokenized and indexed, but not // stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will // fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can // be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been // indexed) so // we use updateDocument instead to replace the old one matching // the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:cs412.project.search.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from ww w .j a va 2 s . c om * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.YES)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): // System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); writer.updateDocument(new Term("modified", Long.toString(file.lastModified())), doc); } } finally { fis.close(); } } } }