List of usage examples for org.apache.lucene.index IndexWriter updateDocument
private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { InputStream stream = Files.newInputStream(file); // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//w ww. ja v a 2s .com // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } }
From source file:com.nero.model.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * /* w w w . j a v a 2 s .c o m*/ * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.paladin.common.LuceneHelper.java
License:Apache License
/** * ?// w w w .j ava2 s . co m * //TODO:??? * * @param writer * @param table */ private static void indexTable(IndexWriter writer, String table) throws IOException { String sql = "SELECT ID, TITLE, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase(); if (table.equalsIgnoreCase("motto")) sql = "SELECT ID, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase(); List<Map<String, Object>> blogs = QueryHelper.queryList(sql); for (Map<String, Object> blog : blogs) { Document doc = new Document(); Field id_field = new Field("id", blog.get("ID").toString(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); // ? StringBuilder builder = new StringBuilder(); if (table.equalsIgnoreCase("motto")) builder.append(blog.get("CONTENT")); else builder.append(blog.get("TITLE")); builder.append(Constants.LUCENE_FIELD_SEP); builder.append(blog.get("CONTENT")); builder.append(Constants.LUCENE_FIELD_SEP); builder.append(blog.get("TAG")); Field t_c_t_field = new Field("title_content_tag", builder.toString(), Field.Store.YES, Field.Index.ANALYZED); doc.add(id_field); doc.add(t_c_t_field); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) writer.addDocument(doc); else// id?? writer.updateDocument(new Term("id", blog.get("ID").toString()), doc); } }
From source file:com.paladin.sys.lucene.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * <p/>//from w w w. java 2s . co m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (!file.canRead()) return; if (file.isDirectory() && file.list() != null) { String[] files = file.list(); for (int i = 0; i < files.length; i++) indexDocs(writer, new File(file, files[i])); } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException e) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { Document doc = new Document(); // Add the path of the file as a field named "path". Use a field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setOmitTermFreqAndPositions(true); doc.add(pathField); // Add the last modified date of the file a field named "modified". Use a NumericField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { out.println("adding " + file); writer.addDocument(doc); } else { out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to default of 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index./*from ww w.j a v a 2 s .com*/ * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { Directory indexDirectory = FSDirectory.open(this.INDEX_LOCATION); Directory facetDirectory = FSDirectory.open(this.FACET_LOCATION); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig indexWriterConfig = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; indexWriterConfig.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(indexDirectory, indexWriterConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(facetDirectory); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); Document doc = this.buildDocument(codeIndexDocument); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxonomyWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { try { writer.close(); } finally { taxonomyWriter.close(); } Singleton.getLogger().info("Closing writers"); } }
From source file:com.searchcode.app.service.CodeIndexer.java
License:Open Source License
/** * Given a queue of documents to index, index them by popping the queue limited to 1000 items. * This method must be synchronized as we have not added any logic to deal with multiple threads writing to the * index.//from www. j a v a 2 s. co m * TODO investigate how Lucene deals with multiple writes */ public synchronized void indexTimeDocuments(Queue<CodeIndexDocument> codeIndexDocumentQueue) throws IOException { // Index all documents and commit at the end for performance gains Directory dir = FSDirectory.open(Paths.get( Properties.getProperties().getProperty(Values.TIMEINDEXLOCATION, Values.DEFAULTTIMEINDEXLOCATION))); Directory facetsdir = FSDirectory.open(Paths.get(Properties.getProperties() .getProperty(Values.TIMEINDEXFACETLOCATION, Values.DEFAULTTIMEINDEXFACETLOCATION))); Analyzer analyzer = new CodeAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); FacetsConfig facetsConfig; iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); IndexWriter writer = new IndexWriter(dir, iwc); TaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(facetsdir); try { CodeIndexDocument codeIndexDocument = codeIndexDocumentQueue.poll(); int count = 0; while (codeIndexDocument != null) { Singleton.getLogger() .info("Indexing time file " + codeIndexDocument.getRepoLocationRepoNameLocationFilename()); this.sharedService.decrementCodeIndexLinesCount(codeIndexDocument.getCodeLines()); Document doc = new Document(); // Path is the primary key for documents // needs to include repo location, project name and then filepath including file and revision Field pathField = new StringField("path", codeIndexDocument.getRepoLocationRepoNameLocationFilename() + ":" + codeIndexDocument.getRevision(), Field.Store.YES); doc.add(pathField); // Add in facets facetsConfig = new FacetsConfig(); facetsConfig.setIndexFieldName(Values.LANGUAGENAME, Values.LANGUAGENAME); facetsConfig.setIndexFieldName(Values.REPONAME, Values.REPONAME); facetsConfig.setIndexFieldName(Values.CODEOWNER, Values.CODEOWNER); facetsConfig.setIndexFieldName(Values.DATEYEARMONTHDAY, Values.DATEYEARMONTHDAY); facetsConfig.setIndexFieldName(Values.DATEYEARMONTH, Values.DATEYEARMONTH); facetsConfig.setIndexFieldName(Values.DATEYEAR, Values.DATEYEAR); facetsConfig.setIndexFieldName(Values.REVISION, Values.REVISION); facetsConfig.setIndexFieldName(Values.DELETED, Values.DELETED); if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getLanguageName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRepoName()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REPONAME, codeIndexDocument.getRepoName())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getCodeOwner()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.CODEOWNER, codeIndexDocument.getCodeOwner())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonthDay().substring(0, 6))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getYearMonthDay()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DATEYEAR, codeIndexDocument.getYearMonthDay().substring(0, 4))); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.getRevision()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.REVISION, codeIndexDocument.getRevision())); } if (Singleton.getHelpers().isNullEmptyOrWhitespace(codeIndexDocument.isDeleted()) == false) { doc.add(new SortedSetDocValuesFacetField(Values.DELETED, codeIndexDocument.isDeleted())); } String indexContents = Values.EMPTYSTRING; indexContents += this.searchcodeLib.splitKeywords(codeIndexDocument.getContents()); indexContents += this.searchcodeLib.codeCleanPipeline(codeIndexDocument.getContents()); this.searchcodeLib.addToSpellingCorrector(codeIndexDocument.getContents()); // Store in spelling corrector indexContents = indexContents.toLowerCase(); doc.add(new TextField(Values.REPONAME, codeIndexDocument.getRepoName(), Field.Store.YES)); doc.add(new TextField(Values.FILENAME, codeIndexDocument.getFileName(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATION, codeIndexDocument.getFileLocation(), Field.Store.YES)); doc.add(new TextField(Values.FILELOCATIONFILENAME, codeIndexDocument.getFileLocationFilename(), Field.Store.YES)); doc.add(new TextField(Values.MD5HASH, codeIndexDocument.getMd5hash(), Field.Store.YES)); doc.add(new TextField(Values.LANGUAGENAME, codeIndexDocument.getLanguageName(), Field.Store.YES)); doc.add(new IntField(Values.CODELINES, codeIndexDocument.getCodeLines(), Field.Store.YES)); doc.add(new TextField(Values.CONTENTS, indexContents, Field.Store.NO)); doc.add(new TextField(Values.REPOLOCATION, codeIndexDocument.getRepoRemoteLocation(), Field.Store.YES)); doc.add(new TextField(Values.CODEOWNER, codeIndexDocument.getCodeOwner(), Field.Store.YES)); doc.add(new TextField(Values.REVISION, codeIndexDocument.getRevision(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTHDAY, codeIndexDocument.getYearMonthDay(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEARMONTH, codeIndexDocument.getYearMonth(), Field.Store.YES)); doc.add(new TextField(Values.DATEYEAR, codeIndexDocument.getYear(), Field.Store.YES)); doc.add(new TextField(Values.MESSAGE, codeIndexDocument.getMessage(), Field.Store.YES)); doc.add(new TextField(Values.DELETED, codeIndexDocument.isDeleted(), Field.Store.YES)); // Extra metadata in this case when it was last indexed doc.add(new LongField(Values.MODIFIED, new Date().getTime(), Field.Store.YES)); writer.updateDocument( new Term(Values.PATH, codeIndexDocument.getRepoLocationRepoNameLocationFilename()), facetsConfig.build(taxoWriter, doc)); count++; if (count >= INDEX_QUEUE_BATCH_SIZE) { codeIndexDocument = null; } else { codeIndexDocument = codeIndexDocumentQueue.poll(); } } } finally { Singleton.getLogger().info("Closing writers"); writer.close(); taxoWriter.close(); } }
From source file:com.sg.business.vault.index.demo.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * // ww w .java 2 s. com * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); //$NON-NLS-1$ doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. // doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. // doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); try { doc.add(new TextField("contents", FileUtil.getContent(file.getName(), fis), //$NON-NLS-1$ Field.Store.NO)); } catch (Exception e) { e.printStackTrace(); } if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); //$NON-NLS-1$ writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); //$NON-NLS-1$ writer.updateDocument(new Term("path", file.getPath()), doc); //$NON-NLS-1$ } } finally { fis.close(); } } } }
From source file:com.slieer.app.lecene3x.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is * given, recurses over files and directories found under the given * directory.//from w w w.jav a2 s . co m * * NOTE: This method indexes one document per input file. This is slow. For * good throughput, put multiple documents into your input file(s). An * example of this is in the benchmark module, which can create "line doc" * files, one document per line, using the <a href= * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be * stored * @param file * The file to index, or the directory to recurse into to find * files to index * @throws IOException * If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named // "modified". // Use a LongField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.slieer.lucene.apachedemo.IndexFiles.java
License:Apache License
static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); }//from w w w.j a v a2 s .c om } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named // "modified". // Use a LongField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.sxc.lucene.index.IndexingTest.java
License:Apache License
public void testUpdate() throws IOException { assertEquals(1, getHitCount("city", "amsterdam")); IndexWriter writer = getWriter(); Document doc = new Document(); // A doc.add(new StringField("id", "1", Field.Store.YES)); // A doc.add(new TextField("country", "Netherlands", Field.Store.YES)); // A doc.add(new TextField("contents", "Den Haag has a lot of museums", Field.Store.NO)); // A doc.add(new TextField("city", "Den Haag", Field.Store.YES)); // A writer.updateDocument(new Term("id", "1"), // B doc); // B writer.close();/*ww w . j ava 2s. c o m*/ assertEquals(0, getHitCount("city", "amsterdam"));// C assertEquals(1, getHitCount("city", "haag")); // D }