List of usage examples for org.apache.lucene.index IndexWriter updateDocument
public long updateDocument(Term term, Iterable<? extends IndexableField> doc) throws IOException
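All of the examples below follow the same core call: build a Document, then pass it to IndexWriter.updateDocument(Term, doc), which atomically deletes any previously indexed document matching the term and then adds the new one (behaving like addDocument when no match exists). Here is a minimal, self-contained sketch of that pattern; the index directory name ("index"), the sample path value, and the class name UpdateDocumentSketch are illustrative assumptions rather than details taken from the sources below.

import java.nio.file.Paths;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws Exception {
        // Open (or create) an index in the local "index" directory -- an assumed location.
        IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());
        try (IndexWriter writer = new IndexWriter(FSDirectory.open(Paths.get("index")), config)) {
            Document doc = new Document();
            // "path" is the key field used by most of the examples below; the value here is made up.
            doc.add(new StringField("path", "/tmp/example.txt", Field.Store.YES));
            doc.add(new TextField("contents", "replacement text", Field.Store.NO));

            // Deletes any document whose "path" term matches, then adds the new document atomically.
            writer.updateDocument(new Term("path", "/tmp/example.txt"), doc);
            writer.commit();
        }
    }
}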
From source file:com.heejong.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.icdd.lucence.IndexFiles.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);
        doc.add(new SortedNumericDocValuesField("modified", lastModified));
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.icdd.lucene.CreateIndex.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    // filter non-xml files
    if (filter.accept(file.toFile())) {
        System.out.println("num: " + num);
        num++;
        if (num < endset && num >= offset) {
            try (InputStream stream = Files.newInputStream(file)) {
                // make a new, empty document
                Document doc = new Document();
                Field pathField = new StringField("path", file.toString(), Field.Store.YES);

                // derive the title from the file name: strip everything up to the
                // first '_' and the 4-character extension
                String filename = file.getFileName().toString();
                int post = filename.indexOf('_');
                if (post > 0) {
                    filename = filename.substring(post + 1, filename.length() - 4);
                }

                doc.add(pathField);
                doc.add(new StringField("title", filename, Field.Store.YES));
                doc.add(new SortedNumericDocValuesField("modified", lastModified));
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    logger.info("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    logger.info("updating " + file);
                    writer.updateDocument(new Term("path", file.toString()), doc);
                }
            }
        }
    }
}
From source file:com.impetus.kundera.index.LuceneIndexer.java
License:Apache License
/**
 * Updates an indexed document in the file system using Lucene.
 *
 * @param id the entity id used as the update term
 * @param document the document
 * @param EmbeddedEntityFieldName the embedded entity field name, or null to use the entity id field
 */
public void updateDocument(String id, Document document, String EmbeddedEntityFieldName) {
    if (log.isDebugEnabled()) {
        log.debug("Updating indexed document: {} in file system using Lucene", document);
    }
    IndexWriter w = getIndexWriter();
    try {
        Term term = null;
        if (EmbeddedEntityFieldName == null) {
            term = new Term(IndexingConstants.ENTITY_ID_FIELD, id);
        } else {
            term = new Term(EmbeddedEntityFieldName, id);
        }
        w.updateDocument(term, document);
    } catch (LuceneIndexingException lie) {
        log.error("Error while updating LuceneIndexer, caused by:", lie);
        throw new LuceneIndexingException(lie);
    } catch (IOException ioe) {
        log.error("Error while reading Lucene indexes, caused by:", ioe);
    }
}
From source file:com.javapr.plaintextindex.search.Index.java
License:Apache License
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException {
    // only process readable files
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                return;
            }
            try {
                // parse Word documents with Tika
                ContentHandler contenthandler = new BodyContentHandler();
                Metadata metadata = new Metadata();
                metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName());
                Parser parser = new AutoDetectParser();
                parser.parse(fis, contenthandler, metadata, new ParseContext());

                // create a Lucene document and store the content parsed by Tika
                Document doc = new Document();
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);
                Field filename = new StringField("filename", file.getName(), Field.Store.YES);
                doc.add(filename);
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));
                doc.add(new TextField("contents", contenthandler.toString(), Field.Store.NO));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // new index, so just add the document
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    long size = file.length() / 1024;
                    list.add(file + ", " + size + "kb");
                    // update the index if an older index document already exists
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:com.lin.studytest.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    InputStream stream = Files.newInputStream(file);
    try {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    } finally {
        // close the input stream once indexing is done
        stream.close();
    }
}
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongField that is indexed (i.e. efficiently filterable with
        // NumericRangeFilter). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongField("modified", lastModified, Field.Store.NO));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            //System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}
From source file:com.m3958.apps.pcms.lucene.IndexFiles.java
License:Apache License
static void indexCusValues(IndexWriter writer) throws IOException {
    // make a new, empty document
    Document doc = new Document();
    Field custidField = new StringField("custid", "fhsites-6376743", Field.Store.YES);
    doc.add(custidField);
    Field emailField = new StringField("email", "jianglibo@gmail.com", Field.Store.YES);
    doc.add(emailField);
    Field idField = new StringField("_id", "iooweokodkkkkosdodosdoods", Field.Store.YES);
    doc.add(idField);
    Field pathField = new StringField("_sn", "2013-05-0001", Field.Store.YES);
    doc.add(pathField);
    Calendar c = Calendar.getInstance();
    doc.add(new LongField("_createdAt", c.getTimeInMillis(), Field.Store.NO));
    doc.add(new LongField("_updatedAt", c.getTimeInMillis(), Field.Store.NO));
    doc.add(new TextField("contents", new StringReader("hello cusvalue")));

    // replace any existing document matching the "_id" term, then delete it again by the same term
    writer.updateDocument(new Term("_id", "iooweokodkkkkosdodosdoods"), doc);
    writer.deleteDocuments(new Term("_id", "iooweokodkkkkosdodosdoods"));
}
From source file:com.main.Indexer.java
static void indexDoc(IndexWriter writer, Path file, long lastModified)
        throws IOException, SAXException, TikaException {
    try (InputStream stream = Files.newInputStream(file)) {
        // parse the file content and metadata with Tika
        BodyContentHandler contenthandler = new BodyContentHandler();
        Metadata metadata = new Metadata();
        metadata.set(Metadata.RESOURCE_NAME_KEY, file.getFileName().toString());
        Parser parser = new AutoDetectParser();
        parser.parse(stream, contenthandler, metadata, new ParseContext());

        String[] metadataNames = metadata.names();
        String fileName = file.getFileName().toString();

        // Create the Lucene document
        Document doc = new Document();
        for (String key : metadataNames) {
            //String name = key.toLowerCase();
            String value = metadata.get(key);
            if (StringUtils.isBlank(value)) {
                continue;
            }
            if ("keywords".equalsIgnoreCase(key)) {
                for (String keyword : value.split(",?(\\s+)")) {
                    doc.add(new StringField("name", keyword, Field.Store.YES));
                }
            } else if ("title".equalsIgnoreCase(key)) {
                doc.add(new StringField("name", value, Field.Store.YES));
            } else {
                doc.add(new StringField("name", fileName, Field.Store.YES));
            }
        }
        doc.add(new StringField("path", file.toString(), Field.Store.YES));
        doc.add(new LongPoint("modified", lastModified));
        doc.add(new TextField("contents", contenthandler.toString(), Field.Store.YES));

        writer.updateDocument(new Term("path", file.toString()), doc);
    }
}
From source file:com.mycompany.lucenedemo.IndexFiles.java
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a field that is
        // indexed (i.e. searchable), but don't tokenize the field into separate
        // words and don't index term frequency or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        // Add the last modified date of the file as a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to millisecond resolution, which is often
        // too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down to the resolution you require.
        // For example the long value 2011021714 would mean February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}