List of usage examples for org.apache.lucene.index IndexWriter updateDocument
The examples below call the public method updateDocument(Term term, Iterable<? extends IndexableField> doc), which atomically deletes every document containing the given term and then adds the new document. In recent Lucene versions this public method delegates to an internal overload:

private long updateDocument(final DocumentsWriterDeleteQueue.Node<?> delNode, Iterable<? extends IndexableField> doc) throws IOException
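Before the real-world examples, here is a minimal sketch of the delete-then-add pattern they all follow. The class name, index path, field names, and values are invented for illustration; only the Lucene calls themselves are the point.

import java.io.IOException;
import java.nio.file.Paths;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class UpdateDocumentSketch {
    public static void main(String[] args) throws IOException {
        // Each document carries a unique "id" key; updateDocument(Term, doc)
        // atomically deletes any existing document matching that term and then
        // adds the new version of the document.
        try (Directory dir = FSDirectory.open(Paths.get("/tmp/example-index"));
                IndexWriter writer = new IndexWriter(dir,
                        new IndexWriterConfig(new StandardAnalyzer()))) {
            Document doc = new Document();
            doc.add(new StringField("id", "doc-42", Field.Store.YES));
            doc.add(new TextField("contents", "updated text for doc-42", Field.Store.NO));
            writer.updateDocument(new Term("id", "doc-42"), doc);
            writer.commit();
        }
    }
}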
From source file:org.abstracthorizon.proximity.indexer.LuceneIndexer.java
License:Apache License
/**
 * Adds the item to index.
 *
 * @param writer the writer
 * @param UID the UID
 * @param item the item
 * @throws IOException Signals that an I/O exception has occurred.
 */
protected void addItemToIndex(IndexWriter writer, String UID, ItemProperties item) throws IOException {
    Document ipDoc = itemProperties2Document(item);
    ipDoc.add(new Field("UID", UID, Field.Store.YES, Field.Index.UN_TOKENIZED));
    writer.updateDocument(new Term("UID", UID), ipDoc);
    dirtyItems++;
    if (dirtyItems > dirtyItemTreshold) {
        logger.debug("Optimizing Lucene index as dirtyItemTreshold is exceeded.");
        writer.optimize();
        dirtyItems = 0;
    }
}
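Note that this example targets the old Lucene 2.x/3.x API: Field.Index.UN_TOKENIZED and IndexWriter.optimize() no longer exist in current releases. A rough modern equivalent of the same update is sketched below; the surrounding fields and threshold logic are assumptions carried over from the example above.

// Sketch against the post-4.0 API: StringField replaces the untokenized Field,
// and forceMerge(1) is the closest surviving analogue of optimize()
// (and is rarely needed in modern Lucene).
Document ipDoc = itemProperties2Document(item);
ipDoc.add(new StringField("UID", UID, Field.Store.YES));
writer.updateDocument(new Term("UID", UID), ipDoc);
if (++dirtyItems > dirtyItemTreshold) {
    writer.forceMerge(1);
    dirtyItems = 0;
}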
From source file:org.apache.maven.index.context.DefaultIndexingContext.java
License:Apache License
private void storeDescriptor() throws IOException {
    Document hdr = new Document();

    hdr.add(new Field(FLD_DESCRIPTOR, FLD_DESCRIPTOR_CONTENTS, Field.Store.YES, Field.Index.NOT_ANALYZED));

    hdr.add(new Field(FLD_IDXINFO, VERSION + ArtifactInfo.FS + getRepositoryId(), Field.Store.YES, Field.Index.NO));

    IndexWriter w = getIndexWriter();

    w.updateDocument(DESCRIPTOR_TERM, hdr);

    w.commit();
}
From source file:org.apache.maven.index.context.DefaultIndexingContext.java
License:Apache License
protected void setGroups(Collection<String> groups, String groupField, String groupFieldValue,
        String groupListField) throws IOException, CorruptIndexException {
    final IndexWriter w = getIndexWriter();

    w.updateDocument(new Term(groupField, groupFieldValue),
            createGroupsDocument(groups, groupField, groupFieldValue, groupListField));
}
From source file:org.apache.maven.index.DefaultIndexerEngine.java
License:Apache License
public void update(IndexingContext context, ArtifactContext ac) throws IOException {
    if (ac != null && ac.getGav() != null) {
        Document d = ac.createDocument(context);

        if (d != null) {
            Document old = getOldDocument(context, ac);

            if (!equals(d, old)) {
                IndexWriter w = context.getIndexWriter();

                w.updateDocument(new Term(ArtifactInfo.UINFO, ac.getArtifactInfo().getUinfo()), d);

                updateGroups(context, ac);

                context.updateTimestamp();
            }
        }
    }
}
From source file:org.apache.ofbiz.content.search.DocumentIndexer.java
License:Apache License
@Override
public void run() {
    IndexWriter indexWriter = null;
    int uncommittedDocs = 0;
    while (true) {
        LuceneDocument ofbizDocument;
        try {
            // Execution will pause here until the queue receives a LuceneDocument for indexing
            ofbizDocument = documentIndexQueue.take();
        } catch (InterruptedException e) {
            Debug.logError(e, module);
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            break;
        }
        Term documentIdentifier = ofbizDocument.getDocumentIdentifier();
        Document document = ofbizDocument.prepareDocument(this.delegator);
        if (indexWriter == null) {
            try {
                StandardAnalyzer analyzer = new StandardAnalyzer();
                analyzer.setVersion(SearchWorker.getLuceneVersion());
                indexWriter = new IndexWriter(this.indexDirectory, new IndexWriterConfig(analyzer));
            } catch (CorruptIndexException e) {
                Debug.logError("Corrupted lucene index: " + e.getMessage(), module);
                break;
            } catch (LockObtainFailedException e) {
                Debug.logError("Could not obtain Lock on lucene index " + e.getMessage(), module);
                // TODO: put the thread to sleep waiting for the locked to be released
                break;
            } catch (IOException e) {
                Debug.logError(e.getMessage(), module);
                break;
            }
        }
        try {
            if (document == null) {
                indexWriter.deleteDocuments(documentIdentifier);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": deleted Lucene document: " + ofbizDocument, module);
            } else {
                indexWriter.updateDocument(documentIdentifier, document);
                if (Debug.infoOn())
                    Debug.logInfo(getName() + ": indexed Lucene document: " + ofbizDocument, module);
            }
        } catch (Exception e) {
            Debug.logError(e, getName() + ": error processing Lucene document: " + ofbizDocument, module);
            if (documentIndexQueue.peek() == null) {
                try {
                    indexWriter.close();
                    indexWriter = null;
                } catch (IOException ioe) {
                    Debug.logError(ioe, module);
                }
            }
            continue;
        }
        uncommittedDocs++;
        if (uncommittedDocs == UNCOMMITTED_DOC_LIMIT || documentIndexQueue.peek() == null) {
            // limit reached or queue empty, time to commit
            try {
                indexWriter.commit();
            } catch (IOException e) {
                Debug.logError(e, module);
            }
            uncommittedDocs = 0;
        }
        if (documentIndexQueue.peek() == null) {
            try {
                indexWriter.close();
                indexWriter = null;
            } catch (IOException e) {
                Debug.logError(e, module);
            }
        }
    }
}
From source file:org.apache.pdfbox.examples.lucene.IndexPDFFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given, recurses over files and directories
 * found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good throughput, put multiple documents
 * into your input file(s). An example of this is in the benchmark module, which can create "line doc" files, one
 * document per line, using the <a
 * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (String fileName : files) {
                    indexDocs(writer, new File(file, fileName));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }
            try {
                String path = file.getName().toUpperCase();
                Document doc = null;
                if (path.toLowerCase().endsWith(".pdf")) {
                    System.out.println("Indexing PDF document: " + file);
                    doc = LucenePDFDocument.getDocument(file);
                } else {
                    System.out.println("Skipping " + file);
                    return;
                }
                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("uid", LucenePDFDocument.createUID(file)), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.apereo.portal.index.PortalSearchIndexer.java
License:Apache License
private void indexPortlet(IPortletDefinition portlet, IndexWriter indexWriter) {
    final String fname = portlet.getFName(); // Unique identifier
    try {
        final Document doc = new Document();
        doc.add(new TextField(SearchField.FNAME.getValue(), fname, Field.Store.YES));
        doc.add(new TextField(SearchField.NAME.getValue(), portlet.getName(), Field.Store.YES));
        doc.add(new TextField(SearchField.TITLE.getValue(), portlet.getTitle(), Field.Store.YES));
        final String description = portlet.getDescription();
        if (StringUtils.isNotBlank(description)) {
            doc.add(new TextField(SearchField.DESCRIPTION.getValue(), description, Field.Store.YES));
        }
        final IPortletDefinitionParameter keywords = portlet.getParameter("keywords");
        if (keywords != null && StringUtils.isNotBlank(keywords.getValue())) {
            doc.add(new TextField(SearchField.KEYWORDS.getValue(), keywords.getValue(), Field.Store.YES));
        }
        final String content = extractContent(portlet);
        if (StringUtils.isNotBlank(content)) {
            doc.add(new TextField(SearchField.CONTENT.getValue(), content, Field.Store.YES));
        }
        indexWriter.updateDocument(new Term("fname", fname), doc);
    } catch (IOException ioe) {
        logger.warn("Unable to index portlet with fname='{}'", fname);
        return;
    }
    logger.debug("Indexed portlet '{}'", fname);
}
From source file:org.archive.index.AsAReference.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.YES);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                doc.add(new TextField("contents",
                        new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.bidtime.lucene.utils.IndexFiles.java
License:Apache License
/**
 * Indexes the given file using the given writer, or if a directory is given,
 * recurses over files and directories found under the given directory.
 *
 * NOTE: This method indexes one document per input file. This is slow. For good
 * throughput, put multiple documents into your input file(s). An example of this is
 * in the benchmark module, which can create "line doc" files, one document per line,
 * using the
 * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html"
 * >WriteLineDocTask</a>.
 *
 * @param writer Writer to the index where the given file/dir info will be stored
 * @param file The file to index, or the directory to recurse into to find files to index
 * @throws IOException If there is a low-level I/O error
 */
static void indexDocs(IndexWriter writer, File file) throws IOException {
    // do not try to index files that cannot be read
    if (file.canRead()) {
        if (file.isDirectory()) {
            String[] files = file.list();
            // an IO error could occur
            if (files != null) {
                for (int i = 0; i < files.length; i++) {
                    indexDocs(writer, new File(file, files[i]));
                }
            }
        } else {
            FileInputStream fis;
            try {
                fis = new FileInputStream(file);
            } catch (FileNotFoundException fnfe) {
                // at least on windows, some temporary files raise this exception with an "access denied" message
                // checking if the file can be read doesn't help
                return;
            }
            try {
                // make a new, empty document
                Document doc = new Document();

                // Add the path of the file as a field named "path". Use a
                // field that is indexed (i.e. searchable), but don't tokenize
                // the field into separate words and don't index term frequency
                // or positional information:
                Field pathField = new StringField("path", file.getPath(), Field.Store.NO);
                doc.add(pathField);

                // Add the last modified date of the file a field named "modified".
                // Use a LongField that is indexed (i.e. efficiently filterable with
                // NumericRangeFilter). This indexes to milli-second resolution, which
                // is often too fine. You could instead create a number based on
                // year/month/day/hour/minutes/seconds, down the resolution you require.
                // For example the long value 2011021714 would mean
                // February 17, 2011, 2-3 PM.
                doc.add(new LongField("modified", file.lastModified(), Field.Store.NO));

                // Add the contents of the file to a field named "contents". Specify a Reader,
                // so that the text of the file is tokenized and indexed, but not stored.
                // Note that FileReader expects the file to be in UTF-8 encoding.
                // If that's not the case searching for special characters will fail.
                // doc.add(new TextField("contents", new BufferedReader(
                //         new InputStreamReader(fis, StandardCharsets.UTF_8))));

                if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
                    // New index, so we just add the document (no old document can be there):
                    System.out.println("adding " + file);
                    writer.addDocument(doc);
                } else {
                    // Existing index (an old copy of this document may have been indexed) so
                    // we use updateDocument instead to replace the old one matching the exact
                    // path, if present:
                    System.out.println("updating " + file);
                    writer.updateDocument(new Term("path", file.getPath()), doc);
                }
            } finally {
                fis.close();
            }
        }
    }
}
From source file:org.Demo.IndexFiles.java
License:Apache License
/** Indexes a single document */
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException {
    try (InputStream stream = Files.newInputStream(file)) {
        // make a new, empty document
        Document doc = new Document();

        // Add the path of the file as a field named "path". Use a
        // field that is indexed (i.e. searchable), but don't tokenize
        // the field into separate words and don't index term frequency
        // or positional information:
        Field pathField = new StringField("path", file.toString(), Field.Store.YES);
        doc.add(pathField);

        BufferedReader buff = new BufferedReader(
                new InputStreamReader(Files.newInputStream(file), StandardCharsets.UTF_8));
        String title = buff.readLine();
        buff.close();
        Field titleField = new StringField("title", title, Field.Store.YES);
        doc.add(titleField);

        // Add the last modified date of the file a field named "modified".
        // Use a LongPoint that is indexed (i.e. efficiently filterable with
        // PointRangeQuery). This indexes to milli-second resolution, which
        // is often too fine. You could instead create a number based on
        // year/month/day/hour/minutes/seconds, down the resolution you require.
        // For example the long value 2011021714 would mean
        // February 17, 2011, 2-3 PM.
        doc.add(new LongPoint("modified", lastModified));

        // Add the contents of the file to a field named "contents". Specify a Reader,
        // so that the text of the file is tokenized and indexed, but not stored.
        // Note that FileReader expects the file to be in UTF-8 encoding.
        // If that's not the case searching for special characters will fail.
        doc.add(new TextField("contents",
                new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8))));

        if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {
            // New index, so we just add the document (no old document can be there):
            System.out.println("adding " + file);
            writer.addDocument(doc);
        } else {
            // Existing index (an old copy of this document may have been indexed) so
            // we use updateDocument instead to replace the old one matching the exact
            // path, if present:
            System.out.println("updating " + file);
            writer.updateDocument(new Term("path", file.toString()), doc);
        }
    }
}