List of usage examples for org.apache.lucene.index IndexWriter getConfig
public LiveIndexWriterConfig getConfig()
From source file:com.icdd.lucene.CreateIndex.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { // filter non-xml files if (filter.accept(file.toFile())) { System.out.println("num: " + num); num++;//w w w.j a va 2 s . c o m if (num < endset && num >= offset) { try (InputStream stream = Files.newInputStream(file)) { // make a new,empty document Document doc = new Document(); Field pathField = new StringField("path", file.toString(), Field.Store.YES); String filename = file.getFileName().toString(); int post = filename.indexOf('_'); if (post > 0) { filename = filename.substring(post + 1, filename.length() - 4); } doc.add(pathField); doc.add(new StringField("title", filename, Field.Store.YES)); doc.add(new SortedNumericDocValuesField("modified", lastModified)); doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document // can // be there): logger.info("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been // indexed) so // path, if present: logger.info("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } } } }
From source file:com.javapr.plaintextindex.search.Index.java
License:Apache License
public static void indexDocs(IndexWriter writer, File file) throws IOException, SAXException, TikaException { // nur lesbare Dateien verwenden if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); }/*from w w w .j a v a 2 s . c o m*/ } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { return; } try { //Word Dokumente mit Tika parsen ContentHandler contenthandler = new BodyContentHandler(); Metadata metadata = new Metadata(); metadata.set(Metadata.RESOURCE_NAME_KEY, file.getName()); Parser parser = new AutoDetectParser(); parser.parse(fis, contenthandler, metadata, new ParseContext()); // Lucene Dokumenten-Objekt erstellen und geparsten Tika-Inhalt speichern Document doc = new Document(); Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); Field filename = new StringField("filename", file.getName(), Field.Store.YES); doc.add(filename); doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); doc.add(new TextField("contents", contenthandler.toString(), Field.Store.NO)); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { //neuer Index, wenn neues Dokument System.out.println("adding " + file); writer.addDocument(doc); } else { long size = file.length() / 1024; list.add(file + ", " + size + "kb"); //Index updaten, wenn lteres Index-Dokument schon vorhanden System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.lin.studytest.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try {//from w ww . j ava2 s . c om InputStream stream = Files.newInputStream(file); // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } finally { } }
From source file:com.lucene.index.test.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w. j av a2 s . c om*/ // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): //System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.mathworks.xzheng.admin.SearcherManager.java
License:Apache License
// Builds a SearcherManager around an already-open IndexWriter.
public SearcherManager(IndexWriter writer) throws IOException {
    this.writer = writer;
    // Open the initial searcher directly from the writer's directory.
    currentSearcher = new IndexSearcher(DirectoryReader.open(writer.getDirectory()));
    // Warm the first searcher before it is handed out.
    warm(currentSearcher);
    // Install a merged-segment warmer so each newly merged segment is warmed
    // through this manager's warm() before it becomes visible to searches.
    writer.getConfig().setMergedSegmentWarmer(
            new IndexWriter.IndexReaderWarmer() {
                public void warm(AtomicReader reader) throws IOException {
                    SearcherManager.this.warm(new IndexSearcher(reader));
                }
            });
}
From source file:com.mycompany.lucenedemo.IndexFiles.java
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//from ww w . jav a2 s . com // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.mycompany.restlet.search.sample.indexer.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { InputStream stream = Files.newInputStream(file); // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// ww w.j a v a 2 s .com // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } }
From source file:com.nearinfinity.blur.manager.writer.nrt.SearcherManager.java
License:Apache License
/**
 * Creates and returns a new SearcherManager from the given {@link IndexWriter}.
 *
 * @param writer
 *          the IndexWriter to open the IndexReader from.
 * @param applyAllDeletes
 *          If <code>true</code>, all buffered deletes will be applied (made
 *          visible) in the {@link IndexSearcher} / {@link IndexReader}. If
 *          <code>false</code>, the deletes may or may not be applied, but
 *          remain buffered (in IndexWriter) so that they will be applied in
 *          the future. Applying deletes can be costly, so if your app can
 *          tolerate deleted documents being returned you might gain some
 *          performance by passing <code>false</code>. See
 *          {@link IndexReader#openIfChanged(IndexReader, IndexWriter, boolean)}.
 * @param warmer
 *          An optional {@link SearcherWarmer}. Pass <code>null</code> if you
 *          don't require the searcher to be warmed before going live. If this
 *          is <code>non-null</code> then a merged segment warmer is installed
 *          on the provided IndexWriter's config.
 * @param es
 *          An optional {@link ExecutorService} so different segments can be
 *          searched concurrently (see
 *          {@link IndexSearcher#IndexSearcher(IndexReader,ExecutorService)}).
 *          Pass <code>null</code> to search segments sequentially.
 *
 * @throws IOException
 */
public SearcherManager(IndexWriter writer, boolean applyAllDeletes, final SearcherWarmer warmer,
        final ExecutorService es) throws IOException {
    this.es = es;
    this.warmer = warmer;
    // Open the initial near-real-time searcher straight from the writer.
    currentSearcher = new IndexSearcher(IndexReader.open(writer, applyAllDeletes));
    if (warmer != null) {
        // Warm each newly merged segment before it becomes visible to
        // searches, using the same executor as regular searches.
        writer.getConfig().setMergedSegmentWarmer(new IndexWriter.IndexReaderWarmer() {
            @Override
            public void warm(IndexReader reader) throws IOException {
                warmer.warm(new IndexSearcher(reader, es));
            }
        });
    }
}
From source file:com.nero.model.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from ww w .ja va 2s .c o m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. 
efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.paladin.common.LuceneHelper.java
License:Apache License
/**
 * (Re)indexes every row of the given database table into the Lucene index.
 *
 * NOTE(review): the table name is concatenated straight into the SQL string;
 * this is safe only while callers pass trusted, hard-coded table names —
 * confirm no user-supplied value ever reaches this parameter.
 *
 * @param writer the IndexWriter to add or update documents with
 * @param table  the table to index; "motto" has no TITLE column
 */
private static void indexTable(IndexWriter writer, String table) throws IOException {
    String sql = "SELECT ID, TITLE, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase();
    // The motto table has no TITLE column, so select without it.
    if (table.equalsIgnoreCase("motto"))
        sql = "SELECT ID, CONTENT, TAG, CREATE_DATE FROM " + table.toUpperCase();
    List<Map<String, Object>> blogs = QueryHelper.queryList(sql);
    for (Map<String, Object> blog : blogs) {
        Document doc = new Document();
        // "id": stored, untokenized key used for updateDocument matching.
        Field id_field = new Field("id", blog.get("ID").toString(), Field.Store.YES,
                Field.Index.NOT_ANALYZED_NO_NORMS);
        // Concatenate title (content for motto), content and tag into a
        // single analyzed, searchable field, joined by the field separator.
        StringBuilder builder = new StringBuilder();
        if (table.equalsIgnoreCase("motto"))
            builder.append(blog.get("CONTENT"));
        else
            builder.append(blog.get("TITLE"));
        builder.append(Constants.LUCENE_FIELD_SEP);
        builder.append(blog.get("CONTENT"));
        builder.append(Constants.LUCENE_FIELD_SEP);
        builder.append(blog.get("TAG"));
        Field t_c_t_field = new Field("title_content_tag", builder.toString(), Field.Store.YES,
                Field.Index.ANALYZED);
        doc.add(id_field);
        doc.add(t_c_t_field);
        if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE)
            writer.addDocument(doc);
        else
            // An old copy may exist; replace the document with a matching id.
            writer.updateDocument(new Term("id", blog.get("ID").toString()), doc);
    }
}