List of usage examples for org.apache.lucene.index IndexWriter getConfig
public LiveIndexWriterConfig getConfig()
From source file:com.czw.search.lucene.example.IndexFiles.java
License:Apache License
/** * Indexes a single document//from ww w . ja va 2 s. c om */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.eden.lucene.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * /*from ww w .j a v a 2 s. co m*/ * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.ekinoks.lucene.introduction.demos.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is * given, recurses over files and directories found under the given * directory./* w w w . j a v a2 s . c o m*/ * * NOTE: This method indexes one document per input file. This is slow. For * good throughput, put multiple documents into your input file(s). An * example of this is in the benchmark module, which can create "line doc" * files, one document per line, using the <a href= * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be * stored * @param file * The file to index, or the directory to recurse into to find * files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setOmitTermFreqAndPositions(true); doc.add(pathField); // Add the last modified date of the file a field named // "modified". // Use a NumericField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.example.search.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * /*from w ww .j ava 2s.c om*/ * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { PageProcess pageProcessor; FileInputStream fis; try { fis = new FileInputStream(file); pageProcessor = new PageProcess(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } WebInfo webInfo; try { // // make a new, empty document while ((webInfo = pageProcessor.next()) != null) { Document doc = new Document(); //process page // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: // Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); // pathField.setIndexOptions(IndexOptions.DOCS_ONLY); // doc.add(pathField); Field urlField = new Field("url", webInfo.url, Field.Store.YES, Field.Index.NO); doc.add(urlField); Field publishidField = new Field("publishid", webInfo.publishid, Field.Store.YES, Field.Index.NO); doc.add(publishidField); Field subjectidField = new Field("subjectid", webInfo.subjectid, Field.Store.YES, Field.Index.NO); doc.add(subjectidField); Field titleField = new Field("title", webInfo.title, Field.Store.YES, Field.Index.NO); doc.add(titleField); doc.add(new Field("keywords", webInfo.keywords, Field.Store.YES, Field.Index.NO)); doc.add(new Field("description", webInfo.description, Field.Store.YES, Field.Index.NO)); doc.add(new Field("content", webInfo.content, Field.Store.YES, Field.Index.ANALYZED)); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. // NumericField modifiedField = new NumericField("modified"); // modifiedField.setLongValue(file.lastModified()); // doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. // doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) {// // New index, so we just add the document (no old document can be there): // System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: // System.out.println("updating " + file); writer.updateDocument(new Term("url", webInfo.url), doc); } } //end while } finally { System.out.println("adding " + file); fis.close(); } } } }
From source file:com.flycode.CRIBSearch.SearchEngine.Demo.IndexFiles.java
License:Apache License
/** * Indexes a single document/*from w ww .j a v a2 s . c om*/ */ private static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.fun.sb.demo.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w.ja va 2 s. c o m*/ // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. // doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.github.tenorviol.gitsearch.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the//from ww w . ja v a2 s . c o m * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // TODO: make these exclusions configurable String fileName = file.getName(); if (fileName.charAt(0) == '.') { return; } int dotLoc = fileName.lastIndexOf('.'); String extension = fileName.substring(dotLoc + 1); // known binary extensions if (extension.equals("jpg") || extension.equals("png") || extension.equals("gif") || extension.equals("pdf") || extension.equals("fla") || extension.equals("flv") || extension.equals("swf") || extension.equals("swz")) { return; } // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new Field("path", file.getPath(), Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS); pathField.setIndexOptions(IndexOptions.DOCS_ONLY); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a NumericField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. NumericField modifiedField = new NumericField("modified"); modifiedField.setLongValue(file.lastModified()); doc.add(modifiedField); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new Field("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.gmail.mosoft521.luceneDemo.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * <p/>// www.j av a2 s.c om * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:com.heejong.lucene.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);// www . ja va 2 s. c o m // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 4 would mean // February 17, 1, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:com.icdd.lucence.IndexFiles.java
License:Apache License
static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new,empty document Document doc = new Document(); Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from ww w . j a v a 2s. c o m*/ doc.add(new SortedNumericDocValuesField("modified", lastModified)); doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can // be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been // indexed) so // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }