List of usage examples for org.apache.lucene.index.IndexWriter#getConfig()
public LiveIndexWriterConfig getConfig()
From source file:index.IndexOmimtsv.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//from w ww. j ava 2 s . c o m // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("20110227030432", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
InputStreamReader ipsr = new InputStreamReader(stream); BufferedReader br = new BufferedReader(ipsr); String line = br.readLine(); int cpt = 0; while ((line = br.readLine()) != null) { String[] tokens = line.trim().split("\t"); if (tokens.length > 6) { String id = tokens[0].split("/")[tokens[0].split("/").length - 1].trim(); if (id.matches("^[0-9]*")) { doc = new Document(); cpt++; doc.add(new TextField("ID", id, Field.Store.NO)); if (!tokens[5].trim().matches("^C[0-9].*")) { for (String token : tokens) { if (token.trim().matches("^C[0-9].*")) { doc.add(new StoredField("CUI", token.trim())); break; } } if (doc.getFields().size() != 2) doc.add(new StoredField("CUI", "")); } else doc.add(new StoredField("CUI", tokens[5].trim())); doc.add(new StoredField("Label", tokens[1].trim())); writer.addDocument(doc); } } } System.out.println("Nombre d'lments : " + cpt); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument( new Term("F:/Ecole(Telecom)/cours telecom/Projet_GMD/bases/chemical.sources.v5.0.tsv", file.toString()), doc); } } }
From source file:index.IndexWikiAbstract.java
public static void main(String[] args) throws FileNotFoundException, IOException { //index/*from w w w . ja v a 2 s . c o m*/ String indexPath = "/Users/smita/Documents/ES/index/abstract/"; String docsPath = null; boolean create = true; String path = "/Users/smita/Documents/data/dbpedia/long_abstracts_en.nt"; Date start = new Date(); // System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream inputStream = null; Scanner sc = null; try { int linecount = 0; inputStream = new FileInputStream(path); sc = new Scanner(inputStream, "UTF-8"); String hash = sc.nextLine(); while (sc.hasNextLine()) { linecount++; String line = sc.nextLine(); try { String title = line.split(" ")[0]; String prop = line.split(" ")[1]; String abs = line.substring(title.length() + prop.length() + 2); //System.out.println(abs); abs = abs.substring(0, abs.length() - 6); title = title.replaceAll("_", " "); title = title.substring(29, title.length() - 1); //System.out.println(abs); //index line as a doc Document doc = new Document(); doc.add(new TextField("title", title, Field.Store.YES)); doc.add(new TextField("abs", abs, Field.Store.YES)); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { System.out.println("adding " + linecount); writer.addDocument(doc); } else { System.out.println("updating "); //writer.updateDocument(new Term("path", file.toString()), doc); } } catch (Exception e2) { } } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); 
} } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:indexer.LuceneIndexerAddDocument.java
/** * Indexes a single document with the aid of Apache Tika. * * @param writer Writer to the index where the given file/dir info will be * stored./*from w w w . ja v a2 s .c o m*/ * @param file The file to index, or the directory to recurse into to find * files to index. * @param attrs This is the attributes from the given file gathered from * walking the file tree. * @param global This is for reference to the global class variables and * methods. * @throws IOException */ static void indexDoc(IndexWriter writer, Path file, BasicFileAttributes attrs, Global global) throws IOException { File document = file.toFile(); if (document.renameTo(document)) { try (InputStream stream = Files.newInputStream(file)) { //make a new, empty document Document doc = new Document(); //Add the path of the file as a field named "path". Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); //Add the last modified date of the file as a field named "modified". doc.add(new LongField("modified", attrs.lastModifiedTime().toMillis(), Field.Store.YES)); //Add the created date of the file as a field named "created". doc.add(new LongField("created", attrs.creationTime().toMillis(), Field.Store.YES)); //Add the document File Name doc.add(new StringField("filename", file.getFileName().toString(), Field.Store.YES)); //Add the contents of the file as a field named "vcontents". 
//Parser type for Tika BodyContentHandler handler = new BodyContentHandler(global.WRITE_LIMIT); Metadata metadata = new Metadata(); FileInputStream inputstream = new FileInputStream(new File(file.toString())); ParseContext pcontext = new ParseContext(); //New Field Type FieldType bodyType = new FieldType(); bodyType.setStored(true); bodyType.setTokenized(true); // for Highlighter, FastvectorHighlighter bodyType.setStoreTermVectors(true); bodyType.setStoreTermVectorPositions(true); bodyType.setStoreTermVectorOffsets(true); // for PostingsHighlighter bodyType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); /** * Determine the document type and the proper parser for the * document After the document is determined we grab the content * and position offset for highlighting. */ try { if (file.toString().endsWith(".pdf")) { PDFParser pdfparser = new PDFParser(); pdfparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".docx") || file.toString().endsWith(".pptx") || file.toString().endsWith(".xlsx") || file.toString().endsWith(".docm") || file.toString().endsWith(".pptm") || file.toString().endsWith(".xlsm")) { OOXMLParser msofficeparser = new OOXMLParser(); msofficeparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".doc") || file.toString().endsWith(".ppt") || file.toString().endsWith(".xlx")) { OfficeParser msofficeparser = new OfficeParser(); msofficeparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".odt") || file.toString().endsWith(".odp") || file.toString().endsWith(".ods")) { OpenDocumentParser openofficeparser = new OpenDocumentParser(); openofficeparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", 
handler.toString(), bodyType)); } else if (file.toString().endsWith(".epub")) { EpubParser epubParser = new EpubParser(); epubParser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".xml")) { XMLParser XMLparser = new XMLParser(); XMLparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".htm") || file.toString().endsWith(".html") || file.toString().endsWith(".mhtml")) { HtmlParser HTMLparser = new HtmlParser(); HTMLparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".rtf")) { RTFParser RTFparser = new RTFParser(); RTFparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".txt")) { TXTParser TXTparser = new TXTParser(); TXTparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else { BufferedReader buffedRead = new BufferedReader( new InputStreamReader(stream, StandardCharsets.UTF_8)); doc.add(new TextField("vcontent", buffedRead)); } } catch (SAXException | TikaException ex) { log.fatal("Document Parsing Exception"); } if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): writer.addDocument(doc); System.out.println("adding " + file); } else { /** * Existing index (an old copy of this document may have * been indexed) so we use updateDocument instead to replace * the old one matching the exact path, if present: */ writer.updateDocument(new Term("path", file.toString()), doc); System.out.println("updating " + file); } } } else { System.out.println("LOCKED: " + file); } }
From source file:InformationRetrieval.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * /* w w w . ja v a 2 s .c o m*/ * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. 
searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); BufferedReader br = new BufferedReader(new FileReader(file)); String L1 = br.readLine(); String L2 = br.readLine(); Field Title = new TextField("Title", L2, Field.Store.YES); Title.setBoost(90F); doc.add(Title); String s1 = null, s2 = null, s3 = null; while (br.readLine() != null) { s1 = br.readLine(); break; } String snip = s1 + " " + s2 + " " + s3; Field Snippet = new StringField("Snippet", snip, Field.Store.YES); doc.add(Snippet); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:io.datalayer.lucene.index.IndexerMain.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is * given, recurses over files and directories found under the given * directory.//www.ja va2 s.c o m * * NOTE: This method indexes one document per input file. This is slow. For * good throughput, put multiple documents into your input file(s). An * example of this is in the benchmark module, which can create "line doc" * files, one document per line, using the <a href= * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be * stored * @param file * The file to index, or the directory to recurse into to find * files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named // "modified". // Use a LongField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). 
This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. /* * doc.add(new TextField("contents", new BufferedReader(new * InputStreamReader(fis, "UTF-8")), Field.Store.NO)); */ if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): LOGGER.info("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: LOGGER.info("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:l3.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, recurses over files and directories * found under the given directory./*from www . j a v a2 s.c om*/ * * NOTE: This method indexes one document per input file. This is slow. For good throughput, put multiple documents * into your input file(s). An example of this is in the benchmark module, which can create "line doc" files, one * document per line, using the <a * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be stored * @param file * The file to index, or the directory to recurse into to find files to index * @throws IOException * If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). 
This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:lia.recent.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from ww w.ja va 2 s. c o m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); System.out.println("fis " + file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). 
This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:lucene.demo.search.FileIndexer.java
License:Apache License
private void addDoc(IndexWriter writer, File file, Document doc) throws IOException { if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { try {/*from ww w . j a va 2s . c o m*/ writer.addDocument(doc); } catch (Exception e) { // } } else { writer.updateDocument(new Term("path", file.getPath()), doc); } }
From source file:mm.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w .j av a 2 s . co m*/ // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:model.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { HashMap<String, String> mappingPathToTitle = new HashMap<String, String>(); try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // strip tags in here as they're unrequired //doc.add(new TextField("fullContents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); //System.out.println(pathField); String path = file.toString(); String pathContents = readFileToString(path, path, mappingPathToTitle); Field pathContents1 = new TextField("contents", pathContents, Field.Store.YES); if (!mappingPathToTitle.isEmpty()) { Field title = new TextField("title", mappingPathToTitle.get(path), Field.Store.YES); doc.add(title);//from w w w .j a v a 2s.co m } doc.add(pathField); doc.add(pathContents1); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }