List of usage examples for org.apache.lucene.index.IndexWriter#getConfig()
public LiveIndexWriterConfig getConfig()
From source file:index.IndexOmimtsv.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);//from w ww. j ava 2 s . c o m // Add the last modified date of the file a field named "modified". // Use a LongPoint that is indexed (i.e. efficiently filterable with // PointRangeQuery). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("20110227030432", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
InputStreamReader ipsr = new InputStreamReader(stream); BufferedReader br = new BufferedReader(ipsr); String line = br.readLine(); int cpt = 0; while ((line = br.readLine()) != null) { String[] tokens = line.trim().split("\t"); if (tokens.length > 6) { String id = tokens[0].split("/")[tokens[0].split("/").length - 1].trim(); if (id.matches("^[0-9]*")) { doc = new Document(); cpt++; doc.add(new TextField("ID", id, Field.Store.NO)); if (!tokens[5].trim().matches("^C[0-9].*")) { for (String token : tokens) { if (token.trim().matches("^C[0-9].*")) { doc.add(new StoredField("CUI", token.trim())); break; } } if (doc.getFields().size() != 2) doc.add(new StoredField("CUI", "")); } else doc.add(new StoredField("CUI", tokens[5].trim())); doc.add(new StoredField("Label", tokens[1].trim())); writer.addDocument(doc); } } } System.out.println("Nombre d'lments : " + cpt); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument( new Term("F:/Ecole(Telecom)/cours telecom/Projet_GMD/bases/chemical.sources.v5.0.tsv", file.toString()), doc); } } }
From source file:index.IndexWikiAbstract.java
public static void main(String[] args) throws FileNotFoundException, IOException { //index/*from w w w . ja v a 2 s . c o m*/ String indexPath = "/Users/smita/Documents/ES/index/abstract/"; String docsPath = null; boolean create = true; String path = "/Users/smita/Documents/data/dbpedia/long_abstracts_en.nt"; Date start = new Date(); // System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(Paths.get(indexPath)); Analyzer analyzer = new StandardAnalyzer(); IndexWriterConfig iwc = new IndexWriterConfig(analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE); } else { iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND); } // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); FileInputStream inputStream = null; Scanner sc = null; try { int linecount = 0; inputStream = new FileInputStream(path); sc = new Scanner(inputStream, "UTF-8"); String hash = sc.nextLine(); while (sc.hasNextLine()) { linecount++; String line = sc.nextLine(); try { String title = line.split(" ")[0]; String prop = line.split(" ")[1]; String abs = line.substring(title.length() + prop.length() + 2); //System.out.println(abs); abs = abs.substring(0, abs.length() - 6); title = title.replaceAll("_", " "); title = title.substring(29, title.length() - 1); //System.out.println(abs); //index line as a doc Document doc = new Document(); doc.add(new TextField("title", title, Field.Store.YES)); doc.add(new TextField("abs", abs, Field.Store.YES)); if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { System.out.println("adding " + linecount); writer.addDocument(doc); } else { System.out.println("updating "); //writer.updateDocument(new Term("path", file.toString()), doc); } } catch (Exception e2) { } } // note that Scanner suppresses exceptions if (sc.ioException() != null) { throw sc.ioException(); 
} } finally { if (inputStream != null) { inputStream.close(); } if (sc != null) { sc.close(); } } writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); }
From source file:indexer.LuceneIndexerAddDocument.java
/** * Indexes a single document with the aid of Apache Tika. * * @param writer Writer to the index where the given file/dir info will be * stored./*from w w w . ja v a2 s .c o m*/ * @param file The file to index, or the directory to recurse into to find * files to index. * @param attrs This is the attributes from the given file gathered from * walking the file tree. * @param global This is for reference to the global class variables and * methods. * @throws IOException */ static void indexDoc(IndexWriter writer, Path file, BasicFileAttributes attrs, Global global) throws IOException { File document = file.toFile(); if (document.renameTo(document)) { try (InputStream stream = Files.newInputStream(file)) { //make a new, empty document Document doc = new Document(); //Add the path of the file as a field named "path". Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField); //Add the last modified date of the file as a field named "modified". doc.add(new LongField("modified", attrs.lastModifiedTime().toMillis(), Field.Store.YES)); //Add the created date of the file as a field named "created". doc.add(new LongField("created", attrs.creationTime().toMillis(), Field.Store.YES)); //Add the document File Name doc.add(new StringField("filename", file.getFileName().toString(), Field.Store.YES)); //Add the contents of the file as a field named "vcontents". 
//Parser type for Tika BodyContentHandler handler = new BodyContentHandler(global.WRITE_LIMIT); Metadata metadata = new Metadata(); FileInputStream inputstream = new FileInputStream(new File(file.toString())); ParseContext pcontext = new ParseContext(); //New Field Type FieldType bodyType = new FieldType(); bodyType.setStored(true); bodyType.setTokenized(true); // for Highlighter, FastvectorHighlighter bodyType.setStoreTermVectors(true); bodyType.setStoreTermVectorPositions(true); bodyType.setStoreTermVectorOffsets(true); // for PostingsHighlighter bodyType.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS_AND_OFFSETS); /** * Determine the document type and the proper parser for the * document After the document is determined we grab the content * and position offset for highlighting. */ try { if (file.toString().endsWith(".pdf")) { PDFParser pdfparser = new PDFParser(); pdfparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".docx") || file.toString().endsWith(".pptx") || file.toString().endsWith(".xlsx") || file.toString().endsWith(".docm") || file.toString().endsWith(".pptm") || file.toString().endsWith(".xlsm")) { OOXMLParser msofficeparser = new OOXMLParser(); msofficeparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".doc") || file.toString().endsWith(".ppt") || file.toString().endsWith(".xlx")) { OfficeParser msofficeparser = new OfficeParser(); msofficeparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".odt") || file.toString().endsWith(".odp") || file.toString().endsWith(".ods")) { OpenDocumentParser openofficeparser = new OpenDocumentParser(); openofficeparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", 
handler.toString(), bodyType)); } else if (file.toString().endsWith(".epub")) { EpubParser epubParser = new EpubParser(); epubParser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".xml")) { XMLParser XMLparser = new XMLParser(); XMLparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".htm") || file.toString().endsWith(".html") || file.toString().endsWith(".mhtml")) { HtmlParser HTMLparser = new HtmlParser(); HTMLparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".rtf")) { RTFParser RTFparser = new RTFParser(); RTFparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else if (file.toString().endsWith(".txt")) { TXTParser TXTparser = new TXTParser(); TXTparser.parse(inputstream, handler, metadata, pcontext); doc.add(new Field("vcontent", handler.toString(), bodyType)); } else { BufferedReader buffedRead = new BufferedReader( new InputStreamReader(stream, StandardCharsets.UTF_8)); doc.add(new TextField("vcontent", buffedRead)); } } catch (SAXException | TikaException ex) { log.fatal("Document Parsing Exception"); } if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): writer.addDocument(doc); System.out.println("adding " + file); } else { /** * Existing index (an old copy of this document may have * been indexed) so we use updateDocument instead to replace * the old one matching the exact path, if present: */ writer.updateDocument(new Term("path", file.toString()), doc); System.out.println("updating " + file); } } } else { System.out.println("LOCKED: " + file); } }
From source file:InformationRetrieval.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * /* w w w . ja v a 2 s .c o m*/ * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. 
searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); BufferedReader br = new BufferedReader(new FileReader(file)); String L1 = br.readLine(); String L2 = br.readLine(); Field Title = new TextField("Title", L2, Field.Store.YES); Title.setBoost(90F); doc.add(Title); String s1 = null, s2 = null, s3 = null; while (br.readLine() != null) { s1 = br.readLine(); break; } String snip = s1 + " " + s2 + " " + s3; Field Snippet = new StringField("Snippet", snip, Field.Store.YES); doc.add(Snippet); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:io.datalayer.lucene.index.IndexerMain.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is * given, recurses over files and directories found under the given * directory.//www.ja va2 s.c o m * * NOTE: This method indexes one document per input file. This is slow. For * good throughput, put multiple documents into your input file(s). An * example of this is in the benchmark module, which can create "line doc" * files, one document per line, using the <a href= * "../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be * stored * @param file * The file to index, or the directory to recurse into to find * files to index * @throws IOException */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this // exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't // tokenize // the field into separate words and don't index term // frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named // "modified". // Use a LongField that is indexed (i.e. efficiently // filterable with // NumericRangeFilter). 
This indexes to milli-second // resolution, which // is often too fine. You could instead create a number // based on // year/month/day/hour/minutes/seconds, down the resolution // you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". // Specify a Reader, // so that the text of the file is tokenized and indexed, // but not stored. // Note that FileReader expects the file to be in UTF-8 // encoding. // If that's not the case searching for special characters // will fail. /* * doc.add(new TextField("contents", new BufferedReader(new * InputStreamReader(fis, "UTF-8")), Field.Store.NO)); */ if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old // document can be there): LOGGER.info("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have // been indexed) so // we use updateDocument instead to replace the old one // matching the exact // path, if present: LOGGER.info("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:l3.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, recurses over files and directories * found under the given directory./*from www . j a v a2 s.c om*/ * * NOTE: This method indexes one document per input file. This is slow. For good throughput, put multiple documents * into your input file(s). An example of this is in the benchmark module, which can create "line doc" files, one * document per line, using the <a * href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer * Writer to the index where the given file/dir info will be stored * @param file * The file to index, or the directory to recurse into to find files to index * @throws IOException * If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). 
This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:lia.recent.IndexFiles.java
License:Apache License
/** * Indexes the given file using the given writer, or if a directory is given, * recurses over files and directories found under the given directory. * //from ww w.ja va 2 s. c o m * NOTE: This method indexes one document per input file. This is slow. For good * throughput, put multiple documents into your input file(s). An example of this is * in the benchmark module, which can create "line doc" files, one document per line, * using the * <a href="../../../../../contrib-benchmark/org/apache/lucene/benchmark/byTask/tasks/WriteLineDocTask.html" * >WriteLineDocTask</a>. * * @param writer Writer to the index where the given file/dir info will be stored * @param file The file to index, or the directory to recurse into to find files to index * @throws IOException If there is a low-level I/O error */ static void indexDocs(IndexWriter writer, File file) throws IOException { // do not try to index files that cannot be read if (file.canRead()) { if (file.isDirectory()) { String[] files = file.list(); // an IO error could occur if (files != null) { for (int i = 0; i < files.length; i++) { indexDocs(writer, new File(file, files[i])); } } } else { FileInputStream fis; try { fis = new FileInputStream(file); System.out.println("fis " + file); } catch (FileNotFoundException fnfe) { // at least on windows, some temporary files raise this exception with an "access denied" message // checking if the file can be read doesn't help return; } try { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.getPath(), Field.Store.YES); doc.add(pathField); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). 
This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", file.lastModified(), Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(fis, "UTF-8")))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.getPath()), doc); } } finally { fis.close(); } } } }
From source file:lucene.demo.search.FileIndexer.java
License:Apache License
private void addDoc(IndexWriter writer, File file, Document doc) throws IOException { if (writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE) { try {/*from ww w . j a va 2s . c o m*/ writer.addDocument(doc); } catch (Exception e) { // } } else { writer.updateDocument(new Term("path", file.getPath()), doc); } }
From source file:mm.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); doc.add(pathField);/*from w w w .j av a 2 s . co m*/ // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongPoint("modified", lastModified)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. doc.add(new TextField("contents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): System.out.println("adding " + file); writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }
From source file:model.IndexFiles.java
License:Apache License
/** Indexes a single document */ static void indexDoc(IndexWriter writer, Path file, long lastModified) throws IOException { HashMap<String, String> mappingPathToTitle = new HashMap<String, String>(); try (InputStream stream = Files.newInputStream(file)) { // make a new, empty document Document doc = new Document(); // strip tags in here as they're unrequired //doc.add(new TextField("fullContents", new BufferedReader(new InputStreamReader(stream, StandardCharsets.UTF_8)))); // Add the path of the file as a field named "path". Use a // field that is indexed (i.e. searchable), but don't tokenize // the field into separate words and don't index term frequency // or positional information: Field pathField = new StringField("path", file.toString(), Field.Store.YES); //System.out.println(pathField); String path = file.toString(); String pathContents = readFileToString(path, path, mappingPathToTitle); Field pathContents1 = new TextField("contents", pathContents, Field.Store.YES); if (!mappingPathToTitle.isEmpty()) { Field title = new TextField("title", mappingPathToTitle.get(path), Field.Store.YES); doc.add(title);//from w w w .j a v a 2s.co m } doc.add(pathField); doc.add(pathContents1); // Add the last modified date of the file a field named "modified". // Use a LongField that is indexed (i.e. efficiently filterable with // NumericRangeFilter). This indexes to milli-second resolution, which // is often too fine. You could instead create a number based on // year/month/day/hour/minutes/seconds, down the resolution you require. // For example the long value 2011021714 would mean // February 17, 2011, 2-3 PM. doc.add(new LongField("modified", lastModified, Field.Store.NO)); // Add the contents of the file to a field named "contents". Specify a Reader, // so that the text of the file is tokenized and indexed, but not stored. // Note that FileReader expects the file to be in UTF-8 encoding. // If that's not the case searching for special characters will fail. 
if (writer.getConfig().getOpenMode() == OpenMode.CREATE) { // New index, so we just add the document (no old document can be there): writer.addDocument(doc); } else { // Existing index (an old copy of this document may have been indexed) so // we use updateDocument instead to replace the old one matching the exact // path, if present: System.out.println("updating " + file); writer.updateDocument(new Term("path", file.toString()), doc); } } }