Example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriterConfig IndexWriterConfig.

Prototype

public IndexWriterConfig(Analyzer analyzer)

Source Link

Document

Creates a new config with the provided Analyzer.

Usage

From source file:cs412.project.search.IndexFiles.java

License:Apache License

public IndexFiles(String docsPath, String indexPath) {

    boolean create = true;

    if (docsPath == null) {
        System.exit(1);// w  w w . j a v a  2  s .  co  m
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath).toPath());
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }

}

From source file:cs412.project.search.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments: {@code -index INDEX_PATH}, {@code -docs DOCS_PATH} and
 * {@code -update}. Without {@code -update} a new index is created, replacing any
 * existing one; with it, documents are added to the existing index.
 *
 * <p>The process exits with status 1 when an option is missing its value or when the
 * document directory does not exist or is not readable. I/O failures during indexing
 * are reported on standard output.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    //CHANGE BELOW TO YOUR PATH
    String docsPath = "Split Files/";
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options take a value; reject a trailing option instead of
            // failing with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    final long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer first and the directory second,
        // also when indexDocs fails part-way through.
        try (Directory dir = FSDirectory.open(new File(indexPath).toPath());
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Measured after the writer has been closed, so the final flush is included.
        System.out.println(System.currentTimeMillis() - startMillis + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:cs571.proj1.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Recognised arguments: {@code -index INDEX_PATH}, {@code -docs DOCS_PATH},
 * {@code -update}, {@code -tfidf} and {@code -bm25}. Without {@code -update} a new index
 * is created, replacing any existing one. When both {@code -tfidf} and {@code -bm25} are
 * given, the BM25 similarity is the one left configured because it is set last.
 *
 * <p>The process exits with status 1 when an option is missing its value, when no
 * {@code -docs} path is supplied, or when the document directory is not readable.
 * I/O failures during indexing are reported on standard output.
 *
 * @param args command-line arguments as described above
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index "
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "index";
    String docsPath = null;
    boolean create = true;
    for (int i = 0; i < args.length; i++) {
        String arg = args[i];
        if ("-index".equals(arg) || "-docs".equals(arg)) {
            // Both options take a value; reject a trailing option instead of
            // failing with ArrayIndexOutOfBoundsException.
            if (i + 1 >= args.length) {
                System.err.println("Missing value for option " + arg);
                System.err.println("Usage: " + usage);
                System.exit(1);
            }
            String value = args[++i];
            if ("-index".equals(arg)) {
                indexPath = value;
            } else {
                docsPath = value;
            }
        } else if ("-update".equals(arg)) {
            create = false;
        } else if ("-tfidf".equals(arg)) {
            tfidf = true;
        } else if ("-bm25".equals(arg)) {
            bm25 = true;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    final long startMillis = System.currentTimeMillis();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (tfidf) {
            iwc.setSimilarity(new TFIDF());
        }
        if (bm25) {
            iwc.setSimilarity(new BM25());
        }
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer first and the directory second,
        // also when indexDocs fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);

            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }

        // Measured after the writer has been closed, so the final flush is included.
        System.out.println(System.currentTimeMillis() - startMillis + " total milliseconds");
        System.out.println("Total # of Docs Indexed: " + numOfDocuments);

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:csdn.lucene.first.version.Indexer.java

License:Apache License

/**
 * Creates the {@link IndexWriter} held by this indexer for the index directory at
 * {@code indexDir}, analysing text with an {@code IKAnalyzer} constructed with {@code true}.
 *
 * @param indexDir file-system path of the index directory
 * @throws IOException if the directory or the writer cannot be opened
 */
public Indexer(String indexDir) throws IOException {
    final Directory indexDirectory = FSDirectory.open(Paths.get(indexDir));
    final IndexWriterConfig writerConfig = new IndexWriterConfig(new IKAnalyzer(true));
    writer = new IndexWriter(indexDirectory, writerConfig);
}

From source file:dbn.db.FullTextTrigger.java

/**
 * Get the Lucene index access/*from   ww  w  .  ja  v a  2  s.  c  o m*/
 *
 * @param   conn                SQL connection
 * @throws  SQLException        Unable to access the Lucene index
 */
private static void getIndexAccess(Connection conn) throws SQLException {
    if (!isActive) {
        throw new SQLException("NRS is no longer active");
    }
    boolean obtainedUpdateLock = false;
    if (!indexLock.writeLock().hasLock()) {
        indexLock.updateLock().lock();
        obtainedUpdateLock = true;
    }
    try {
        if (indexPath == null || indexWriter == null) {
            indexLock.writeLock().lock();
            try {
                if (indexPath == null) {
                    getIndexPath(conn);
                }
                if (directory == null) {
                    directory = FSDirectory.open(indexPath);
                }
                if (indexWriter == null) {
                    IndexWriterConfig config = new IndexWriterConfig(analyzer);
                    config.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
                    indexWriter = new IndexWriter(directory, config);
                    Document document = new Document();
                    document.add(new StringField("_QUERY", "_CONTROL_DOCUMENT_", Field.Store.YES));
                    indexWriter.updateDocument(new Term("_QUERY", "_CONTROL_DOCUMENT_"), document);
                    indexWriter.commit();
                    indexReader = DirectoryReader.open(directory);
                    indexSearcher = new IndexSearcher(indexReader);
                }
            } finally {
                indexLock.writeLock().unlock();
            }
        }
    } catch (IOException | SQLException exc) {
        Logger.logErrorMessage("Unable to access the Lucene index", exc);
        throw new SQLException("Unable to access the Lucene index", exc);
    } finally {
        if (obtainedUpdateLock) {
            indexLock.updateLock().unlock();
        }
    }
}

From source file:de.anycook.db.lucene.FulltextIndex.java

License:Open Source License

/**
 * Builds a new writer configuration that uses this index's {@code analyzer}.
 *
 * @return a freshly created {@link IndexWriterConfig}
 */
private IndexWriterConfig createIndexWriterConfig() {
    final IndexWriterConfig writerConfig = new IndexWriterConfig(analyzer);
    return writerConfig;
}

From source file:de.citec.lucene.CreateIndex.java

/**
 * Builds a new Lucene index at a hard-coded location from a hard-coded list of input
 * files, using the analyzer that matches the configured language, then prints
 * {@code counter}.
 *
 * <p>An {@link IOException} raised while indexing a single file is logged and the
 * remaining files are still processed.
 *
 * @param args unused
 * @throws IOException if the index directory or writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer exists for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Documents/EsaDeutsch/new_copus_german.txt");
    String indexPath = "/Users/swalter/Documents/EsaDeutsch/Index/";
    Language language = Language.DE;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    }
    // Fail with a clear message rather than handing a null analyzer to IndexWriterConfig.
    if (analyzer == null) {
        throw new IllegalArgumentException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);
    // The directory is now closed together with the writer (writer first, directory second).
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f));
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });

    }
    System.out.println(counter);

}

From source file:de.citec.sc.sentence.preprocessing.lucene.CreateIndex.java

/**
 * Builds a new Lucene index at a hard-coded location from a hard-coded list of sentence
 * files, using the analyzer that matches the configured language.
 *
 * <p>An {@link IOException} raised while indexing a single file is logged and the
 * remaining files are still processed.
 *
 * @param args unused
 * @throws IOException if the index directory or writer cannot be opened or closed
 * @throws IllegalArgumentException if no analyzer exists for the configured language
 */
public static void main(String[] args) throws IOException {
    List<String> files = new ArrayList<>();
    files.add("/Users/swalter/Downloads/german_sentences_reduced.txt");
    String indexPath = "/Users/swalter/Index/GermanIndexReduced/";
    Language language = Language.DE;

    //files.add("/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/idealSents_mecab_jdepp_rmvPunct_CoNLLU");
    //String indexPath = "/home/bettina/CITEC/MATOLL/preprocessSentences/idealSentences/index";
    //Language language = Language.JA;

    Analyzer analyzer = null;
    if (language.equals(Language.DE)) {
        analyzer = new GermanAnalyzer();
    } else if (language.equals(Language.ES)) {
        analyzer = new SpanishAnalyzer();
    } else if (language.equals(Language.EN)) {
        analyzer = new EnglishAnalyzer();
    } else if (language.equals(Language.JA)) {
        analyzer = new JapaneseAnalyzer();
    }
    // Fail with a clear message rather than handing a null analyzer to IndexWriterConfig.
    if (analyzer == null) {
        throw new IllegalArgumentException("No analyzer available for language " + language);
    }

    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    iwc.setRAMBufferSizeMB(12000);
    // The directory is now closed together with the writer (writer first, directory second).
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        files.forEach(f -> {
            try {
                indexDocs(writer, Paths.get(f), language);
            } catch (IOException ex) {
                Logger.getLogger(CreateIndex.class.getName()).log(Level.SEVERE, null, ex);
            }
        });

    }

}

From source file:de.dfki.km.leech.lucene.ToLuceneContentHandler.java

License:Open Source License

/**
 * Returns the writer that should receive the next document.
 *
 * <p>The initial writer is returned when the split threshold is not positive. Otherwise
 * the current writer is returned while its {@code maxDoc()} is below the threshold; once
 * the threshold is reached a new writer over a fresh directory is created, remembered
 * for later merging, and returned.
 *
 * @return the writer to use for the next document
 * @throws CorruptIndexException     declared for callers; may be raised while opening the new index
 * @throws LockObtainFailedException declared for callers; may be raised while opening the new index
 * @throws IOException               if the new index directory or writer cannot be opened
 */
synchronized protected IndexWriter getCurrentWriter()
        throws CorruptIndexException, LockObtainFailedException, IOException {

    // A non-positive split threshold means the initial writer is always used.
    if (getSplitAndMergeIndex() <= 0) {
        return m_initialLuceneWriter;
    }

    // The current writer is still below the split threshold.
    if (m_luceneWriter.maxDoc() < getSplitAndMergeIndex()) {
        return m_luceneWriter;
    }

    final Directory initialDirectory = m_initialLuceneWriter.getDirectory();

    final Path nextIndexPath;
    if (!(initialDirectory instanceof FSDirectory)) {
        // We need something temporary: a uniquely named folder below java.io.tmpdir.
        final String tmpRoot = new File(System.getProperty("java.io.tmpdir")).getAbsolutePath();
        final String uniqueName = UUID.randomUUID().toString().replaceAll("\\W", "_");
        nextIndexPath = Paths.get(tmpRoot + "/leechTmp/" + uniqueName);
    } else {
        // A previously created temporary writer is queued for closing before it is replaced.
        if (m_luceneWriter != m_initialLuceneWriter) {
            m_llIndexWriter2Close.add(m_luceneWriter);
        }

        // The new index sits next to the initial one, suffixed with a running number.
        final String basePath = ((FSDirectory) initialDirectory).getDirectory().toAbsolutePath().toString();
        final int runningNumber = m_hsTmpLuceneWriterPaths2Merge.size() + 1;
        nextIndexPath = Paths.get(basePath + "_" + runningNumber);
    }

    final Logger log = Logger.getLogger(ToLuceneContentHandler.class.getName());
    log.info("Current index exceeds " + m_iSplitIndexDocumentCount
            + " documents. Will create another temporary one under " + nextIndexPath);

    // The new writer reuses the analyzer of the initial writer and always starts empty.
    @SuppressWarnings("deprecation")
    IndexWriterConfig splitConfig = new IndexWriterConfig(m_initialLuceneWriter.getConfig().getAnalyzer());
    splitConfig.setOpenMode(OpenMode.CREATE);

    m_luceneWriter = new IndexWriter(new SimpleFSDirectory(nextIndexPath), splitConfig);
    m_hsTmpLuceneWriterPaths2Merge.add(nextIndexPath.toAbsolutePath().toString());

    return m_luceneWriter;
}

From source file:de.dfki.km.leech.parser.incremental.IncrementalCrawlingHistory.java

License:Open Source License

/**
 * Creates all writer, reader, and searcher objects if necessary
 * /*w ww .j av a 2  s .  c  om*/
 * @throws CorruptIndexException
 * @throws LockObtainFailedException
 * @throws IOException
 */
public void openLuceneStuff() throws CorruptIndexException, LockObtainFailedException, IOException {
    if (m_indexWriter == null) {
        IndexWriterConfig config = new IndexWriterConfig(new KeywordAnalyzer());
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);

        m_indexWriter = new IndexWriter(new SimpleFSDirectory(Paths.get(m_strHistoryPath)), config);
    }

    if (m_indexReader == null)
        m_indexReader = DirectoryReader.open(m_indexWriter, true, true);

    if (m_indexSearcher == null)
        m_indexSearcher = new IndexSearcher(m_indexReader);
}