Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usage for org.apache.lucene.index IndexWriterConfig setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Document

Specifies OpenMode of the index.
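
A quick sketch of what the three OpenMode values mean, using the Lucene 4.x-style API that appears in the examples below (the index path here is hypothetical):

// OpenMode.CREATE           - create a new index, removing any previously indexed documents
// OpenMode.APPEND           - open an existing index; fails if no index exists at the path
// OpenMode.CREATE_OR_APPEND - open an existing index, or create a new one if none exists
Directory dir = FSDirectory.open(new File("/tmp/example-index"));
Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
try (IndexWriter writer = new IndexWriter(dir, iwc)) {
    // add or update documents here; the writer commits on close
}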

Usage

From source file:lucene.demo.search.FileSearcher.java

License:Apache License

private void removeDocs(Query query) throws IOException {
    Directory dir = FSDirectory.open(new File(indexPath));
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);

    IndexWriter writer = new IndexWriter(dir, iwc);
    writer.deleteDocuments(query);
    writer.commit();
    writer.close();
}

From source file:luceneindexcreator.LuceneIndexCreator.java

public boolean openIndex() {
    try {
        Directory dir = FSDirectory.open(new File(indexPath));

        //populate the stop word set from the stopwords.txt file
        CharArraySet stopWords = new CharArraySet(Version.LUCENE_47, 0, true);
        try (BufferedReader br = new BufferedReader(new FileReader("stopwords.txt"))) {
            String line;
            while ((line = br.readLine()) != null) {
                stopWords.add(line);
            }
        } catch (IOException e) {
            e.printStackTrace();
        }

        StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_47, stopWords);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_47, analyzer);

        //Always overwrite the directory
        iwc.setOpenMode(OpenMode.CREATE);
        indexWriter = new IndexWriter(dir, iwc);

        return true;
    } catch (Exception e) {
        System.err.println("Error opening the index. " + e.getMessage());
    }
    return false;
}

From source file:luceneindexer.files.LuceneWriter.java

public boolean openIndex() {

    try {

        //Open the directory so lucene knows how to deal with it
        Directory dir = FSDirectory.open(new File(pathToIndex));

        //Choose the analyzer we are going to use to write documents to the index. We need to specify
        //the version of the Lucene index format we want to use
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_44);

        //Create an index writer configuration. Same thing here with the index version
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);

        //we are always going to overwrite the index that is currently in the directory
        iwc.setOpenMode(OpenMode.CREATE);

        //let's open that index and get a writer to hand back to the main code
        indexWriter = new IndexWriter(dir, iwc);

        return true;
    } catch (Exception e) {
        System.out.println("Threw an exception trying to open the index for writing: " + e.getClass() + " :: "
                + e.getMessage());
        return false;
    }

}

From source file:luceneingester.TrecIngester.java

License:Apache License

public static void main(String[] clArgs) throws Exception {
    Args args = new Args(clArgs);
    final String dirPath = args.getString("-indexPath") + "/index";
    final String dataDir = args.getString("-dataDir");
    final int docCountLimit = args.getInt("-docCountLimit"); // -1 means all docs from the source:
    final int numThreads = args.getInt("-threadCount");
    final boolean verbose = args.getFlag("-verbose");
    final boolean printDPS = args.getFlag("-printDPS");
    final boolean doUpdate = args.getFlag("-update");
    final boolean positions = args.getFlag("-positions");

    args.check();

    final Analyzer a = new EnglishAnalyzer();
    final TrecContentSource trecSource = createTrecSource(dataDir);
    final Directory dir = FSDirectory.open(Paths.get(dirPath));

    System.out.println("Index path: " + dirPath);
    System.out.println("Doc count limit: " + (docCountLimit == -1 ? "all docs" : "" + docCountLimit));
    System.out.println("Threads: " + numThreads);
    System.out.println("Verbose: " + (verbose ? "yes" : "no"));
    System.out.println("Positions: " + (positions ? "yes" : "no"));

    if (verbose) {
        InfoStream.setDefault(new PrintStreamInfoStream(System.out));
    }

    final IndexWriterConfig iwc = new IndexWriterConfig(a);

    if (doUpdate) {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.APPEND);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    }

    System.out.println("IW config=" + iwc);

    final IndexWriter w = new IndexWriter(dir, iwc);
    IndexThreads threads = new IndexThreads(w, positions, trecSource, numThreads, docCountLimit, printDPS);
    System.out.println("\nIndexer: start");

    final long t0 = System.currentTimeMillis();

    threads.start();

    while (!threads.done()) {
        Thread.sleep(100);
    }
    threads.stop();

    final long t1 = System.currentTimeMillis();
    System.out.println(
            "\nIndexer: indexing done (" + (t1 - t0) / 1000.0 + " sec); total " + w.maxDoc() + " docs");
    if (!doUpdate && docCountLimit != -1 && w.maxDoc() != docCountLimit) {
        throw new RuntimeException("w.maxDoc()=" + w.maxDoc() + " but expected " + docCountLimit);
    }
    if (threads.failed.get()) {
        throw new RuntimeException("exceptions during indexing");
    }

    final long t2 = System.currentTimeMillis();

    final Map<String, String> commitData = new HashMap<String, String>();
    commitData.put("userData", "multi");
    w.setCommitData(commitData);
    w.commit();
    final long t3 = System.currentTimeMillis();
    System.out.println("\nIndexer: commit multi (took " + (t3 - t2) / 1000.0 + " sec)");

    System.out.println("\nIndexer: at close: " + w.segString());
    final long tCloseStart = System.currentTimeMillis();
    w.close();
    System.out.println("\nIndexer: close took " + (System.currentTimeMillis() - tCloseStart) / 1000.0 + " sec");
    dir.close();
    final long tFinal = System.currentTimeMillis();
    System.out.println("\nIndexer: finished (" + (tFinal - t0) / 1000.0 + " sec)");
    System.out.println("\nIndexer: net bytes indexed " + threads.getBytesIndexed());
    System.out.println(
            "\nIndexer: " + (threads.getBytesIndexed() / 1024. / 1024. / 1024. / ((tFinal - t0) / 3600000.))
                    + " GB/hour plain text");
}

From source file:lucenetew.LuceneTEW.java

public static void ParseXML(StringBuilder sb) throws IOException, ParseException {

    String indexPath = "C:\\Users\\Hp Kevin\\Documents\\NetBeansProjects\\LuceneTEW\\Indice";
    Directory dir = FSDirectory.open(Paths.get(indexPath));
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    iwc.setOpenMode(OpenMode.CREATE);
    IndexWriter writer = new IndexWriter(dir, iwc);
    org.apache.lucene.document.Document doc;

    //... Continue here!

    String xml = sb.toString();
    xml = limpiarXML(xml);
    //System.out.println(xml);
    DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
    DocumentBuilder builder;
    try {
        builder = factory.newDocumentBuilder();
        Document document = builder.parse(new InputSource(new StringReader(xml)));
        NodeList nSubList;
        Node tempNodo, tempSubNodo;
        NodeList nList = document.getElementsByTagName("DOC");
        for (int i = 0; i < nList.getLength(); i++) {
            tempNodo = nList.item(i);
            nSubList = tempNodo.getChildNodes();
            doc = new org.apache.lucene.document.Document();
            for (int j = 0; j < nSubList.getLength(); j++) {
                tempSubNodo = nSubList.item(j);
                if (!tempSubNodo.getNodeName().contains("#")) {
                    //System.out.println(tempSubNodo.getNodeName());
                    //System.out.println(tempSubNodo.getTextContent());
                    //fill the index

                    doc.add(new StringField(tempSubNodo.getNodeName(), tempSubNodo.getTextContent(),
                            Field.Store.YES));
                    System.out.println(tempSubNodo.getNodeName() + ' ' + tempSubNodo.getTextContent());
                }
            }
            writer.addDocument(doc);
            // System.out.println(doc.toString());
        }
        //System.out.println(document);
        //System.out.println(nList.getLength());
        writer.close();
        System.out.println("Indice Creado");

    } catch (ParserConfigurationException | SAXException | IOException | DOMException e) {
        e.printStackTrace();
    }

    //Reader
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer2 = new StandardAnalyzer();
    QueryParser parser = new QueryParser("BRAND", analyzer2);
    org.apache.lucene.search.Query query = parser.parse("jeep");
    TopDocs results = searcher.search(query, 100000);
    ScoreDoc[] hits = results.scoreDocs;

    System.out.println(hits.length);

}

From source file:lucenetools.DocIndexer.java

License:Apache License

/**
 * @param args the command line arguments
 * @throws java.io.IOException
 * @throws java.text.ParseException
 * @throws java.lang.ClassNotFoundException
 * @throws java.lang.NoSuchMethodException
 * @throws java.lang.InstantiationException
 * @throws java.lang.IllegalAccessException
 * @throws java.lang.reflect.InvocationTargetException
 */
public static void main(String[] args)
        throws IOException, ParseException, ClassNotFoundException, NoSuchMethodException,
        InstantiationException, IllegalAccessException, IllegalArgumentException, InvocationTargetException {
    // disable the exceedingly verbose log4j output from Tika
    Logger.getRootLogger().removeAllAppenders();
    Logger.getRootLogger().setLevel(Level.OFF);

    Options opts = new Options();
    CommandLine commandLine = new CommandLine();

    // if no command line options specified, user wants help
    if (0 == args.length) {
        commandLine.showHelp();
        System.exit(0);
    }

    // extract command line args and store in opts
    if (!commandLine.parse(args, opts))
        System.exit(1);

    if (opts.showHelp) {
        commandLine.showHelp();
        System.exit(0);
    }

    // validate all command line options
    if (!commandLine.isValid(opts))
        System.exit(1);

    // consolidate stop files into a single CharArraySet
    String[] stopFiles = { opts.defaultStopFile, opts.userStopFile };
    CharArraySet stopWordSet = StopWordSetGenerator.generate(stopFiles);

    // consolidate spelling files
    String[] spellingFiles = { opts.defaultSpellFile, opts.userSpellFile };
    SpellingFile.consolidate(spellingFiles);
    HashMap<String, String> spellingHashtable = SpellingFile.getHashtable();

    // generate the slang hash map
    String[] slangFiles = { opts.defaultSlangFile };
    SpellingFile.consolidate(slangFiles);
    HashMap<String, String> slangHashtable = SpellingFile.getHashtable();

    // create the user-specified analyzer
    analyzer = AnalyzerFactory.create(opts.analyzerName, stopWordSet, spellingHashtable, slangHashtable,
            opts.tokenOpts, opts.modelFile);

    // check if the analyzer is valid
    if (analyzer == null) {
        System.out.println("Error: No analyzer with that name.");
        System.exit(1);
    }

    System.out.println("\nDocIndexer version " + VERSION + ".\n");
    commandLine.printOpts(opts);

    // naive way to determine whether to use Twitter document extraction
    // or to treat each file as a single document
    isTwitter = opts.analyzerName.toLowerCase().contains("twitter");

    long maxMemory = Runtime.getRuntime().maxMemory() / 1024 / 1024;
    System.out.println("Java runtime max memory: " + maxMemory + " MB.");

    if (opts.analyze > 0) {

        // show analysis results then exit
        if (opts.useMongo) {
            DocIndexerMongo.showTweetAnalysis(opts.analyze, opts, analyzer);
            System.exit(0);

        } else {
            if (isTwitter)
                DebugAnalyzer.showTweetAnalysis(opts.analyze, opts.inputDir, analyzer);
            else
                DebugAnalyzer.showDocAnalysis(opts.analyze, opts.inputDir, analyzer);
            System.exit(0);
        }
    }

    long start = System.nanoTime();
    try {
        Path outpath = Paths.get(opts.outputDir);
        outpath = outpath.toAbsolutePath();
        System.out.println("Writing index to: '" + outpath.toString() + "' ...\n");

        Directory dir = FSDirectory.open(new File(opts.outputDir));
        IndexWriterConfig config = new IndexWriterConfig(Version.LATEST, analyzer);

        // create a new index in the directory, removing any 
        // previously-indexed documents
        config.setOpenMode(OpenMode.CREATE);

        // Optional: for better indexing performance, if you are 
        // indexing many documents, increase the RAM buffer.  But if
        // you do this, increase the max heap size available to the 
        // JVM (eg add -Xmx512m or -Xmx1g).
        //config.setRAMBufferSizeMB(256.0);
        IndexWriter writer = new IndexWriter(dir, config);

        if (opts.useMongo) {
            // Parse the configuration file and use the connection
            // details to index the documents.
            DocIndexerMongo.indexDocs(writer, opts, analyzer);
            docsIndexed = DocIndexerMongo.getDocsIndexed();
        } else {
            // Index documents from the provided input directory.
            final File docDir = new File(opts.inputDir);
            rootDir = Paths.get(docDir.getPath());
            indexDocs(writer, docDir, isTwitter, opts);
        }

        // NOTE: if you want to maximize search performance, you can
        // optionally call forceMerge here.  This can be a terribly 
        // costly operation, so generally it's only worth it when
        // your index is relatively static (i.e. you are finished
        // adding documents to it).
        //writer.forceMerge(1);

        // commit docs to the index
        writer.close();
    } catch (IOException e) {
        ErrorReporter.reportException(e.getClass().toString(), e.getMessage());
        System.exit(-1);
    }

    long end = System.nanoTime();
    double elapsed = (end - start) * 1.0e-9;

    System.out.println("\n\nIndexed " + docsIndexed + " documents.");
    System.out.printf("Elapsed time: %.2f seconds, avg. rate: %.2f docs/s.\n\n", elapsed,
            docsIndexed / elapsed);
}

From source file:mm.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public static void main(String[] args) {

    String indexPath = "C:\\Users\\mnorhamizan\\Documents\\index";
    String docsPath = "C:\\Users\\mnorhamizan\\Documents\\testdata";
    boolean create = true;

    final Path docDir = Paths.get(docsPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }
    Runtime runtime = Runtime.getRuntime();
    long usedMemoryBefore = runtime.totalMemory() - runtime.freeMemory();
    System.out.println("Used Memory before" + usedMemoryBefore);
    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(Paths.get(indexPath));
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);

        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);

        writer.close();

        long usedMemoryAfter = runtime.totalMemory() - runtime.freeMemory();
        System.out.println("Memory increased:" + (usedMemoryAfter - usedMemoryBefore));
        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:model.Index.java

public static void main(String[] args) throws FileNotFoundException, IOException {
    //set the analyzer used to split words (tokenize) when indexing
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_43);
    //IndexWriter configuration
    IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_43, analyzer);
    //open the existing index, or build a new one if there is none
    indexWriterConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
    Directory directory = null;
    IndexWriter indexWrite = null;
    try {
        //open the index directory
        directory = FSDirectory.open(new File(Path.IndexDir));
        //if the directory is locked, unlock it
        if (IndexWriter.isLocked(directory)) {
            IndexWriter.unlock(directory);
        }
        //create the IndexWriter
        indexWrite = new IndexWriter(directory, indexWriterConfig);
    } catch (Exception e) {
        e.printStackTrace();
    }

    PreProcessDoc getDoc = new PreProcessDoc();
    WebDocument tempDoc = null;
    while ((tempDoc = getDoc.nextDocument()) != null) {
        Document doc = new Document();
        doc.add(new TextField("link", tempDoc.getDocLink(), Store.YES));
        doc.add(new TextField("content", tempDoc.getDocContent(), Store.YES));
        try {
            //write doc into index  
            indexWrite.addDocument(doc);
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    //commit the data; otherwise it will not be saved
    try {
        indexWrite.commit();
        //close the resources
        indexWrite.close();
        directory.close();
    } catch (Exception e) {
        e.printStackTrace();
    }
}

From source file:model.IndexFiles.java

License:Apache License

/** Index all text files under a directory. */
public IndexFiles() {

    String indexPath = "index";
    String docPath = "data";
    boolean create = true;

    if (docPath == null) {
        System.exit(1);
    }
    final Path docDir = Paths.get(docPath);
    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        Directory dir = FSDirectory.open(Paths.get(indexPath));

        final List<String> stopWords = Arrays.asList("a", "an", "are", "as", "at", "be", "but", "by", "in",
                "into", "is", "it", "no", "on", "such", "that", "the", "their", "then", "there", "these",
                "they", "to", "was", "will", "with");

        //final List<String> stopWords = Arrays.asList("for", "if");   

        final CharArraySet stopSet = new CharArraySet(stopWords, false);

        Analyzer analyzer = new StandardAnalyzer(stopSet);

        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        iwc.setRAMBufferSizeMB(256.0);
        IndexWriter writer = new IndexWriter(dir, iwc);
        indexDocs(writer, docDir);
        // NOTE: if you want to maximize search performance,
        // you can optionally call forceMerge here.  This can be
        // a terribly costly operation, so generally it's only
        // worth it when your index is relatively static (ie
        // you're done adding documents to it):
        //
        // writer.forceMerge(1);
        writer.close();
        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:mri1.MRI1.java

/**
 * @param args the command line arguments
 * @throws java.io.IOException
 * @throws org.apache.lucene.queryparser.classic.ParseException
 */
public static void main(String[] args) throws IOException, ParseException {

    File dest = new File("dest");
    File source = new File("source");
    FSDirectory fsdir = FSDirectory.open(dest);
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, new StandardAnalyzer());
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    try (IndexWriter writer = new IndexWriter(fsdir, iwc)) {
        File[] listFiles = source.listFiles();
        for (File file : listFiles) {
            System.out.println(file.getName());
            if (file.isFile() && file.getName().endsWith(".txt")) {
                Document doc = new Document();
                doc.add(new StringField("path", file.getAbsolutePath(), Field.Store.YES));
                doc.add(new TextField("content", new FileReader(file)));
                writer.addDocument(doc);
            }
        }
    }

    DirectoryReader idxReader = DirectoryReader.open(fsdir);
    IndexSearcher searcher = new IndexSearcher(idxReader);
    //Query q=new TermQuery(new Term("content","system"));
    QueryParser parser = new QueryParser("content", new StandardAnalyzer());
    Query q = parser.parse("basic");
    TopDocs topDocs = searcher.search(q, 20);
    ScoreDoc[] sd = topDocs.scoreDocs;
    for (ScoreDoc d : sd) {
        System.out.println(searcher.doc(d.doc).get("path") + "\t" + d.score);
    }

}