Example usage for org.apache.lucene.index IndexWriterConfig setOpenMode

List of usage examples for org.apache.lucene.index IndexWriterConfig setOpenMode

Introduction

On this page you can find example usages of org.apache.lucene.index IndexWriterConfig setOpenMode.

Prototype

public IndexWriterConfig setOpenMode(OpenMode openMode) 

Source Link

Document

Specifies OpenMode of the index.

Usage

From source file:gov.noaa.pfel.erddap.util.EDStatic.java

License:Open Source License

/** 
 * This creates an IndexWriter./*from  w ww . ja  va  2  s.  c  o m*/
 * Normally, this is created once in RunLoadDatasets.
 * But if trouble, a new one will be created.
 *
 * @throws RuntimeException if trouble
 */
public static void createLuceneIndexWriter(boolean firstTime) {

    try {
        String2.log("createLuceneIndexWriter(" + firstTime + ")");
        long tTime = System.currentTimeMillis();

        //if this is being called, directory shouldn't be locked
        //see javaDocs for indexWriter.close()
        if (IndexWriter.isLocked(luceneDirectory))
            IndexWriter.unlock(luceneDirectory);

        //create indexWriter
        IndexWriterConfig lucConfig = new IndexWriterConfig(luceneVersion, luceneAnalyzer);
        lucConfig.setOpenMode(
                firstTime ? IndexWriterConfig.OpenMode.CREATE : IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
        luceneIndexWriter = new IndexWriter(luceneDirectory, lucConfig);
        luceneIndexWriter.setInfoStream(verbose ? new PrintStream(new String2LogOutputStream()) : null);
        String2.log("  createLuceneIndexWriter finished.  time=" + (System.currentTimeMillis() - tTime) + "ms");
    } catch (Throwable t) {
        throw new RuntimeException(t);
    }
}

From source file:gov.ssa.test.lucenedemo.IndexFiles.java

/**
 * Index all text files under a directory.
 *
 * <p>Prints a message and returns without indexing when either path is null
 * or the document directory is not readable. An IOException raised while
 * indexing is reported on standard output rather than propagated.
 *
 * @param _indexPath directory in which the Lucene index is written
 * @param _docsPath directory whose files are passed to {@code indexDocs}
 */
public void doIndexing(String _indexPath, String _docsPath) {
    String indexPath = _indexPath;
    String docsPath = _docsPath;
    // Always rebuild the index; set to false to append to an existing one.
    boolean create = true;

    if (docsPath == null) {
        System.err.println("docsPath is null....");
        return;
    }

    if (indexPath == null) {
        // Paths.get(null) would throw a NullPointerException that nothing here catches.
        System.err.println("indexPath is null....");
        return;
    }

    final Path docDir = Paths.get(docsPath);

    if (!Files.isReadable(docDir)) {
        System.out.println("Document directory '" + docDir.toAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        return;
    }

    long startMillis = System.currentTimeMillis();

    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");
        Analyzer analyzer = new StandardAnalyzer();
        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }
        // Optional: for better indexing performance, if you
        // are indexing many documents, increase the RAM
        // buffer.  But if you do this, increase the max heap
        // size to the JVM (eg add -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);

        // try-with-resources closes the writer (then the directory) even when
        // indexDocs fails part-way through.
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            indexDocs(writer, docDir);
            // NOTE: if you want to maximize search performance,
            // you can optionally call forceMerge here.  This can be
            // a terribly costly operation, so generally it's only
            // worth it when your index is relatively static (ie
            // you're done adding documents to it):
            //
            // writer.forceMerge(1);
        }
        long endMillis = System.currentTimeMillis();
        System.out.println(endMillis - startMillis + " total milliseconds");
    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:HW1.generateIndex.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Reads every file in the corpus directory, collects the text between each
 * {@code <DOC>} and {@code </DOC>} line, parses it with {@code ReadXMLFile}
 * into a field map, and then writes all parsed documents into four indexes,
 * one per analyzer.
 *
 * @param args unused
 */
public static void main(String[] args) {
    String filePath = "/Users/yangyang/Desktop/lucene/corpus";
    File folder = new File(filePath);
    File[] files = folder.listFiles();
    if (files == null) {
        // listFiles() returns null when the path is not a directory or cannot be read
        System.err.println("Cannot list corpus directory '" + filePath + "'");
        return;
    }

    String[] fields = { "DOCNO", "HEAD", "BYLINE", "DATELINE", "TEXT" };
    ArrayList<HashMap<String, String>> documents = new ArrayList<HashMap<String, String>>();
    int num = 0;

    for (File file : files) {
        // read each file; try-with-resources closes the reader even on failure
        try (BufferedReader br = new BufferedReader(new FileReader(file))) {
            StringBuilder xmlRecords = new StringBuilder();
            String line;
            while ((line = br.readLine()) != null) {
                // change "&" to "&amp;" to avoid bug in parse XML
                line = line.replace("&", "&amp;");

                if (line.startsWith("<DOC>")) {
                    // start of a new record: discard whatever was buffered
                    xmlRecords.setLength(0);
                    xmlRecords.append(line);
                } else if (line.startsWith("</DOC>")) {
                    xmlRecords.append(line);
                    // use ReadXMLFile.java to parse the XMLfile string
                    num += 1;
                    ReadXMLFile r = new ReadXMLFile();
                    HashMap<String, String> document = r.parse(xmlRecords.toString(), fields);
                    documents.add(document);
                } else {
                    xmlRecords.append(line).append(' ');
                }
            }
        } catch (Exception e) {
            // best effort: report the problem and continue with the next file
            e.printStackTrace();
        }
    }

    System.out.println(num);

    String[] indexPaths = { "/Users/yangyang/Desktop/lucene/index/index01",
            "/Users/yangyang/Desktop/lucene/index/index02", "/Users/yangyang/Desktop/lucene/index/index03",
            "/Users/yangyang/Desktop/lucene/index/index04", };
    for (String indexPath : indexPaths) {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        // the trailing digit of the index path selects the analyzer
        Analyzer analyzer = null;
        if (indexPath.endsWith("1")) {
            analyzer = new KeywordAnalyzer();
        } else if (indexPath.endsWith("2")) {
            analyzer = new SimpleAnalyzer();
        } else if (indexPath.endsWith("3")) {
            analyzer = new StopAnalyzer();
        } else if (indexPath.endsWith("4")) {
            analyzer = new StandardAnalyzer();
        }

        IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
        iwc.setOpenMode(OpenMode.CREATE);

        // the writer and directory are closed even if indexDoc throws
        try (Directory dir = FSDirectory.open(Paths.get(indexPath));
                IndexWriter writer = new IndexWriter(dir, iwc)) {
            for (HashMap<String, String> doc : documents) {
                indexDoc(writer, doc);
            }
        } catch (IOException e) {
            System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
        }
    }

}

From source file:iac.cnr.it.Indexer.java

License:Apache License

/**
 * Opens an IndexWriter on the given directory and prepares the document parser.
 *
 * @param indexDir path of the index directory
 * @param create if true the index is opened with OpenMode.CREATE, otherwise
 *     with OpenMode.CREATE_OR_APPEND
 * @param fork if true the auto-detect parser is wrapped in a ForkParser
 * @param ocr if true Tesseract OCR and PDF inline-image extraction settings
 *     are registered in the parse context
 * @throws IOException if the directory or the index writer cannot be opened
 */
public Indexer(String indexDir, boolean create, boolean fork, boolean ocr) throws IOException {
    logger.entry();

    this.fork = fork;

    numErrors = 0;
    numFiles = 0;

    final Directory indexDirectory = FSDirectory.open(Paths.get(indexDir));
    final IndexWriterConfig config = new IndexWriterConfig(new StandardAnalyzer());

    if (!create) {
        config.setOpenMode(OpenMode.CREATE_OR_APPEND);
        logger.info(
                "Configuration specified to create a new index if one does not exist, otherwise the index will be opened and documents will be appended.");
    } else {
        config.setOpenMode(OpenMode.CREATE);
        logger.info("Configuration specified to create a new index or overwrites an existing one.");
    }

    writer = new IndexWriter(indexDirectory, config);

    final Parser detectingParser = new AutoDetectParser();
    context = new ParseContext();

    if (ocr) {
        // register the OCR-related settings in the parse context
        final TesseractOCRConfig tesseractSettings = new TesseractOCRConfig();
        final PDFParserConfig pdfSettings = new PDFParserConfig();
        pdfSettings.setExtractInlineImages(true);
        pdfSettings.setExtractUniqueInlineImagesOnly(false);
        context.set(Parser.class, detectingParser);
        context.set(TesseractOCRConfig.class, tesseractSettings);
        context.set(PDFParserConfig.class, pdfSettings);
    }

    parser = fork ? new ForkParser(ForkParser.class.getClassLoader(), detectingParser) : detectingParser;

    logger.exit();
}

From source file:ie.cmrc.smtx.etl.index.lucene.LuceneSKOSConceptIndexer.java

License:Apache License

/**
 * {@inheritDoc}
 * <p>Creates the index directory if it is missing, otherwise deletes its
 * files, then writes one Lucene document per concept of the thesaurus.
 * @param thesaurus {@inheritDoc}
 * @param indexDirFile Output index directory
 * @return {@inheritDoc}
 * @throws NotDirectoryException {@inheritDoc}
 * @throws IOException {@inheritDoc}
 * @throws IllegalArgumentException if {@code thesaurus} or {@code indexDirFile} is null
 */
@Override
public boolean indexSKOSThesaurus(SKOS thesaurus, File indexDirFile) throws NotDirectoryException, IOException {
    if (thesaurus == null) {
        throw new IllegalArgumentException("Thesaurus parameter is null");
    }
    if (indexDirFile == null) {
        throw new IllegalArgumentException("Index directory is null");
    }

    // Check index directory
    if (!indexDirFile.exists()) {
        // Index directory does not exist, create it
        if (verbose)
            System.out.println("  - Directory \"" + indexDirFile.getAbsolutePath()
                    + "\" does not exist. Will create it...");
        if (!indexDirFile.mkdirs()) {
            if (verbose)
                System.out.println("    * ERROR: Could not create directory \""
                        + indexDirFile.getAbsolutePath() + "\"!");
            return false;
        }
        if (verbose)
            System.out.println(
                    "      --> Created directory \"" + indexDirFile.getAbsolutePath() + "\".");
    } else if (!indexDirFile.isDirectory()) {
        // A file exists with the same name but is not a directory
        if (verbose)
            System.out.println("    * ERROR: File \"" + indexDirFile.getAbsolutePath()
                    + "\" exists but is not a directory!");
        throw new NotDirectoryException(indexDirFile.getAbsolutePath());
    } else {
        // Index directory exists and is actually a directory: clear content
        File[] existingFiles = indexDirFile.listFiles();
        if (existingFiles == null) {
            // listFiles() returns null on an I/O error; report it instead of failing with an NPE
            throw new IOException(
                    "Could not list content of directory \"" + indexDirFile.getAbsolutePath() + "\"");
        }
        for (File f : existingFiles) {
            if (!f.delete() && verbose)
                System.out.println(
                        "    * WARNING: Could not delete file \"" + f.getAbsolutePath() + "\"!");
        }
    }

    // Initialise analyser
    if (verbose)
        System.out.println("  - Initialising index analysers...");

    Analyzer analyser = SKOSAnalyzerFactory.createSmartSKOSMultilingualAnalyser(this.languages);
    if (verbose)
        System.out.println("      --> Analysers intialised.");

    // Configure index writer and connect to index directory
    if (verbose)
        System.out.println("  - Configuring index writer and connecting to index directory...");
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, analyser);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources closes the writer (then the directory) even if indexing fails
    try (SimpleFSDirectory directory = new SimpleFSDirectory(indexDirFile);
            IndexWriter indexWriter = new IndexWriter(directory, iwc)) {
        if (verbose)
            System.out.println("      --> Done configuring and connecting index writer.");

        if (verbose)
            System.out
                    .println("  - Indexing concepts. Please be patient as this may take a few minutes...");
        int counter = 0;
        int errors = 0;
        CloseableIterator<SKOSConcept> iter = thesaurus.listConcepts();
        try {
            while (iter.hasNext()) {
                counter++;
                SKOSConcept concept = iter.next();
                Document doc = this.indexConcept(concept);
                if (doc != null) {
                    try {
                        indexWriter.addDocument(doc);
                    } catch (IOException ex) {
                        // a single failed concept is counted and skipped, not fatal
                        errors++;
                        if (verbose)
                            System.out.println(
                                    "    * ERROR: Could not index concept \"" + concept.getURI() + "\"!");
                    }
                }
            }
        } finally {
            iter.close();
        }
        if (verbose)
            System.out
                    .println("      --> Indexed " + counter + " concepts(s) with " + errors + " error(s).");

        if (verbose)
            System.out.println("  - Closing connection to index directory...");
    }
    if (verbose)
        System.out.println("      --> Closed connection.");

    return true;
}

From source file:ie.cmrc.smtx.etl.index.lucene.LuceneSKOSIndexer.java

License:Apache License

/**
 * {@inheritDoc}
 * <p>Creates the index directory if it is missing, otherwise deletes its
 * files, then writes one Lucene document per SKOS resource of the thesaurus.
 * @param thesaurus {@inheritDoc}
 * @param indexDirFile Output directory
 * @return {@inheritDoc}
 * @throws NotDirectoryException {@inheritDoc}
 * @throws IOException {@inheritDoc}
 * @throws IllegalArgumentException if {@code thesaurus} or {@code indexDirFile} is null
 */
@Override
public boolean indexSKOSThesaurus(SKOS thesaurus, File indexDirFile) throws NotDirectoryException, IOException {
    if (thesaurus == null) {
        throw new IllegalArgumentException("Thesaurus parameter is null");
    }
    if (indexDirFile == null) {
        throw new IllegalArgumentException("Index directory file is null");
    }

    // Check index directory
    if (!indexDirFile.exists()) {
        // Index directory does not exist, create it
        if (verbose)
            System.out.println("  - Directory \"" + indexDirFile.getAbsolutePath()
                    + "\" does not exist. Will create it...");
        if (!indexDirFile.mkdirs()) {
            if (verbose)
                System.out.println("    * ERROR: Could not create directory \""
                        + indexDirFile.getAbsolutePath() + "\"!");
            return false;
        }
        if (verbose)
            System.out.println(
                    "      --> Created directory \"" + indexDirFile.getAbsolutePath() + "\".");
    } else if (!indexDirFile.isDirectory()) {
        // A file exists with the same name but is not a directory
        if (verbose)
            System.out.println("    * ERROR: File \"" + indexDirFile.getAbsolutePath()
                    + "\" exists but is not a directory!");
        throw new NotDirectoryException(indexDirFile.getAbsolutePath());
    } else {
        // Index directory exists and is actually a directory: clear content
        File[] existingFiles = indexDirFile.listFiles();
        if (existingFiles == null) {
            // listFiles() returns null on an I/O error; report it instead of failing with an NPE
            throw new IOException(
                    "Could not list content of directory \"" + indexDirFile.getAbsolutePath() + "\"");
        }
        for (File f : existingFiles) {
            if (!f.delete() && verbose)
                System.out.println(
                        "    * WARNING: Could not delete file \"" + f.getAbsolutePath() + "\"!");
        }
    }

    // Initialise analyser
    if (verbose)
        System.out.println("  - Initialising index analysers...");

    Analyzer analyser = SKOSAnalyzerFactory.createSmartSKOSMultilingualAnalyser(this.languages);
    if (verbose)
        System.out.println("      --> Analysers intialised.");

    // Configure index writer and connect to index directory
    if (verbose)
        System.out.println("  - Configuring index writer and connecting to index directory...");
    IndexWriterConfig iwc = new IndexWriterConfig(Version.LATEST, analyser);
    iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);

    // try-with-resources closes the writer (then the directory) even if indexing fails
    try (SimpleFSDirectory directory = new SimpleFSDirectory(indexDirFile);
            IndexWriter indexWriter = new IndexWriter(directory, iwc)) {
        if (verbose)
            System.out.println("      --> Done configuring and connecting index writer.");

        if (verbose)
            System.out
                    .println("  - Indexing resources. Please be patient as this may take a few minutes...");
        int counter = 0;
        int errors = 0;
        CloseableIterator<SKOSResource> iter = thesaurus.listSKOSResources();
        try {
            while (iter.hasNext()) {
                counter++;
                SKOSResource resource = iter.next();
                Document doc = this.indexResource(resource);
                if (doc != null) {
                    try {
                        indexWriter.addDocument(doc);
                    } catch (IOException ex) {
                        // a single failed resource is counted and skipped, not fatal
                        errors++;
                        if (verbose)
                            System.out.println(
                                    "    * ERROR: Could not index resource \"" + resource.getURI() + "\"!");
                    }
                }
            }
        } finally {
            iter.close();
        }
        if (verbose)
            System.out.println(
                    "      --> Indexed " + counter + " resources(s) with " + errors + " error(s).");

        if (verbose)
            System.out.println("  - Closing connection to index directory...");
    }
    if (verbose)
        System.out.println("      --> Closed connection.");

    return true;
}

From source file:ikanalyzer.LuceneIndexAndSearchDemo.java

License:Apache License

/**
 *  ???/*from  w  w w .java2s  .c o m*/
 * 
 * @param args
 */
public static void main(String[] args) {
    // Lucene Document??
    String fieldName = "text";
    // 
    String text = "IK Analyzer???????";

    // IKAnalyzer?
    Analyzer analyzer = new IKAnalyzer(true);

    Directory directory = null;
    IndexWriter iwriter = null;
    IndexReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        // 
        directory = new RAMDirectory();

        // ?IndexWriterConfig
        IndexWriterConfig iwConfig = new IndexWriterConfig(analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        // 
        Document doc = new Document();
        doc.add(new StringField("ID", "10000", Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();

        // ?**********************************
        // ?
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        // QueryParser?Query
        QueryParser qp = new QueryParser(fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // ?5?
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        // 
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:in.student.project.index.IndexTrec.java

License:Apache License

/**
 * Indexer for Trec files.
 *
 * <p>Usage: {@code IndexTrec [-create] [-index <index>] <root_directory>}.
 * Without {@code -create}, {@code indexDocs} is first run with {@code deleting}
 * set, and the index is then opened with OpenMode.CREATE_OR_APPEND. Any
 * exception is reported on standard output.
 *
 * @param argv command-line arguments; the root directory must be the last one
 */
public static void main(String[] argv) {
    try {
        File index = new File("index");
        boolean create = false;
        File root = null;

        String usage = "IndexTrec [-create] [-index <index>] <root_directory>";

        if (argv.length == 0) {
            System.err.println("Usage: " + usage);
            return;
        }

        for (int i = 0; i < argv.length; i++) {
            if (argv[i].equals("-index")) { // parse -index option
                if (i + 1 >= argv.length) {
                    // -index given without a value: would otherwise read past the end of argv
                    System.err.println("Usage: " + usage);
                    return;
                }
                index = new File(argv[++i]);
            } else if (argv[i].equals("-create")) { // parse -create option
                create = true;
            } else if (i != argv.length - 1) {
                System.err.println("Usage: " + usage);
                return;
            } else {
                root = new File(argv[i]);
            }
        }

        if (root == null) {
            // only options were given (e.g. "-create" last); there is nothing to index
            System.err.println("Usage: " + usage);
            return;
        }

        Date start = new Date();
        if (!create) { // delete stale docs
            deleting = true;
            indexDocs(root, index, create);
        }

        Directory dir = FSDirectory.open(index);
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_48);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_48, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(root, index, create); // add new docs
        } finally {
            // always release the index write lock, even if indexing failed
            writer.close();
        }
        Date end = new Date();

        System.out.print(end.getTime() - start.getTime());
        System.out.println(" total milliseconds");
    } catch (Exception e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:index.Indexcategory.java

/**
 * Builds a Lucene index from a fixed input file: the first line is skipped,
 * and each following line is split on {@code "> "} into an "article" field
 * (first part, trimmed with a fixed substring) and a "category" field (third
 * part). Lines that cannot be processed are reported on standard error and
 * skipped.
 *
 * @param args unused
 * @throws IOException if the index or the input file cannot be opened, or if
 *     the scanner recorded a read error
 */
public static void main(String[] args) throws IOException {
    String indexPath = "/Users/smita/Documents/ES/index/abstract/";
    boolean create = true;

    String path = "/Users/smita/Documents/data/dbpedia/short_abstracts_en.nq";
    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }

    // Resources are closed in reverse order (scanner, stream, writer, directory),
    // also when an exception escapes the loop.
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc);
            FileInputStream inputStream = new FileInputStream(path);
            Scanner sc = new Scanner(inputStream, "UTF-8")) {

        boolean adding = writer.getConfig().getOpenMode() == IndexWriterConfig.OpenMode.CREATE;

        // skip the first line; guard so an empty file does not throw
        if (sc.hasNextLine()) {
            sc.nextLine();
        }

        int linecount = 0;
        while (sc.hasNextLine()) {
            linecount++;
            String line = sc.nextLine();
            try {
                String[] parts = line.split("> ");
                String article = parts[0];
                String category = parts[2];

                //index row
                article = article.substring(29, article.length() - 1);

                Document doc = new Document();
                doc.add(new TextField("article", article, Field.Store.YES));
                doc.add(new TextField("category", category, Field.Store.YES));
                if (adding) {
                    System.out.println("adding " + linecount);
                    writer.addDocument(doc);
                } else {
                    System.out.println("updating ");
                    //writer.updateDocument(new Term("path", file.toString()), doc);
                }
            } catch (Exception e) {
                // best effort: keep going, but report the line instead of hiding the failure
                System.err.println("skipping line " + linecount + ": " + e);
            }
        }
        if (sc.ioException() != null) {
            throw sc.ioException();
        }
    }
}

From source file:index.IndexCoreMeta.java

/**
 * Builds a Lucene index from the files in a fixed data directory: every entry
 * except those ending in ".DS_Store" is passed to {@code readFile} together
 * with the index writer.
 *
 * @param args unused
 * @throws FileNotFoundException if the data directory cannot be listed
 * @throws IOException if the index cannot be opened or written
 * @throws JSONException declared for {@code readFile}
 */
public static void main(String[] args) throws FileNotFoundException, IOException, JSONException {

    String indexPath = "/Users/smita/Documents/ES/index/meta";
    String path = "/Users/smita/Documents/data/core/meta/";
    boolean create = true;

    Date start = new Date();
    System.out.println("Indexing to directory '" + indexPath + "'...");

    //directory of data; listed before the index is opened so that a missing
    //data directory does not wipe an existing index (OpenMode.CREATE)
    String[] files = new File(path).list();
    if (files == null) {
        throw new FileNotFoundException("Cannot list data directory '" + path + "'");
    }

    Analyzer analyzer = new StandardAnalyzer();
    IndexWriterConfig iwc = new IndexWriterConfig(analyzer);
    if (create) {
        // Create a new index in the directory, removing any
        // previously indexed documents:
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
    } else {
        iwc.setOpenMode(IndexWriterConfig.OpenMode.CREATE_OR_APPEND);
    }
    // iwc.setRAMBufferSizeMB(256.0);

    // the writer and directory are closed even if readFile throws
    try (Directory dir = FSDirectory.open(Paths.get(indexPath));
            IndexWriter writer = new IndexWriter(dir, iwc)) {
        // iterate over the actual directory content rather than a fixed count,
        // which overran the array for smaller directories and dropped files in larger ones
        for (String name : files) {
            String filename = path + name;
            System.out.println(filename);
            if (!filename.endsWith(".DS_Store")) {
                readFile(writer, filename);
            }
        }
    }

    Date end = new Date();
    System.out.println(end.getTime() - start.getTime() + " total milliseconds");

}