Example usage for org.apache.lucene.index IndexWriter close

List of usage examples for org.apache.lucene.index IndexWriter close

Introduction

In this page you can find the example usage for org.apache.lucene.index IndexWriter close.

Prototype

@Override
public void close() throws IOException 

Source Link

Document

Closes all open resources and releases the write lock.

Usage

From source file:buscador.IndexFiles.java

License:Apache License

/**
 * Index all text files under a directory.
 *
 * <p>Command line: {@code [-index INDEX_PATH] [-docs DOCS_PATH] [-update]}.
 * {@code -update} appends to an existing index instead of recreating it.
 *
 * @param args command-line arguments as described by the usage string
 */
public static void main(String[] args) {
    String usage = "java org.apache.lucene.demo.IndexFiles"
            + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n"
            + "This indexes the documents in DOCS_PATH, creating a Lucene index"
            + "in INDEX_PATH that can be searched with SearchFiles";
    String indexPath = "Zaguan1";
    String docsPath = null;
    boolean create = true;
    // Hand-rolled option parsing: options that take a value consume the
    // following argument.
    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            indexPath = args[i + 1];
            i++;
        } else if ("-docs".equals(args[i])) {
            docsPath = args[i + 1];
            i++;
        } else if ("-update".equals(args[i])) {
            create = false;
        }
    }

    if (docsPath == null) {
        System.err.println("Usage: " + usage);
        System.exit(1);
    }

    final File docDir = new File(docsPath);
    if (!docDir.exists() || !docDir.canRead()) {
        System.out.println("Document directory '" + docDir.getAbsolutePath()
                + "' does not exist or is not readable, please check the path");
        System.exit(1);
    }

    Date start = new Date();
    try {
        System.out.println("Indexing to directory '" + indexPath + "'...");

        Directory dir = FSDirectory.open(new File(indexPath));
        Analyzer analyzer = new SpanishAnalyzer(Version.LUCENE_44);
        IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer);

        if (create) {
            // Create a new index in the directory, removing any
            // previously indexed documents:
            iwc.setOpenMode(OpenMode.CREATE);
        } else {
            // Add new documents to an existing index:
            iwc.setOpenMode(OpenMode.CREATE_OR_APPEND);
        }

        // Optional: for better indexing performance when indexing many
        // documents, increase the RAM buffer (and the JVM max heap,
        // e.g. -Xmx512m or -Xmx1g):
        //
        // iwc.setRAMBufferSizeMB(256.0);
        IndexWriter writer = new IndexWriter(dir, iwc);
        try {
            indexDocs(writer, docDir);

            // NOTE: to maximize search performance you can optionally call
            // forceMerge here.  It is a costly operation, so it is generally
            // only worth it when the index is relatively static (i.e. you are
            // done adding documents to it):
            //
            // writer.forceMerge(1);
        } finally {
            // FIX: close the writer even when indexDocs() throws, so the
            // index write lock is always released.
            writer.close();
        }

        Date end = new Date();
        System.out.println(end.getTime() - start.getTime() + " total milliseconds");

    } catch (IOException e) {
        System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage());
    }
}

From source file:byrne.mitre.main.NameMatcher.java

License:Apache License

/**
 * Loads an n-gram index from "index.txt" into a RAM directory, then runs the
 * queries from "queries.txt" against it on a fixed thread pool (one worker
 * per core), writing results to "results.txt".
 *
 * @param args unused
 */
public static void main(String[] args) {

    BufferedReader bufferedReader = null;
    BufferedWriter out = null;
    IndexSearcher searcher = null;
    try {
        long startTime = System.currentTimeMillis();

        System.out.println("Loading Index...");
        final Analyzer analyzer = new NGramAnalyzer(2, 4);

        final Directory index = new RAMDirectory();
        final IndexWriter writer = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED);
        try {
            loadIndex("index.txt", writer);
        } finally {
            // FIX: release the write lock even when loading the index fails.
            writer.close();
        }

        System.out.println("Running queries...");
        bufferedReader = new BufferedReader(new FileReader("queries.txt"));
        out = new BufferedWriter(new FileWriter("results.txt"));

        searcher = new IndexSearcher(index, true);
        String line = null;

        final int N_THREADS = Runtime.getRuntime().availableProcessors();
        System.out.println("Total threads: " + N_THREADS);

        final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS);

        // One query task per input line; tasks share the searcher and writer.
        while ((line = bufferedReader.readLine()) != null) {
            final NameEntry entry = new NameEntry(line);
            final MitreQuery q = new MitreQuery(entry, analyzer, searcher, out);
            executor.execute(q);
        }
        executor.shutdown();
        executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES);

        long endTime = System.currentTimeMillis();
        System.out.println("Total run time: " + (endTime - startTime) / 60000 + " minutes");

    } catch (IOException e) {
        System.out.println(e);
    } catch (InterruptedException e) {
        // FIX: restore the interrupt status instead of swallowing it.
        Thread.currentThread().interrupt();
        System.out.println(e);
    } finally {
        // FIX: previously these were only closed on the success path and
        // leaked whenever an exception was thrown.
        try {
            if (bufferedReader != null) bufferedReader.close();
        } catch (IOException e) {
            System.out.println(e);
        }
        try {
            if (searcher != null) searcher.close();
        } catch (IOException e) {
            System.out.println(e);
        }
        try {
            if (out != null) out.close();
        } catch (IOException e) {
            System.out.println(e);
        }
    }

}

From source file:ca.gnewton.lusql.core.IndexTermFreqCache.java

License:Apache License

/**
 * Builds a small two-document on-disk test index (analyzed, unstored fields
 * with term vectors), then loads an IndexTermFreqCache for the "title" field
 * from it and prints the cache to stderr.
 *
 * @param args a <code>String</code> value (unused)
 */
public static final void main(final String[] args) {
    String dir = "itfcTestIndex";
    String cachedField = "title";
    try {
        IndexWriterConfig config = new IndexWriterConfig(LuSql.luceneVersion,
                new StandardAnalyzer(LuSql.luceneVersion)).setOpenMode(IndexWriterConfig.OpenMode.CREATE);

        IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dir)), config);
        try {
            // Doc #1: term vectors enabled so the cache can read term
            // frequencies.
            Document doc1 = new Document();
            Field title1 = new org.apache.lucene.document.Field(cachedField, "The Rain in Spain is plain",
                    Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            doc1.add(title1);
            org.apache.lucene.document.Field ab1 = new org.apache.lucene.document.Field("ab",
                    "This is the test abstract", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            doc1.add(ab1);
            writer.addDocument(doc1);

            // Doc #2: shares the term "plain" with doc #1.
            Document doc2 = new Document();
            Field title2 = new org.apache.lucene.document.Field(cachedField, "This is the test plain title",
                    Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            doc2.add(title2);
            org.apache.lucene.document.Field ab2 = new org.apache.lucene.document.Field("ab",
                    "This is the test abstract", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES);
            doc2.add(ab2);
            writer.addDocument(doc2);
        } finally {
            // FIX: always release the write lock, even if addDocument fails.
            writer.close();
        }

        IndexReader reader = IndexReader.open(FSDirectory.open(new File(dir)));
        try {
            IndexTermFreqCache cache = new IndexTermFreqCache(reader, cachedField, 100, true);
            System.err.println(cache);
        } finally {
            // FIX: the reader was previously never closed.
            reader.close();
        }
    } catch (Throwable t) {
        t.printStackTrace();
    }

}

From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java

License:Apache License

/**
 * Builds the Lucene category index: one document per category holding its
 * id, its name, and a "flattened" blob of the category name followed by
 * every product name in the category.
 *
 * @throws IOException if the index cannot be opened or written
 */
private void buildCategoryIndex() throws IOException {
    IndexWriter writer = new IndexWriter(aDirectory, new IndexWriterConfig(VERSION, aAnalyzer));
    try {
        for (Category category : aDataStore.getCategories()) {
            // StringBuilder instead of String += in a loop (same output).
            // NOTE(review): the category name and the first product name are
            // concatenated without a separator; kept as-is to preserve the
            // existing token stream — confirm whether a space was intended.
            StringBuilder flattenedText = new StringBuilder(category.getName());
            for (Product product : category.getProducts()) {
                flattenedText.append(product.getName()).append(' ');
            }
            Document doc = new Document();
            doc.add(new TextField(CATEGORY_ID, category.getId(), Field.Store.YES));
            doc.add(new TextField(CATEGORY_NAME, category.getName(), Field.Store.YES));
            doc.add(new TextField(FLATTENED_TEXT, flattenedText.toString(), Field.Store.YES));
            writer.addDocument(doc);
        }
    } finally {
        // FIX: close the writer (releasing the write lock) even when
        // indexing a category fails.
        writer.close();
    }
}

From source file:ca.pgon.freenetknowledge.search.impl.LuceneIndexerThread.java

License:Apache License

/**
 * Deletes every indexed document that was referred to by the given URL.
 * Serialized against other index writes via the shared semaphore.
 *
 * @param refererURL the referring URL whose documents should be removed
 */
public void removing(UrlEntity refererURL) {
    boolean acquired = false;
    try {
        semaphore.acquire();
        acquired = true;

        Term term = new Term(LuceneSearchEngine.INDEX_REFERER_URL, String.valueOf(refererURL.getId()));

        IndexWriter indexWriter = genIndexWriter();
        try {
            indexWriter.deleteDocuments(term);
        } finally {
            // FIX: release the write lock even when deleteDocuments fails.
            indexWriter.close();
        }
    } catch (NoSuchDirectoryException e) {
        // The index is empty, so not an issue
    } catch (Exception e) {
        logger.log(Level.SEVERE, "Error while removing referer", e);
    } finally {
        // FIX: only release a permit that was actually acquired; previously
        // an interrupted acquire() still triggered a release(), corrupting
        // the semaphore's permit count.
        if (acquired) {
            semaphore.release();
        }
    }
}

From source file:ca.pgon.freenetknowledge.search.impl.LuceneIndexerThread.java

License:Apache License

@Override
public void run() {
    // Worker loop: sleep-poll the queue, then drain up to
    // maxAddInIndexBeforeComputing entries per writer session.
    logger.info("LuceneIndexerThread Starting");
    while (!stop.get()) {
        while (queue.isEmpty() && !stop.get()) {
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                // Deliberately ignored: the loop re-checks stop/queue anyway.
            }
        }

        IndexWriter indexWriter = null;
        boolean acquired = false;

        try {
            // Open writer
            semaphore.acquire();
            acquired = true;

            indexWriter = genIndexWriter();

            // Add at most "maxAddInIndexBeforeComputing" entries in the
            // index before closing it
            for (int i = 0; i < maxAddInIndexBeforeComputing && !queue.isEmpty(); ++i) {
                addEntry(indexWriter, queue.poll());
            }
        } catch (Exception e) {
            logger.log(Level.SEVERE, "Problem writing the index", e);
        } finally {
            // Close writer.  FIX: guard against null instead of relying on a
            // silently-swallowed NullPointerException when genIndexWriter()
            // was never reached, and log close failures instead of hiding them.
            if (indexWriter != null) {
                try {
                    indexWriter.close();
                } catch (Exception e) {
                    logger.log(Level.SEVERE, "Problem closing the index writer", e);
                }
            }
            // FIX: only release a permit that was actually acquired.
            if (acquired) {
                semaphore.release();
            }
        }
    }

    logger.info("LuceneIndexerThread Stoping");
}

From source file:calliope.search.AeseSearch.java

License:Open Source License

/**
 * Update the index for just ONE docID
 * @param docID the documents to regenerate
 * @param langCode the language code of the analyzer
 * @throws AeseException if the index is uninitialised or re-indexing fails
 */
public static void updateIndex(String docID, String langCode) throws AeseException {
    try {
        Analyzer analyzer = createAnalyzer(langCode);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        if (index == null)
            throw new AeseException("Index must be initialised before update");
        IndexWriter w = new IndexWriter(index, config);
        try {
            // Drop any stale entries for this document, then re-add it.
            Term t = new Term(LuceneFields.DOCID, docID);
            w.deleteDocuments(t);
            addCorTextoIndex(docID, w);
        } finally {
            // FIX: release the write lock even when re-indexing fails;
            // previously an exception left the index locked.
            w.close();
        }
    } catch (Exception e) {
        throw new AeseException(e);
    }
}

From source file:calliope.search.AeseSearch.java

License:Open Source License

/**
 * Build the entire Lucene index from scratch
 * @param langCode the language code of the analyzer
 * @throws AeseException if listing the collection or indexing a document fails
 */
public static void buildIndex(String langCode) throws AeseException {
    try {
        String[] docIDs = Connector.getConnection().listCollection(Database.CORTEX);
        Analyzer analyzer = createAnalyzer(langCode);
        // The index lives under ~/.calliope; create it on first run.
        File home = new File(System.getProperty("user.home"));
        indexLocation = new File(home, ".calliope");
        if (!indexLocation.exists())
            indexLocation.mkdir();
        AeseSearch.index = new NIOFSDirectory(indexLocation);
        IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer);
        IndexWriter w = new IndexWriter(index, config);
        try {
            for (int i = 0; i < docIDs.length; i++) {
                addCorTextoIndex(docIDs[i], w);
            }
        } finally {
            // FIX: release the write lock even when indexing a document
            // fails part-way through.
            w.close();
        }
    } catch (Exception e) {
        throw new AeseException(e);
    }
}

From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java

License:Apache License

/**
 * Demo: index a single document with the IK analyzer into a RAM directory,
 * then parse a keyword query and print the top hits.
 *
 * <p>NOTE(review): the original non-ASCII (Chinese) comments and sample text
 * were mangled to '?' during extraction; all string literals are kept
 * byte-for-byte as found.
 *
 * @param args unused
 */
public static void main(String[] args) {

    // Lucene document field name and sample content.
    String fieldName = "text";
    String text = "IK Analyzer???????";

    // IK analyzer in smart-segmentation mode.
    Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true);

    Directory directory = null;
    IndexWriter iwriter = null;
    DirectoryReader ireader = null;
    IndexSearcher isearcher = null;
    try {
        directory = new RAMDirectory();

        // Configure and open the index writer.
        IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer);
        iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND);
        iwriter = new IndexWriter(directory, iwConfig);
        // Index one document.
        Document doc = new Document();
        doc.add(new LongField("ID", 1000, Field.Store.YES));
        doc.add(new TextField(fieldName, text, Field.Store.YES));
        iwriter.addDocument(doc);
        iwriter.close();
        iwriter = null; // mark as closed so the finally block does not close twice

        // Search phase.
        ireader = DirectoryReader.open(directory);
        isearcher = new IndexSearcher(ireader);

        String keyword = "?";
        //         String keyword = "";
        // Build the query with QueryParser, requiring all terms to match.
        QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer);
        qp.setDefaultOperator(QueryParser.AND_OPERATOR);
        Query query = qp.parse(keyword);
        System.out.println("Query = " + query);

        // Fetch and print the top 5 hits.
        TopDocs topDocs = isearcher.search(query, 5);
        System.out.println("" + topDocs.totalHits);
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        for (int i = 0; i < topDocs.totalHits; i++) {
            Document targetDoc = isearcher.doc(scoreDocs[i].doc);
            System.out.println("" + targetDoc.toString());
        }

    } catch (CorruptIndexException e) {
        e.printStackTrace();
    } catch (LockObtainFailedException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } catch (ParseException e) {
        e.printStackTrace();
    } finally {
        // FIX: previously the writer was only closed inline, so any
        // exception before that point leaked it (and the write lock).
        if (iwriter != null) {
            try {
                iwriter.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (ireader != null) {
            try {
                ireader.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        if (directory != null) {
            try {
                directory.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}

From source file:cc.twittertools.index.IndexStatuses.java

License:Apache License

/**
 * Indexes a JSON corpus of tweets into a Lucene index.
 *
 * <p>Options: {@code -collection <dir>} (required) source directory,
 * {@code -index <dir>} (required) index location, {@code -deletes <file>}
 * bzip2 file of deleted tweet ids, {@code -max_id <id>} upper id bound,
 * {@code -optimize} merge to a single segment, {@code -store} store term
 * vectors.
 *
 * @param args command-line options as above
 * @throws Exception on any unrecoverable indexing error
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));

    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    // Tweet text: indexed with positions, stored, tokenized; term vectors
    // only when requested.
    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    // Optional set of tweet ids to skip, read from a bzip2-compressed file
    // (one id per line, optionally tab-separated with extra columns).
    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        try {
            // CBZip2InputStream expects the leading "BZ" magic bytes to have
            // been consumed already.
            byte[] ignoreBytes = new byte[2];
            fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools

            BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));
            String s;
            while ((s = br.readLine()) != null) {
                if (s.contains("\t")) {
                    deletes.add(Long.parseLong(s.split("\t")[0]));
                } else {
                    deletes.add(Long.parseLong(s));
                }
            }
            br.close();
        } finally {
            // FIX: the streams were previously leaked if parsing threw.
            fin.close();
        }
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deletes tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            // FIX: friends/followers counts were stored under each other's
            // field names; pair each count with its matching field.
            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        // FIX: close each resource independently so a failure in one close
        // no longer leaks the others.
        try {
            writer.close();
        } finally {
            try {
                dir.close();
            } finally {
                stream.close();
            }
        }
    }
}