List of usage examples for org.apache.lucene.index IndexWriter close
@Override public void close() throws IOException
From source file:buscador.IndexFiles.java
License:Apache License
/** * Index all text files under a directory. *//*from w ww . jav a2s .c om*/ public static void main(String[] args) { String usage = "java org.apache.lucene.demo.IndexFiles" + " [-index INDEX_PATH] [-docs DOCS_PATH] [-update]\n\n" + "This indexes the documents in DOCS_PATH, creating a Lucene index" + "in INDEX_PATH that can be searched with SearchFiles"; String indexPath = "Zaguan1"; String docsPath = null; boolean create = true; for (int i = 0; i < args.length; i++) { if ("-index".equals(args[i])) { indexPath = args[i + 1]; i++; } else if ("-docs".equals(args[i])) { docsPath = args[i + 1]; i++; } else if ("-update".equals(args[i])) { create = false; } } if (docsPath == null) { System.err.println("Usage: " + usage); System.exit(1); } final File docDir = new File(docsPath); if (!docDir.exists() || !docDir.canRead()) { System.out.println("Document directory '" + docDir.getAbsolutePath() + "' does not exist or is not readable, please check the path"); System.exit(1); } Date start = new Date(); try { System.out.println("Indexing to directory '" + indexPath + "'..."); Directory dir = FSDirectory.open(new File(indexPath)); Analyzer analyzer = new SpanishAnalyzer(Version.LUCENE_44); IndexWriterConfig iwc = new IndexWriterConfig(Version.LUCENE_44, analyzer); if (create) { // Create a new index in the directory, removing any // previously indexed documents: iwc.setOpenMode(OpenMode.CREATE); } else { // Add new documents to an existing index: iwc.setOpenMode(OpenMode.CREATE_OR_APPEND); } // Optional: for better indexing performance, if you // are indexing many documents, increase the RAM // buffer. But if you do this, increase the max heap // size to the JVM (eg add -Xmx512m or -Xmx1g): // // iwc.setRAMBufferSizeMB(256.0); IndexWriter writer = new IndexWriter(dir, iwc); indexDocs(writer, docDir); // NOTE: if you want to maximize search performance, // you can optionally call forceMerge here. 
This can be // a terribly costly operation, so generally it's only // worth it when your index is relatively static (ie // you're done adding documents to it): // // writer.forceMerge(1); writer.close(); Date end = new Date(); System.out.println(end.getTime() - start.getTime() + " total milliseconds"); } catch (IOException e) { System.out.println(" caught a " + e.getClass() + "\n with message: " + e.getMessage()); } }
From source file:byrne.mitre.main.NameMatcher.java
License:Apache License
public static void main(String[] args) { try {/* w w w.ja v a2 s.c o m*/ long startTime = System.currentTimeMillis(); System.out.println("Loading Index..."); final Analyzer analyzer = new NGramAnalyzer(2, 4); final Directory index = new RAMDirectory(); final IndexWriter writer = new IndexWriter(index, analyzer, true, IndexWriter.MaxFieldLength.UNLIMITED); loadIndex("index.txt", writer); writer.close(); System.out.println("Running queries..."); final BufferedReader bufferedReader = new BufferedReader(new FileReader("queries.txt")); final BufferedWriter out = new BufferedWriter(new FileWriter("results.txt")); final IndexSearcher searcher = new IndexSearcher(index, true); String line = null; final int N_THREADS = Runtime.getRuntime().availableProcessors(); System.out.println("Total threads: " + N_THREADS); final ExecutorService executor = Executors.newFixedThreadPool(N_THREADS); while ((line = bufferedReader.readLine()) != null) { final NameEntry entry = new NameEntry(line); final MitreQuery q = new MitreQuery(entry, analyzer, searcher, out); executor.execute(q); } executor.shutdown(); executor.awaitTermination(Long.MAX_VALUE, TimeUnit.MINUTES); bufferedReader.close(); searcher.close(); out.close(); long endTime = System.currentTimeMillis(); System.out.println("Total run time: " + (endTime - startTime) / 60000 + " minutes"); } catch (IOException e) { System.out.println(e); } catch (InterruptedException e) { System.out.println(e); } }
From source file:ca.gnewton.lusql.core.IndexTermFreqCache.java
License:Apache License
/** * Describe <code>main</code> method here. * * @param args a <code>String</code> value *//*from ww w .j ava2 s . co m*/ public static final void main(final String[] args) { String dir = "itfcTestIndex"; String cachedField = "title"; try { IndexWriterConfig config = new IndexWriterConfig(LuSql.luceneVersion, new StandardAnalyzer(LuSql.luceneVersion)).setOpenMode(IndexWriterConfig.OpenMode.CREATE); IndexWriter writer = new IndexWriter(FSDirectory.open(new File(dir)), config); // Doc #1 Document doc1 = new Document(); Field title1 = new org.apache.lucene.document.Field(cachedField, "The Rain in Spain is plain", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); doc1.add(title1); org.apache.lucene.document.Field ab1 = new org.apache.lucene.document.Field("ab", "This is the test abstract", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); doc1.add(ab1); writer.addDocument(doc1); // Doc #2 Document doc2 = new Document(); Field title2 = new org.apache.lucene.document.Field(cachedField, "This is the test plain title", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); doc2.add(title2); org.apache.lucene.document.Field ab2 = new org.apache.lucene.document.Field("ab", "This is the test abstract", Field.Store.NO, Field.Index.ANALYZED, Field.TermVector.YES); doc2.add(ab2); writer.addDocument(doc2); writer.close(); IndexReader reader = IndexReader.open(FSDirectory.open(new File(dir))); IndexTermFreqCache cache = new IndexTermFreqCache(reader, cachedField, 100, true); System.err.println(cache); } catch (Throwable t) { t.printStackTrace(); } }
From source file:ca.mcgill.cs.creco.logic.search.CategorySearch.java
License:Apache License
private void buildCategoryIndex() throws IOException { IndexWriter writer = new IndexWriter(aDirectory, new IndexWriterConfig(VERSION, aAnalyzer)); for (Category category : aDataStore.getCategories()) { String flattenedText = category.getName(); for (Product product : category.getProducts()) { flattenedText += product.getName() + " "; }/*from w w w . j a va 2 s. c o m*/ Document doc = new Document(); doc.add(new TextField(CATEGORY_ID, category.getId(), Field.Store.YES)); doc.add(new TextField(CATEGORY_NAME, category.getName(), Field.Store.YES)); doc.add(new TextField(FLATTENED_TEXT, flattenedText, Field.Store.YES)); writer.addDocument(doc); } writer.close(); }
From source file:ca.pgon.freenetknowledge.search.impl.LuceneIndexerThread.java
License:Apache License
public void removing(UrlEntity refererURL) { try {//from w ww. ja v a 2 s .c o m semaphore.acquire(); Term term = new Term(LuceneSearchEngine.INDEX_REFERER_URL, String.valueOf(refererURL.getId())); IndexWriter indexWriter = genIndexWriter(); indexWriter.deleteDocuments(term); indexWriter.close(); } catch (NoSuchDirectoryException e) { // The index is empty, so not an issue } catch (Exception e) { logger.log(Level.SEVERE, "Error while removing referer", e); } finally { semaphore.release(); } }
From source file:ca.pgon.freenetknowledge.search.impl.LuceneIndexerThread.java
License:Apache License
@Override public void run() { logger.info("LuceneIndexerThread Starting"); while (!stop.get()) { while (queue.isEmpty() && !stop.get()) { try { Thread.sleep(1000); } catch (InterruptedException e) { }/*from ww w. j a v a 2s .co m*/ } IndexWriter indexWriter = null; try { // Open writer semaphore.acquire(); indexWriter = genIndexWriter(); // Add at most "maxAddInIndexBeforeComputing" entries in the // index before closing it for (int i = 0; i < maxAddInIndexBeforeComputing && !queue.isEmpty(); ++i) { addEntry(indexWriter, queue.poll()); } } catch (Exception e) { logger.log(Level.SEVERE, "Problem writing the index", e); } finally { // Close writer try { indexWriter.close(); } catch (Exception e) { } semaphore.release(); } } logger.info("LuceneIndexerThread Stoping"); }
From source file:calliope.search.AeseSearch.java
License:Open Source License
/** * Update the index for just ONE docID * @param docID the documents to regenerate * @param langCode the language code of the analyzer * @throws AeseException //from w ww. j a va 2s . c om */ public static void updateIndex(String docID, String langCode) throws AeseException { try { Analyzer analyzer = createAnalyzer(langCode); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer); if (index == null) throw new AeseException("Index must be initialised before update"); IndexWriter w = new IndexWriter(index, config); Term t = new Term(LuceneFields.DOCID, docID); w.deleteDocuments(t); addCorTextoIndex(docID, w); w.close(); } catch (Exception e) { throw new AeseException(e); } }
From source file:calliope.search.AeseSearch.java
License:Open Source License
/** * Build the entire Lucene index from scratch * @param langCode the language code//www . java 2 s .co m */ public static void buildIndex(String langCode) throws AeseException { try { String[] docIDs = Connector.getConnection().listCollection(Database.CORTEX); Analyzer analyzer = createAnalyzer(langCode); File home = new File(System.getProperty("user.home")); indexLocation = new File(home, ".calliope"); if (!indexLocation.exists()) indexLocation.mkdir(); AeseSearch.index = new NIOFSDirectory(indexLocation); IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_45, analyzer); IndexWriter w = new IndexWriter(index, config); for (int i = 0; i < docIDs.length; i++) { addCorTextoIndex(docIDs[i], w); } w.close(); } catch (Exception e) { throw new AeseException(e); } }
From source file:cc.pp.analyzer.ik.demo.IKAnalyzerDemo.java
License:Apache License
public static void main(String[] args) { //Lucene Document?? String fieldName = "text"; //// w ww.j ava 2 s .co m String text = "IK Analyzer???????"; //IKAnalyzer? Analyzer analyzer = new IKAnalyzer(Version.LUCENE_48, true); Directory directory = null; IndexWriter iwriter = null; DirectoryReader ireader = null; IndexSearcher isearcher = null; try { // directory = new RAMDirectory(); //?IndexWriterConfig IndexWriterConfig iwConfig = new IndexWriterConfig(Version.LUCENE_48, analyzer); iwConfig.setOpenMode(OpenMode.CREATE_OR_APPEND); iwriter = new IndexWriter(directory, iwConfig); // Document doc = new Document(); doc.add(new LongField("ID", 1000, Field.Store.YES)); doc.add(new TextField(fieldName, text, Field.Store.YES)); iwriter.addDocument(doc); iwriter.close(); //?********************************** //? ireader = DirectoryReader.open(directory); isearcher = new IndexSearcher(ireader); String keyword = "?"; // String keyword = ""; //QueryParser?Query QueryParser qp = new QueryParser(Version.LUCENE_48, fieldName, analyzer); qp.setDefaultOperator(QueryParser.AND_OPERATOR); Query query = qp.parse(keyword); System.out.println("Query = " + query); //?5? TopDocs topDocs = isearcher.search(query, 5); System.out.println("" + topDocs.totalHits); // ScoreDoc[] scoreDocs = topDocs.scoreDocs; for (int i = 0; i < topDocs.totalHits; i++) { Document targetDoc = isearcher.doc(scoreDocs[i].doc); System.out.println("" + targetDoc.toString()); } } catch (CorruptIndexException e) { e.printStackTrace(); } catch (LockObtainFailedException e) { e.printStackTrace(); } catch (IOException e) { e.printStackTrace(); } catch (ParseException e) { e.printStackTrace(); } finally { if (ireader != null) { try { ireader.close(); } catch (IOException e) { e.printStackTrace(); } } if (directory != null) { try { directory.close(); } catch (IOException e) { e.printStackTrace(); } } } }
From source file:cc.twittertools.index.IndexStatuses.java
License:Apache License
/**
 * CLI entry point: indexes a corpus of tweet statuses into a Lucene index.
 *
 * Options: -collection (source dir, required), -index (output dir, required),
 * -optimize (force-merge to one segment), -store_term_vectors, -deletes
 * (bzip2 file of deleted tweet ids), -max_id (skip statuses above this id).
 */
@SuppressWarnings("static-access")
public static void main(String[] args) throws Exception {
    Options options = new Options();

    options.addOption(new Option(HELP_OPTION, "show help"));
    options.addOption(new Option(OPTIMIZE_OPTION, "merge indexes into a single segment"));
    options.addOption(new Option(STORE_TERM_VECTORS_OPTION, "store term vectors"));
    options.addOption(OptionBuilder.withArgName("dir").hasArg().withDescription("source collection directory")
            .create(COLLECTION_OPTION));
    options.addOption(
            OptionBuilder.withArgName("dir").hasArg().withDescription("index location").create(INDEX_OPTION));
    options.addOption(OptionBuilder.withArgName("file").hasArg().withDescription("file with deleted tweetids")
            .create(DELETES_OPTION));
    options.addOption(OptionBuilder.withArgName("id").hasArg().withDescription("max id").create(MAX_ID_OPTION));

    CommandLine cmdline = null;
    CommandLineParser parser = new GnuParser();
    try {
        cmdline = parser.parse(options, args);
    } catch (ParseException exp) {
        System.err.println("Error parsing command line: " + exp.getMessage());
        System.exit(-1);
    }

    if (cmdline.hasOption(HELP_OPTION) || !cmdline.hasOption(COLLECTION_OPTION)
            || !cmdline.hasOption(INDEX_OPTION)) {
        HelpFormatter formatter = new HelpFormatter();
        formatter.printHelp(IndexStatuses.class.getName(), options);
        System.exit(-1);
    }

    String collectionPath = cmdline.getOptionValue(COLLECTION_OPTION);
    String indexPath = cmdline.getOptionValue(INDEX_OPTION);

    // Field type for tweet text: indexed + stored + tokenized with
    // positions; term vectors only when explicitly requested.
    final FieldType textOptions = new FieldType();
    textOptions.setIndexed(true);
    textOptions.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    textOptions.setStored(true);
    textOptions.setTokenized(true);
    if (cmdline.hasOption(STORE_TERM_VECTORS_OPTION)) {
        textOptions.setStoreTermVectors(true);
    }

    LOG.info("collection: " + collectionPath);
    LOG.info("index: " + indexPath);

    // Optional set of tweet ids to skip, read from a bzip2-compressed file
    // (one id per line, optionally tab-separated with trailing columns).
    LongOpenHashSet deletes = null;
    if (cmdline.hasOption(DELETES_OPTION)) {
        deletes = new LongOpenHashSet();
        File deletesFile = new File(cmdline.getOptionValue(DELETES_OPTION));
        if (!deletesFile.exists()) {
            System.err.println("Error: " + deletesFile + " does not exist!");
            System.exit(-1);
        }
        LOG.info("Reading deletes from " + deletesFile);

        FileInputStream fin = new FileInputStream(deletesFile);
        byte[] ignoreBytes = new byte[2];
        // Skip the "B", "Z" magic bytes that command-line bzip2 writes but
        // CBZip2InputStream does not expect.
        fin.read(ignoreBytes); // "B", "Z" bytes from commandline tools
        BufferedReader br = new BufferedReader(new InputStreamReader(new CBZip2InputStream(fin)));

        String s;
        while ((s = br.readLine()) != null) {
            if (s.contains("\t")) {
                deletes.add(Long.parseLong(s.split("\t")[0]));
            } else {
                deletes.add(Long.parseLong(s));
            }
        }
        br.close();
        fin.close();
        LOG.info("Read " + deletes.size() + " tweetids from deletes file.");
    }

    long maxId = Long.MAX_VALUE;
    if (cmdline.hasOption(MAX_ID_OPTION)) {
        maxId = Long.parseLong(cmdline.getOptionValue(MAX_ID_OPTION));
        LOG.info("index: " + maxId);
    }

    long startTime = System.currentTimeMillis();
    File file = new File(collectionPath);
    if (!file.exists()) {
        System.err.println("Error: " + file + " does not exist!");
        System.exit(-1);
    }

    StatusStream stream = new JsonStatusCorpusReader(file);

    Directory dir = FSDirectory.open(new File(indexPath));
    IndexWriterConfig config = new IndexWriterConfig(Version.LUCENE_43, IndexStatuses.ANALYZER);
    config.setOpenMode(OpenMode.CREATE);

    IndexWriter writer = new IndexWriter(dir, config);
    int cnt = 0;
    Status status;
    try {
        while ((status = stream.next()) != null) {
            if (status.getText() == null) {
                continue;
            }

            // Skip deletes tweetids.
            if (deletes != null && deletes.contains(status.getId())) {
                continue;
            }

            if (status.getId() > maxId) {
                continue;
            }

            cnt++;
            Document doc = new Document();
            doc.add(new LongField(StatusField.ID.name, status.getId(), Field.Store.YES));
            doc.add(new LongField(StatusField.EPOCH.name, status.getEpoch(), Field.Store.YES));
            doc.add(new TextField(StatusField.SCREEN_NAME.name, status.getScreenname(), Store.YES));

            doc.add(new Field(StatusField.TEXT.name, status.getText(), textOptions));

            // NOTE(review): FRIENDS_COUNT is filled from getFollowersCount()
            // and FOLLOWERS_COUNT from getFriendsCount() — these look
            // swapped; confirm against the Status accessor semantics before
            // relying on either field.
            doc.add(new IntField(StatusField.FRIENDS_COUNT.name, status.getFollowersCount(), Store.YES));
            doc.add(new IntField(StatusField.FOLLOWERS_COUNT.name, status.getFriendsCount(), Store.YES));
            doc.add(new IntField(StatusField.STATUSES_COUNT.name, status.getStatusesCount(), Store.YES));

            // Reply metadata only for actual replies (id > 0).
            long inReplyToStatusId = status.getInReplyToStatusId();
            if (inReplyToStatusId > 0) {
                doc.add(new LongField(StatusField.IN_REPLY_TO_STATUS_ID.name, inReplyToStatusId,
                        Field.Store.YES));
                doc.add(new LongField(StatusField.IN_REPLY_TO_USER_ID.name, status.getInReplyToUserId(),
                        Field.Store.YES));
            }

            String lang = status.getLang();
            if (!lang.equals("unknown")) {
                doc.add(new TextField(StatusField.LANG.name, status.getLang(), Store.YES));
            }

            // Retweet metadata only for actual retweets (id > 0).
            long retweetStatusId = status.getRetweetedStatusId();
            if (retweetStatusId > 0) {
                doc.add(new LongField(StatusField.RETWEETED_STATUS_ID.name, retweetStatusId, Field.Store.YES));
                doc.add(new LongField(StatusField.RETWEETED_USER_ID.name, status.getRetweetedUserId(),
                        Field.Store.YES));
                doc.add(new IntField(StatusField.RETWEET_COUNT.name, status.getRetweetCount(), Store.YES));
                if (status.getRetweetCount() < 0 || status.getRetweetedStatusId() < 0) {
                    LOG.warn("Error parsing retweet fields of " + status.getId());
                }
            }

            writer.addDocument(doc);
            if (cnt % 100000 == 0) {
                LOG.info(cnt + " statuses indexed");
            }
        }

        LOG.info(String.format("Total of %s statuses added", cnt));

        if (cmdline.hasOption(OPTIMIZE_OPTION)) {
            LOG.info("Merging segments...");
            writer.forceMerge(1);
            LOG.info("Done!");
        }

        LOG.info("Total elapsed time: " + (System.currentTimeMillis() - startTime) + "ms");
    } catch (Exception e) {
        e.printStackTrace();
    } finally {
        writer.close();
        dir.close();
        stream.close();
    }
}