List of usage examples for org.apache.lucene.search IndexSearcher IndexSearcher
public IndexSearcher(IndexReaderContext context)
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/** * Creates a temporary index that stores the taxon concept LSIDs that were * included in the last ANBG exports./*w w w . j av a 2 s. c o m*/ * * @param tcFileName * @return * @throws Exception */ private IndexSearcher createTmpIndex(String tcFileName) throws Exception { //creating the tmp index in the /tmp/taxonConcept directory CSVReader reader = new CSVReader(new FileReader(new File(tcFileName)), '\t', '"', '~'); File indexDir = new File("/tmp/taxonConcept"); IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true); String[] values = null; while ((values = reader.readNext()) != null) { if (values != null && values.length > 1) { //just add the LSID to the index Document doc = new Document(); doc.add(new StringField("lsid", values[0], Store.NO)); iw.addDocument(doc); } } iw.commit(); iw.forceMerge(1); iw.close(); return new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir))); }
From source file:au.org.ala.names.search.ALANameSearcher.java
License:Open Source License
/** * Creates a new name searcher. Using the indexDirectory * as the source directory// w w w .j a va 2s . co m * * @param indexDirectory The directory that contains the index files for the scientific names, irmng and vernacular names. * @throws CorruptIndexException * @throws IOException */ public ALANameSearcher(String indexDirectory) throws CorruptIndexException, IOException { //Initialis CB index searching items log.debug("Creating the search object for the name matching api..."); //make the query parsers thread safe queryParser = new ThreadLocal<QueryParser>() { @Override protected QueryParser initialValue() { QueryParser qp = new QueryParser(Version.LUCENE_34, "genus", new LowerCaseKeywordAnalyzer()); qp.setFuzzyMinSim(0.8f); //fuzzy match similarity setting. used to match the authorship. return qp; } }; idParser = new ThreadLocal<QueryParser>() { @Override protected QueryParser initialValue() { return new QueryParser(Version.LUCENE_34, "lsid", new org.apache.lucene.analysis.core.KeywordAnalyzer()); } }; cbReader = DirectoryReader.open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "cb")));//false cbSearcher = new IndexSearcher(cbReader); //Initalise the IRMNG index searching items irmngReader = DirectoryReader .open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "irmng"))); irmngSearcher = new IndexSearcher(irmngReader); //initalise the Common name index searching items vernReader = DirectoryReader .open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "vernacular"))); vernSearcher = new IndexSearcher(vernReader); //initialise the identifier index idSearcher = new IndexSearcher( DirectoryReader.open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "id")))); tnse = new TaxonNameSoundEx(); parser = new PhraseNameParser(); crossRankHomonyms = au.org.ala.names.util.FileUtils.streamToSet( this.getClass().getClassLoader().getResourceAsStream("au/org/ala/homonyms/cross_rank_homonyms.txt"), new java.util.HashSet<String>(), true); }
From source file:au.org.ala.names.search.ALANameSearcher.java
License:Open Source License
public void reopenReaders() { //this should only need to reopen the cbSearcher because the others should NOT be changing try {/*from w ww . j av a 2 s . c o m*/ DirectoryReader newReader = DirectoryReader.openIfChanged(cbReader); //IndexReader tmpReader = cbReader.reopen(); if (newReader != null) { cbReader.close(); cbReader = newReader; //now reinit the searcher cbSearcher = new IndexSearcher(cbReader); } } catch (Exception e) { } }
From source file:au.org.ala.names.search.DwcaNameIndexer.java
License:Open Source License
/** * Creates a loading index to use to generate the hierarchy including the left right values. * * @param tmpIndexDir/*from ww w . jav a 2 s. c o m*/ * @param archiveDirectory * @throws Exception */ private void createLoadingIndex(String tmpIndexDir, String archiveDirectory) throws Exception { log.info("Starting to create the temporary loading index."); File indexDir = new File(tmpIndexDir); IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true); //create the loading index so that left right values and classifications can be generated Archive archive = ArchiveFactory.openArchive(new File(archiveDirectory)); Iterator<DarwinCoreRecord> it = archive.iteratorDwc(); int i = 0; long start = System.currentTimeMillis(); while (it.hasNext()) { Document doc = new Document(); DarwinCoreRecord dwcr = it.next(); String id = dwcr.getId(); String lsid = dwcr.getTaxonID() == null ? id : dwcr.getTaxonID(); String acceptedLsid = dwcr.getAcceptedNameUsageID(); //add and store the identifier for the record doc.add(new StringField(NameIndexField.ID.toString(), dwcr.getId(), Field.Store.YES)); if (StringUtils.isNotBlank(lsid)) { doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Field.Store.YES)); } else { System.out.println("LSID is null for " + id + " " + lsid + " " + lsid + " " + acceptedLsid); } if (StringUtils.isNotBlank(dwcr.getParentNameUsageID())) { doc.add(new StringField("parent_id", dwcr.getParentNameUsageID(), Field.Store.YES)); } if (StringUtils.isNotBlank(dwcr.getAcceptedNameUsageID())) { doc.add(new StringField(NameIndexField.ACCEPTED.toString(), dwcr.getAcceptedNameUsageID(), Field.Store.YES)); } if (StringUtils.isNotBlank(dwcr.getScientificName())) { //stored no need to search on doc.add(new StoredField(NameIndexField.NAME.toString(), dwcr.getScientificName())); } if (StringUtils.isNotBlank(dwcr.getScientificNameAuthorship())) { //stored no need to search on doc.add(new StoredField(NameIndexField.AUTHOR.toString(), dwcr.getScientificNameAuthorship())); } if (StringUtils.isNotBlank(dwcr.getGenus())) { //stored no need to search on doc.add(new StoredField("genus", dwcr.getGenus())); } if (StringUtils.isNotBlank(dwcr.getSpecificEpithet())) { //stored no need to search on doc.add(new StoredField(NameIndexField.SPECIFIC.toString(), dwcr.getSpecificEpithet())); } if (StringUtils.isNotBlank(dwcr.getInfraspecificEpithet())) { //stored no need to search on doc.add(new StoredField(NameIndexField.INFRA_SPECIFIC.toString(), dwcr.getInfraspecificEpithet())); } if (StringUtils.isNotBlank(dwcr.getTaxonRank())) { //match the supplied rank RankType rt = RankType.getForStrRank(dwcr.getTaxonRank()); if (rt != null) { doc.add(new StringField(NameIndexField.RANK.toString(), rt.getRank(), Field.Store.YES)); doc.add(new StringField(NameIndexField.RANK_ID.toString(), rt.getId().toString(), Field.Store.YES)); } else { doc.add(new StringField(NameIndexField.RANK.toString(), dwcr.getTaxonRank(), Field.Store.YES)); doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(), Field.Store.YES)); } } else { //put in unknown rank doc.add(new StringField(NameIndexField.RANK.toString(), "Unknown", Field.Store.YES)); doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(), Field.Store.YES)); } if (StringUtils.equals(lsid, acceptedLsid) || StringUtils.equals(id, acceptedLsid) || acceptedLsid == null) { //mark this one as an accepted concept doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES)); if (StringUtils.isBlank(dwcr.getParentNameUsageID())) { doc.add(new StringField("root", "T", Field.Store.YES)); } } else { doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES)); } iw.addDocument(doc); i++; if (i % 1000 == 0) { long finish = System.currentTimeMillis(); log.debug("Loading index: " + i + " records per sec: " + (1000 / (((float) (finish / start)) / 1000))); start = finish; } } log.info("Finished creating the temporary load index with " + i + " concepts"); iw.commit(); iw.forceMerge(1); iw.close(); lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir))); }
From source file:au.org.ala.names.search.DwcaNameIndexer.java
License:Open Source License
private TopDocs getLoadIdxResults(String field, String value, int max) throws Exception { if (lsearcher == null && new File(dirTmpIndex).exists()) { lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File(dirTmpIndex)))); } else if (lsearcher == null && !new File(dirTmpIndex).exists()) { throw new RuntimeException( "A load index has not been generated. Please run this tool with '-load' before creating the search index."); }//from ww w. j a v a 2 s . c om TermQuery tq = new TermQuery(new Term(field, value)); return lsearcher.search(tq, max); }
From source file:axiom.db.utils.LuceneManipulator.java
License:Open Source License
public void compress(String dbDir) throws Exception { System.setProperty("org.apache.lucene.FSDirectory.class", "org.apache.lucene.store.TransFSDirectory"); File dbhome = new File(dbDir); String url = getUrl(dbhome);//from w ww. j a v a2 s. c o m FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false); if (indexDir instanceof TransFSDirectory) { FSDirectory.setDisableLocks(true); TransFSDirectory d = (TransFSDirectory) indexDir; d.setDriverClass(DRIVER_CLASS); d.setUrl(url); d.setUser(null); d.setPassword(null); } File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp"); File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old"); FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true); if (nindexDir instanceof TransFSDirectory) { FSDirectory.setDisableLocks(true); TransFSDirectory d = (TransFSDirectory) nindexDir; d.setDriverClass(DRIVER_CLASS); d.setUrl(url); d.setUser(null); d.setPassword(null); } IndexSearcher searcher = null; IndexWriter writer = null; LuceneManager lmgr = null; try { searcher = new IndexSearcher(indexDir); PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer(); writer = IndexWriterManager.getWriter(nindexDir, a, true); final int numDocs = searcher.getIndexReader().numDocs(); HashSet deldocs = new HashSet(); HashMap infos = new HashMap(); for (int i = 0; i < numDocs; i++) { Document doc = searcher.doc(i); String delprop = doc.get(DeletedInfos.DELETED); final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR + doc.get(LuceneManager.LAYER_OF_SAVE); if (delprop != null && "true".equals(delprop)) { deldocs.add(id); } else { Object v; if ((v = infos.get(id)) == null) { infos.put(id, new Integer(i)); } else { final String lmod = doc.get(LuceneManager.LASTMODIFIED); final String lmod_prev = searcher.doc(((Integer) v).intValue()).get("_lastmodified"); if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) { infos.put(id, new Integer(i)); } } } } ArrayList listOfMaps = new ArrayList(); for (int i = 0; i < numDocs; i++) { Document doc = searcher.doc(i); String delprop = doc.get(DeletedInfos.DELETED); String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE); int layer = -1; try { layer = Integer.parseInt(layerStr); } catch (Exception ex) { layer = -1; } final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR + doc.get(LuceneManager.LAYER_OF_SAVE); if (delprop != null && "true".equals(delprop)) { continue; } else if (id != null && deldocs.contains(id)) { continue; } Integer idx = (Integer) infos.get(id); if (idx != null && i != idx.intValue()) { continue; } Document ndoc = convertDocument(doc); if (ndoc != null) { writer.addDocument(ndoc); } } } catch (Exception ex) { ex.printStackTrace(); throw new RuntimeException(ex); } finally { if (searcher != null) { try { searcher.close(); } catch (Exception ex) { } } if (lmgr != null) { lmgr.shutdown(); lmgr = null; } indexDir.close(); SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir); sinfos.clear(); IndexObjectsFactory.removeDeletedInfos(indexDir); } Connection conn = null; boolean exceptionOccured = false; try { if (writer != null) { conn = DriverManager.getConnection(url); conn.setAutoCommit(false); writer.close(); writer.flushCache(); LuceneManager.commitSegments(null, conn, dbhome, writer.getDirectory()); writer.finalizeTrans(); } } catch (Exception ex) { ex.printStackTrace(); exceptionOccured = true; throw new RuntimeException(ex); } finally { if (conn != null) { try { if (!conn.getAutoCommit()) { if (!exceptionOccured) { conn.commit(); } else { conn.rollback(); } } conn.close(); } catch (Exception ex) { ex.printStackTrace(); } conn = null; } nindexDir.close(); SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir); sinfos.clear(); IndexObjectsFactory.removeDeletedInfos(nindexDir); } File[] files = dbhome.listFiles(); for (int i = 0; i < files.length; i++) { if (!files[i].isDirectory()) { files[i].delete(); } } files = ndbhome.listFiles(); for (int i = 0; i < files.length; i++) { if (!files[i].isDirectory()) { File nfile = new File(dbhome, files[i].getName()); files[i].renameTo(nfile); } } if (!FileUtils.deleteDir(ndbhome)) { throw new Exception("Could not delete " + ndbhome); } }
From source file:axiom.objectmodel.dom.convert.LuceneConvertor.java
License:Open Source License
public void convert(Application app, File dbhome) throws Exception { FSDirectory indexDir = FSDirectory.getDirectory(dbhome, false); if (indexDir instanceof TransFSDirectory) { FSDirectory.setDisableLocks(true); TransFSDirectory d = (TransFSDirectory) indexDir; TransSource source = app.getTransSource(); d.setDriverClass(source.getDriverClass()); d.setUrl(source.getUrl());/* w w w . j a v a 2 s .co m*/ d.setUser(source.getUser()); d.setPassword(source.getPassword()); } File ndbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_tmp"); File olddbhome = new File(dbhome.getParentFile(), dbhome.getName() + "_old"); FSDirectory nindexDir = FSDirectory.getDirectory(ndbhome, true); if (nindexDir instanceof TransFSDirectory) { FSDirectory.setDisableLocks(true); TransFSDirectory d = (TransFSDirectory) nindexDir; TransSource source = app.getTransSource(); d.setDriverClass(source.getDriverClass()); d.setUrl(source.getUrl()); d.setUser(source.getUser()); d.setPassword(source.getPassword()); } IndexSearcher searcher = null; IndexWriter writer = null; LuceneManager lmgr = null; try { searcher = new IndexSearcher(indexDir); PerFieldAnalyzerWrapper a = LuceneManager.buildAnalyzer(); writer = IndexWriterManager.getWriter(nindexDir, a, true); final int numDocs = searcher.getIndexReader().numDocs(); HashSet deldocs = new HashSet(); HashMap infos = new HashMap(); for (int i = 0; i < numDocs; i++) { Document doc = searcher.doc(i); String delprop = doc.get(DeletedInfos.DELETED); String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE); int layer = -1; try { layer = Integer.parseInt(layerStr); } catch (Exception ex) { layer = -1; } final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR + doc.get(LuceneManager.LAYER_OF_SAVE); if (delprop != null && "true".equals(delprop)/* && layer == DbKey.LIVE_LAYER*/) { deldocs.add(id); } else { Object v; if ((v = infos.get(id)) == null) { infos.put(id, new Integer(i)); } else { final String lmod = doc.get(LuceneManager.LASTMODIFIED); final String lmod_prev = searcher.doc(((Integer) v).intValue()).get("_lastmodified"); if (lmod_prev == null || (lmod != null && lmod.compareTo(lmod_prev) > 0)) { infos.put(id, new Integer(i)); } } } } ArrayList listOfMaps = new ArrayList(); for (int i = 0; i < numDocs; i++) { Document doc = searcher.doc(i); String delprop = doc.get(DeletedInfos.DELETED); String layerStr = doc.get(LuceneManager.LAYER_OF_SAVE); int layer = -1; try { layer = Integer.parseInt(layerStr); } catch (Exception ex) { layer = -1; } final String id = doc.get(LuceneManager.ID) + DeletedInfos.KEY_SEPERATOR + doc.get(LuceneManager.LAYER_OF_SAVE); if (delprop != null && "true".equals(delprop)) { continue; } else if (id != null && deldocs.contains(id)/* && layer == DbKey.LIVE_LAYER*/) { continue; } Integer idx = (Integer) infos.get(id); if (idx != null && i != idx.intValue()) { continue; } Document ndoc = convertDocument(doc); if (this.recordNodes) { listOfMaps.add(LuceneManager.luceneDocumentToMap(doc)); } if (ndoc != null) { writer.addDocument(ndoc); } } if (this.recordNodes) { lmgr = new LuceneManager(this.app, false, true); this.allNodes = new HashMap(); final int size = listOfMaps.size(); for (int i = 0; i < size; i++) { HashMap m = (HashMap) listOfMaps.get(i); INode n = lmgr.mapToNode(m); this.allNodes.put(n.getID(), getPath(n)); n = null; } } } catch (Exception ex) { ex.printStackTrace(); throw new RuntimeException(ex); } finally { if (searcher != null) { try { searcher.close(); } catch (Exception ex) { app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex); } } if (lmgr != null) { lmgr.shutdown(); lmgr = null; } indexDir.close(); SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(indexDir); sinfos.clear(); IndexObjectsFactory.removeDeletedInfos(indexDir); } Connection conn = null; boolean exceptionOccured = false; try { if (writer != null) { TransSource ts = app.getTransSource(); conn = ts.getConnection(); DatabaseMetaData dmd = conn.getMetaData(); ResultSet rs = dmd.getColumns(null, null, "Lucene", "version"); if (!rs.next()) { final String alterTbl = "ALTER TABLE Lucene ADD version INT NOT NULL DEFAULT 1"; PreparedStatement pstmt = null; try { pstmt = conn.prepareStatement(alterTbl); pstmt.execute(); } catch (SQLException sqle) { app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), sqle); } finally { if (pstmt != null) { pstmt.close(); pstmt = null; } } } rs.close(); rs = null; writer.close(); writer.flushCache();//TODO:writer.writeSegmentsFile(); LuceneManager.commitSegments(conn, app, writer.getDirectory()); writer.finalizeTrans(); this.updateSQL(conn); } } catch (Exception ex) { ex.printStackTrace(); exceptionOccured = true; throw new RuntimeException(ex); } finally { if (conn != null) { try { if (!conn.getAutoCommit()) { if (!exceptionOccured) { conn.commit(); } else { conn.rollback(); } } conn.close(); } catch (Exception ex) { app.logError(ErrorReporter.errorMsg(this.getClass(), "convert"), ex); } conn = null; } nindexDir.close(); SegmentInfos sinfos = IndexObjectsFactory.getFSSegmentInfos(nindexDir); sinfos.clear(); IndexObjectsFactory.removeDeletedInfos(nindexDir); } if (!dbhome.renameTo(olddbhome)) { throw new Exception("Could not move the old version of the db into " + olddbhome); } if (!ndbhome.renameTo(dbhome)) { throw new Exception("Could not move the newer version of the db into " + dbhome); } File oldBlobDir = new File(olddbhome, "blob"); File newBlobDir = new File(ndbhome, "blob"); oldBlobDir.renameTo(newBlobDir); if (!FileUtils.deleteDir(olddbhome)) { throw new Exception("Could not delete the old version of the db at " + olddbhome); } }
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
private LuceneManager(Application app) throws Exception { this.app = app; this.writerManager = new IndexWriterManager(app, buildAnalyzer(), true, true); this.dataFormatter = new LuceneDataFormatter(app.getAppDir()); this.directory = this.writerManager.getDirectory(); try {// w w w. j a v a 2 s . co m this.searcher = new IndexSearcher(this.directory); } catch (IOException ignore) { // db doesn't exist yet, so create the searcher the first time a search is done } }
From source file:axiom.objectmodel.dom.LuceneManager.java
License:Open Source License
public synchronized IndexSearcher getIndexSearcher() throws IOException { if (!this.isSearcherValid || this.searcher == null) { this.isSearcherValid = true; try {// ww w .ja va 2 s . c o m this.searcher = new IndexSearcher(this.directory); } catch (Exception ex) { throw new IOException( "FATAL ERROR::LuceneManager.getIndexSearcher(), Could not create IndexSearcher"); } } this.searcher.refCount++; return this.searcher; }
From source file:back.Searcher.java
License:Apache License
/** Simple command-line based search demo. */ public static void search(String query, boolean stopword, boolean stemming, int consulta) throws Exception { String index = null;//w w w . j av a 2 s.c om Analyzer analyzer = null; if (!stopword && !stemming) { index = ".\\indexed"; analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT, new CharArraySet(Version.LUCENE_CURRENT, 0, false)); System.out.println("Nenhum Marcado"); } else if (stopword && !stemming) { index = ".\\indexedNoStpWrd"; analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); System.out.println("Primeiro Marcado"); } else if (!stopword && stemming) { index = ".\\indexedStemming"; analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT, new CharArraySet(Version.LUCENE_CURRENT, 0, false)); System.out.println("Segundo Marcado"); } else if (stopword && stemming) { index = ".\\indexedTreated"; analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT); System.out.println("Dois Marcados"); } String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = query; int hitsPerPage = 200; CSVReader CSVreader = new CSVReader(new FileReader(".\\matriz.csv")); List<String[]> myEntries = CSVreader.readAll(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query1 = parser.parse(line); System.out.println("Searching for: " + query1.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query1, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query1, hitsPerPage, raw, queries == null && queryString == null, myEntries, consulta); if (queryString != null) { break; } } reader.close(); }