List of usage examples for org.apache.lucene.index DirectoryReader open
public static DirectoryReader open(final IndexCommit commit) throws IOException
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/** * Creates a temporary index that stores the taxon concept LSIDs that were * included in the last ANBG exports./*from ww w . jav a 2s.c om*/ * * @param tcFileName * @return * @throws Exception */ private IndexSearcher createTmpIndex(String tcFileName) throws Exception { //creating the tmp index in the /tmp/taxonConcept directory CSVReader reader = new CSVReader(new FileReader(new File(tcFileName)), '\t', '"', '~'); File indexDir = new File("/tmp/taxonConcept"); IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true); String[] values = null; while ((values = reader.readNext()) != null) { if (values != null && values.length > 1) { //just add the LSID to the index Document doc = new Document(); doc.add(new StringField("lsid", values[0], Store.NO)); iw.addDocument(doc); } } iw.commit(); iw.forceMerge(1); iw.close(); return new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir))); }
From source file:au.org.ala.names.search.ALANameSearcher.java
License:Open Source License
/** * Creates a new name searcher. Using the indexDirectory * as the source directory/* w w w . j av a2 s . co m*/ * * @param indexDirectory The directory that contains the index files for the scientific names, irmng and vernacular names. * @throws CorruptIndexException * @throws IOException */ public ALANameSearcher(String indexDirectory) throws CorruptIndexException, IOException { //Initialis CB index searching items log.debug("Creating the search object for the name matching api..."); //make the query parsers thread safe queryParser = new ThreadLocal<QueryParser>() { @Override protected QueryParser initialValue() { QueryParser qp = new QueryParser(Version.LUCENE_34, "genus", new LowerCaseKeywordAnalyzer()); qp.setFuzzyMinSim(0.8f); //fuzzy match similarity setting. used to match the authorship. return qp; } }; idParser = new ThreadLocal<QueryParser>() { @Override protected QueryParser initialValue() { return new QueryParser(Version.LUCENE_34, "lsid", new org.apache.lucene.analysis.core.KeywordAnalyzer()); } }; cbReader = DirectoryReader.open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "cb")));//false cbSearcher = new IndexSearcher(cbReader); //Initalise the IRMNG index searching items irmngReader = DirectoryReader .open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "irmng"))); irmngSearcher = new IndexSearcher(irmngReader); //initalise the Common name index searching items vernReader = DirectoryReader .open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "vernacular"))); vernSearcher = new IndexSearcher(vernReader); //initialise the identifier index idSearcher = new IndexSearcher( DirectoryReader.open(FSDirectory.open(createIfNotExist(indexDirectory + File.separator + "id")))); tnse = new TaxonNameSoundEx(); parser = new PhraseNameParser(); crossRankHomonyms = au.org.ala.names.util.FileUtils.streamToSet( 
this.getClass().getClassLoader().getResourceAsStream("au/org/ala/homonyms/cross_rank_homonyms.txt"), new java.util.HashSet<String>(), true); }
From source file:au.org.ala.names.search.DwcaNameIndexer.java
License:Open Source License
/** * Creates a loading index to use to generate the hierarchy including the left right values. * * @param tmpIndexDir/*w w w . ja va 2s. com*/ * @param archiveDirectory * @throws Exception */ private void createLoadingIndex(String tmpIndexDir, String archiveDirectory) throws Exception { log.info("Starting to create the temporary loading index."); File indexDir = new File(tmpIndexDir); IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true); //create the loading index so that left right values and classifications can be generated Archive archive = ArchiveFactory.openArchive(new File(archiveDirectory)); Iterator<DarwinCoreRecord> it = archive.iteratorDwc(); int i = 0; long start = System.currentTimeMillis(); while (it.hasNext()) { Document doc = new Document(); DarwinCoreRecord dwcr = it.next(); String id = dwcr.getId(); String lsid = dwcr.getTaxonID() == null ? id : dwcr.getTaxonID(); String acceptedLsid = dwcr.getAcceptedNameUsageID(); //add and store the identifier for the record doc.add(new StringField(NameIndexField.ID.toString(), dwcr.getId(), Field.Store.YES)); if (StringUtils.isNotBlank(lsid)) { doc.add(new StringField(NameIndexField.LSID.toString(), lsid, Field.Store.YES)); } else { System.out.println("LSID is null for " + id + " " + lsid + " " + lsid + " " + acceptedLsid); } if (StringUtils.isNotBlank(dwcr.getParentNameUsageID())) { doc.add(new StringField("parent_id", dwcr.getParentNameUsageID(), Field.Store.YES)); } if (StringUtils.isNotBlank(dwcr.getAcceptedNameUsageID())) { doc.add(new StringField(NameIndexField.ACCEPTED.toString(), dwcr.getAcceptedNameUsageID(), Field.Store.YES)); } if (StringUtils.isNotBlank(dwcr.getScientificName())) { //stored no need to search on doc.add(new StoredField(NameIndexField.NAME.toString(), dwcr.getScientificName())); } if (StringUtils.isNotBlank(dwcr.getScientificNameAuthorship())) { //stored no need to search on doc.add(new StoredField(NameIndexField.AUTHOR.toString(), 
dwcr.getScientificNameAuthorship())); } if (StringUtils.isNotBlank(dwcr.getGenus())) { //stored no need to search on doc.add(new StoredField("genus", dwcr.getGenus())); } if (StringUtils.isNotBlank(dwcr.getSpecificEpithet())) { //stored no need to search on doc.add(new StoredField(NameIndexField.SPECIFIC.toString(), dwcr.getSpecificEpithet())); } if (StringUtils.isNotBlank(dwcr.getInfraspecificEpithet())) { //stored no need to search on doc.add(new StoredField(NameIndexField.INFRA_SPECIFIC.toString(), dwcr.getInfraspecificEpithet())); } if (StringUtils.isNotBlank(dwcr.getTaxonRank())) { //match the supplied rank RankType rt = RankType.getForStrRank(dwcr.getTaxonRank()); if (rt != null) { doc.add(new StringField(NameIndexField.RANK.toString(), rt.getRank(), Field.Store.YES)); doc.add(new StringField(NameIndexField.RANK_ID.toString(), rt.getId().toString(), Field.Store.YES)); } else { doc.add(new StringField(NameIndexField.RANK.toString(), dwcr.getTaxonRank(), Field.Store.YES)); doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(), Field.Store.YES)); } } else { //put in unknown rank doc.add(new StringField(NameIndexField.RANK.toString(), "Unknown", Field.Store.YES)); doc.add(new StringField(NameIndexField.RANK_ID.toString(), RankType.UNRANKED.getId().toString(), Field.Store.YES)); } if (StringUtils.equals(lsid, acceptedLsid) || StringUtils.equals(id, acceptedLsid) || acceptedLsid == null) { //mark this one as an accepted concept doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "F", Field.Store.YES)); if (StringUtils.isBlank(dwcr.getParentNameUsageID())) { doc.add(new StringField("root", "T", Field.Store.YES)); } } else { doc.add(new StringField(NameIndexField.iS_SYNONYM.toString(), "T", Field.Store.YES)); } iw.addDocument(doc); i++; if (i % 1000 == 0) { long finish = System.currentTimeMillis(); log.debug("Loading index: " + i + " records per sec: " + (1000 / (((float) (finish / start)) / 1000))); start = finish; 
} } log.info("Finished creating the temporary load index with " + i + " concepts"); iw.commit(); iw.forceMerge(1); iw.close(); lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir))); }
From source file:au.org.ala.names.search.DwcaNameIndexer.java
License:Open Source License
private TopDocs getLoadIdxResults(String field, String value, int max) throws Exception { if (lsearcher == null && new File(dirTmpIndex).exists()) { lsearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File(dirTmpIndex)))); } else if (lsearcher == null && !new File(dirTmpIndex).exists()) { throw new RuntimeException( "A load index has not been generated. Please run this tool with '-load' before creating the search index."); }//w w w. j a v a 2 s .c o m TermQuery tq = new TermQuery(new Term(field, value)); return lsearcher.search(tq, max); }
From source file:back.Searcher.java
License:Apache License
/** Simple command-line based search demo. */ public static void search(String query, boolean stopword, boolean stemming, int consulta) throws Exception { String index = null;//from w ww. j a v a 2 s .c o m Analyzer analyzer = null; if (!stopword && !stemming) { index = ".\\indexed"; analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT, new CharArraySet(Version.LUCENE_CURRENT, 0, false)); System.out.println("Nenhum Marcado"); } else if (stopword && !stemming) { index = ".\\indexedNoStpWrd"; analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT); System.out.println("Primeiro Marcado"); } else if (!stopword && stemming) { index = ".\\indexedStemming"; analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT, new CharArraySet(Version.LUCENE_CURRENT, 0, false)); System.out.println("Segundo Marcado"); } else if (stopword && stemming) { index = ".\\indexedTreated"; analyzer = new EnglishAnalyzer(Version.LUCENE_CURRENT); System.out.println("Dois Marcados"); } String field = "contents"; String queries = null; int repeat = 0; boolean raw = false; String queryString = query; int hitsPerPage = 200; CSVReader CSVreader = new CSVReader(new FileReader(".\\matriz.csv")); List<String[]> myEntries = CSVreader.readAll(); IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index))); IndexSearcher searcher = new IndexSearcher(reader); BufferedReader in = null; if (queries != null) { in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8")); } else { in = new BufferedReader(new InputStreamReader(System.in, "UTF-8")); } QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer); while (true) { if (queries == null && queryString == null) { // prompt the user System.out.println("Enter query: "); } String line = queryString != null ? 
queryString : in.readLine(); if (line == null || line.length() == -1) { break; } line = line.trim(); if (line.length() == 0) { break; } Query query1 = parser.parse(line); System.out.println("Searching for: " + query1.toString(field)); if (repeat > 0) { // repeat & time as benchmark Date start = new Date(); for (int i = 0; i < repeat; i++) { searcher.search(query1, null, 100); } Date end = new Date(); System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms"); } doPagingSearch(in, searcher, query1, hitsPerPage, raw, queries == null && queryString == null, myEntries, consulta); if (queryString != null) { break; } } reader.close(); }
From source file:bajavista.Buscador.java
public ArrayList<Informacion> buscarContenido(String busqueda) throws IOException, ParseException { StandardAnalyzer analyzer = new StandardAnalyzer(Version.LUCENE_43); File indexDirES = new File(dirIndexES); Directory indexES = FSDirectory.open(indexDirES); //File indexDirNONES = new File(dirIndexNONES); //Directory indexNONES = FSDirectory.open(indexDirNONES); // 2. Query/*w ww . ja v a 2 s .c o m*/ String querystr = busqueda; Query q = new QueryParser(Version.LUCENE_43, "text", analyzer).parse(querystr); //Query qNONES = new QueryParser(Version.LUCENE_43, "contenido", analyzer).parse(querystr); // 3. Search int hitsPage = 1024; IndexReader reader = DirectoryReader.open(indexES); IndexSearcher searcher = new IndexSearcher(reader); //IndexReader readerNONES = DirectoryReader.open(indexNONES); //IndexSearcher searcherNONES = new IndexSearcher(readerNONES); TopScoreDocCollector collector = TopScoreDocCollector.create(hitsPage, true); //TopScoreDocCollector collectorNONES = TopScoreDocCollector.create(hitsPage, true); searcher.search(q, collector); //searcherNONES.search(q, collectorNONES); ScoreDoc[] hits = collector.topDocs().scoreDocs; // ScoreDoc[] hitsNONES = collectorNONES.topDocs().scoreDocs; // 4. Return results for (int i = 0; i < hits.length; ++i) { int docId = hits[i].doc; Document data = searcher.doc(docId); info = new Informacion(Integer.parseInt(data.get("idUser")), Long.parseLong(data.get("timestamp")), data.get("text"), Double.parseDouble(data.get("objective")), Double.parseDouble(data.get("subjective")), Double.parseDouble(data.get("positive")), Double.parseDouble(data.get("negative")), Integer.parseInt(data.get("need"))); listaInfo.add(info); } /*System.out.println("No ES Found " + hitsNONES.length + " hits."); for(int i=0;i<hitsNONES.length;++i) { int docId = hitsNONES[i].doc; Document d = searcherNONES.doc(docId); System.out.println((i + 1) + ". 
" + d.get("es") + "\t" + d.get("contenido")); }*/ reader.close(); //readerNONES.close(); return listaInfo; }
From source file:bbejeck.nosql.lucene.LuceneSqlFileSystemSearchBase.java
License:Apache License
/**
 * Opens a reader over {@code fsDirectory} and a searcher on top of it,
 * storing them in {@code ireader} and {@code isearcher}.
 *
 * @throws Exception if the index cannot be opened
 */
@Override
public void openSearcher() throws Exception {
    final DirectoryReader openedReader = DirectoryReader.open(fsDirectory);
    ireader = openedReader;
    isearcher = new IndexSearcher(openedReader);
}
From source file:bbejeck.nosql.lucene.LuceneSqlSearchBase.java
License:Apache License
/**
 * Opens a reader over {@code ramDirectory} and a searcher on top of it,
 * storing them in {@code ireader} and {@code isearcher}.
 *
 * @throws Exception if the index cannot be opened
 */
public void openSearcher() throws Exception {
    final DirectoryReader openedReader = DirectoryReader.open(ramDirectory);
    ireader = openedReader;
    isearcher = new IndexSearcher(openedReader);
}
From source file:biospectra.classify.Classifier.java
License:Apache License
private void initialize(File indexPath, int kmerSize, int kmerSkips, boolean minStrandKmer, double minShouldMatch, QueryGenerationAlgorithm queryGenerationAlgorithm, Similarity similarity) throws Exception { if (!indexPath.exists() || !indexPath.isDirectory()) { throw new IllegalArgumentException("indexPath is not a directory or does not exist"); }//from ww w. jav a2 s . c o m this.indexPath = indexPath; this.kmerSize = kmerSize; this.kmerSkips = kmerSkips; this.minStrandKmer = minStrandKmer; this.queryAnalyzer = new KmerQueryAnalyzer(this.kmerSize, this.kmerSkips, this.minStrandKmer); Directory dir = new MMapDirectory(this.indexPath.toPath()); this.indexReader = DirectoryReader.open(dir); this.indexSearcher = new IndexSearcher(this.indexReader); if (similarity != null) { this.indexSearcher.setSimilarity(similarity); } this.minShouldMatch = minShouldMatch; this.queryGenerationAlgorithm = queryGenerationAlgorithm; BooleanQuery.setMaxClauseCount(10000); }
From source file:biospectra.utils.IndexUtil.java
License:Apache License
private void initialize(File indexPath) throws Exception { if (indexPath == null) { throw new IllegalArgumentException("indexPath is null"); }//from w ww . j a v a2 s . c om this.indexPath = indexPath; Directory dir = new MMapDirectory(this.indexPath.toPath()); this.indexReader = DirectoryReader.open(dir); }