List of usage examples for org.apache.lucene.index DirectoryReader open
public static DirectoryReader open(final IndexCommit commit) throws IOException
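None of the examples below call this overload directly; they all reach DirectoryReader through the open(Directory) variant. As a minimal sketch of the IndexCommit variant itself (the index path is hypothetical, and listCommits only returns more than the latest commit if the index was written with a deletion policy that retains old commits, e.g. NoDeletionPolicy or SnapshotDeletionPolicy):

import java.nio.file.Paths;
import java.util.List;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexCommit;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class OpenFromCommit {
    public static void main(String[] args) throws Exception {
        Directory dir = FSDirectory.open(Paths.get("/data/index")); // hypothetical path
        // Commits are returned sorted from oldest to newest.
        List<IndexCommit> commits = DirectoryReader.listCommits(dir);
        IndexCommit oldest = commits.get(0);
        // Open a point-in-time reader pinned to that commit rather than the latest one.
        try (DirectoryReader reader = DirectoryReader.open(oldest)) {
            System.out.println("docs in commit: " + reader.numDocs());
        }
    }
}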
From source file:app.finder.topicsource.service.SearchFiles.java
License:Apache License
public List<TopicSource> getTopicSources(String queryString) throws IOException, ParseException {
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false;
    int hitsPerPage = SEARCH_MAX_SIZE; // 100

    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(indexDir)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();
    BufferedReader in = null;

    QueryParser parser = new QueryParser(field, analyzer);
    Query query = parser.parse(queryString);
    //System.out.println("Searching for: " + query.toString(field));

    List<String> list = doSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);
    reader.close();

    List<TopicSource> topicSourceList = new ArrayList<TopicSource>();
    TopicSource topicSource = null;
    int counter = 0;
    for (String fileName : list) {
        topicSource = new TopicSource();
        File file = new File(fileName);
        topicSource.setFileName("" + (++counter) + ". " + file.getName());
        topicSource.setPath(file.getCanonicalPath());
        topicSource.setText(readFile(file));
        topicSourceList.add(topicSource);
    }
    return topicSourceList;
}
From source file:app.SearchFiles.java
License:Apache License
/** Simple command-line based search demo. */
public static void main(String[] args) throws Exception {
    String usage = "Usage:\tjava org.apache.lucene.demo.SearchFiles"
            + " [-index dir] [-field f] [-repeat n] [-queries file] [-query string] [-raw] [-paging hitsPerPage]"
            + "\n\nSee http://lucene.apache.org/core/4_1_0/demo/ for details.";
    if (args.length > 0 && ("-h".equals(args[0]) || "-help".equals(args[0]))) {
        System.out.println(usage);
        System.exit(0);
    }

    String index = "index";
    String field = "contents";
    String queries = null;
    int repeat = 0;
    boolean raw = false; // enabled by the -raw flag below
    String queryString = null;
    int hitsPerPage = 10;

    for (int i = 0; i < args.length; i++) {
        if ("-index".equals(args[i])) {
            index = args[i + 1];
            i++;
        } else if ("-field".equals(args[i])) {
            field = args[i + 1];
            i++;
        } else if ("-queries".equals(args[i])) {
            queries = args[i + 1];
            i++;
        } else if ("-query".equals(args[i])) {
            queryString = args[i + 1];
            i++;
        } else if ("-repeat".equals(args[i])) {
            repeat = Integer.parseInt(args[i + 1]);
            i++;
        } else if ("-raw".equals(args[i])) {
            raw = true;
        } else if ("-paging".equals(args[i])) {
            hitsPerPage = Integer.parseInt(args[i + 1]);
            if (hitsPerPage <= 0) {
                System.err.println("There must be at least 1 hit per page.");
                System.exit(1);
            }
            i++;
        }
    }

    IndexReader reader = DirectoryReader.open(FSDirectory.open(new File(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_40);

    BufferedReader in = null;
    if (queries != null) {
        in = new BufferedReader(new InputStreamReader(new FileInputStream(queries), "UTF-8"));
    } else {
        in = new BufferedReader(new InputStreamReader(System.in, "UTF-8"));
    }
    QueryParser parser = new QueryParser(Version.LUCENE_40, field, analyzer);
    while (true) {
        if (queries == null && queryString == null) {
            // prompt the user
            System.out.println("Enter query: ");
        }

        String line = queryString != null ? queryString : in.readLine();
        if (line == null) {
            break;
        }
        line = line.trim();
        if (line.length() == 0) {
            break;
        }

        Query query = parser.parse(line);
        System.out.println("Searching for: " + query.toString(field));

        if (repeat > 0) { // repeat & time as benchmark
            Date start = new Date();
            for (int i = 0; i < repeat; i++) {
                searcher.search(query, null, 100);
            }
            Date end = new Date();
            System.out.println("Time: " + (end.getTime() - start.getTime()) + "ms");
        }

        doPagingSearch(in, searcher, query, hitsPerPage, raw, queries == null && queryString == null);

        if (queryString != null) {
            break;
        }
    }
    reader.close();
}
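Note that this example targets Lucene 4.x throughout: FSDirectory.open(File) became FSDirectory.open(Path) in 5.0, the Version arguments to StandardAnalyzer and QueryParser were dropped around the same release, and the search(Query, Filter, int) overload used in the benchmark loop was removed (along with Filter itself) in later releases, leaving search(Query, int). Porting this demo forward means touching each of those call sites.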
From source file:Application.mediaIndexer.java
public static void SearchFiles(String index, String queryString, String selected, TextArea results)
        throws IOException, ParseException {
    IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get(index)));
    IndexSearcher searcher = new IndexSearcher(reader);
    Analyzer analyzer = new StandardAnalyzer();
    BufferedReader in = new BufferedReader(new InputStreamReader(System.in, StandardCharsets.UTF_8));
    QueryParser parser = new QueryParser(selected, analyzer);

    String line = queryString != null ? queryString : in.readLine();
    if (line == null) { // guard against end-of-stream before trimming
        reader.close();
        return;
    }
    line = line.trim();
    Query query = parser.parse(line);

    int maxHits = 100;
    TopDocs docsResults = searcher.search(query, maxHits);
    ScoreDoc[] hits = docsResults.scoreDocs;
    for (int i = 0; i < hits.length; i++) {
        Document doc = searcher.doc(hits[i].doc);
        results.appendText("Title: " + doc.get("title") + "\n");
        results.appendText("Artists: " + doc.get("xmpDM:artist") + "\n");
        results.appendText("Genre: " + doc.get("xmpDM:genre") + "\n");
        results.appendText("Year: " + doc.get("xmpDM:releaseDate") + "\n");
    }

    // Playlist.
    playlist.clear();
    for (int i = 0; i < hits.length; i++) {
        Document doc = searcher.doc(hits[i].doc);
        String path = doc.get("path");
        if (path != null)
            playlist.add(new File(path));
    }
    reader.close();
}
From source file:approxnn.ANNRetriever.java
public ANNRetriever(String propFile) throws Exception {
    prop = new Properties();
    prop.load(new FileReader(propFile));
    numDimensions = Integer.parseInt(prop.getProperty("vec.numdimensions"));

    syntheticQueries = prop.getProperty("data.source").equals("synthetic");
    if (syntheticQueries)
        rqgen = new RandomQueryGen(prop);

    // Read from optimized index (instead of the initial index)
    String indexPath = !syntheticQueries ? prop.getProperty("index") : rqgen.randomSamplesFileName() + ".index";
    if (indexPath != null) {
        File indexDir = new File(indexPath);
        //reader = DirectoryReader.open(FSDirectory.open(indexDir.toPath()));
        reader = DirectoryReader.open(MMapDirectory.open(indexDir.toPath()));
        //reader = DirectoryReader.open(new RAMDirectory(FSDirectory.open(indexDir.toPath()), IOContext.DEFAULT));
        searcher = new IndexSearcher(reader);
        searcher.setSimilarity(new LMJelinekMercerSimilarity(0.1f)); // almost close to tf
    }

    DocVector.initVectorRange(prop);
    numIntervals = DocVector.numIntervals;

    if (!syntheticQueries)
        indexedVecQueries = new IndexedVecQueries(propFile);
    //System.out.println(indexedVecQueries);
    //vecQueries = new VecQueries(propFile);

    debug = Boolean.parseBoolean(prop.getProperty("debug", "false"));
    subSpaceDimension = Integer.parseInt(prop.getProperty("subspace.dimension", "0"));
    start = Integer.parseInt(prop.getProperty("retrieve.start", "0"));
    end = Integer.parseInt(prop.getProperty("retrieve.end", "-1"));
}
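A note on the commented-out alternatives above: FSDirectory.open(Path) already selects a platform-appropriate implementation (typically MMapDirectory on 64-bit JVMs), so calling MMapDirectory.open explicitly mainly pins that choice rather than changing behavior on most modern systems.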
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/**
 * This constructor loads the SKOS model from a given InputStream using the
 * given serialization language parameter, which must be either N3, RDF/XML,
 * or TURTLE.
 *
 * @param inputStream the input stream
 * @param lang the serialization language
 * @throws IOException if the model cannot be loaded
 */
public SKOSEngineImpl(InputStream inputStream, String lang) throws IOException {
    if (!("N3".equals(lang) || "RDF/XML".equals(lang) || "TURTLE".equals(lang))) {
        throw new IOException("Invalid RDF serialization format");
    }

    this.analyzer = new SimpleAnalyzer();

    this.skosModel = ModelFactory.createDefaultModel();
    skosModel.read(inputStream, null, lang);

    indexDir = new RAMDirectory();
    entailSKOSModel();
    indexSKOSModel();

    searcher = new IndexSearcher(DirectoryReader.open(indexDir));
}
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/**
 * This constructor loads the SKOS model from a given filename or URI,
 * starts the indexing process and sets up the index searcher.
 *
 * @param indexPath index path
 * @param filenameOrURI file name or URI
 * @param languages the languages to be considered
 * @throws IOException if indexing SKOS model fails
 */
public SKOSEngineImpl(String indexPath, String filenameOrURI, List<String> languages) throws IOException {
    this.analyzer = new SimpleAnalyzer();

    String langSig = "";
    if (languages != null) {
        this.languages = new TreeSet<>(languages);
        if (!this.languages.isEmpty()) {
            langSig = "-" + join(this.languages.iterator(), '-');
        }
    }

    String name = getName(filenameOrURI);
    File dir = new File(indexPath + name + langSig);
    this.indexDir = FSDirectory.open(dir.toPath());

    if (filenameOrURI != null) {
        FileManager fileManager = new FileManager();
        fileManager.addLocatorFile();
        fileManager.addLocatorURL();
        fileManager.addLocatorClassLoader(SKOSEngineImpl.class.getClassLoader());

        if (getExtension(filenameOrURI).equals("zip")) {
            fileManager.addLocatorZip(filenameOrURI);
            filenameOrURI = getBaseName(filenameOrURI);
        }

        File inputFile = new File(filenameOrURI);
        Path inputPath = Paths.get(inputFile.getParent(), inputFile.getName());
        skosModel = fileManager.loadModel(inputPath.toUri().toString());

        entailSKOSModel();
        indexSKOSModel();

        searcher = new IndexSearcher(DirectoryReader.open(indexDir));
    }
}
From source file:at.ac.univie.mminf.luceneSKOS.analysis.engine.jena.SKOSEngineImpl.java
License:Apache License
/**
 * This constructor loads the SKOS model from a given InputStream using the
 * given serialization language parameter, which must be either N3, RDF/XML,
 * or TURTLE.
 *
 * @param inputStream the input stream
 * @param format the serialization language
 * @param languages the languages
 * @throws IOException if the model cannot be loaded
 */
public SKOSEngineImpl(InputStream inputStream, String format, List<String> languages) throws IOException {
    if (!("N3".equals(format) || "RDF/XML".equals(format) || "TURTLE".equals(format))) {
        throw new IOException("Invalid RDF serialization format");
    }

    if (languages != null) {
        this.languages = new TreeSet<>(languages);
    }

    analyzer = new SimpleAnalyzer();

    skosModel = ModelFactory.createDefaultModel();
    skosModel.read(inputStream, null, format);

    indexDir = new RAMDirectory();
    entailSKOSModel();
    indexSKOSModel();

    searcher = new IndexSearcher(DirectoryReader.open(indexDir));
}
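The two InputStream-based constructors above build their index in a RAMDirectory, which is deprecated in Lucene 8.x and removed in 9.x. A sketch of the same setup on those versions, assuming the indexDir field is declared as Directory and nothing else changes, would swap in ByteBuffersDirectory:

// In-memory replacement for RAMDirectory on Lucene 8+:
indexDir = new ByteBuffersDirectory();
entailSKOSModel();
indexSKOSModel();
searcher = new IndexSearcher(DirectoryReader.open(indexDir));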
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/**
 * Creates the temporary index that provides a lookup of checklist bank id to
 * GUID
 */
private IndexSearcher createTmpGuidIndex(String cbExportFile) throws Exception {
    System.out.println("Starting to create the tmp guid index...");
    IndexWriter iw = createIndexWriter(new File("/data/tmp/guid"), new KeywordAnalyzer(), true);
    au.com.bytecode.opencsv.CSVReader cbreader = new au.com.bytecode.opencsv.CSVReader(
            new FileReader(cbExportFile), '\t', '"', '/', 1);
    for (String[] values = cbreader.readNext(); values != null; values = cbreader.readNext()) {
        Document doc = new Document();
        String id = values[POS_ID];
        String guid = values[POS_LSID];
        doc.add(new StringField("id", id, Store.YES));
        // fall back to the checklist bank id when no GUID/LSID is supplied
        if (StringUtils.isEmpty(guid))
            guid = id;
        doc.add(new StoredField("guid", guid));
        iw.addDocument(doc);
    }
    System.out.println("Finished writing the tmp guid index...");
    iw.commit();
    iw.forceMerge(1);
    iw.close();
    // As of lucene 4.0 all IndexReaders are read only
    return new IndexSearcher(DirectoryReader.open(FSDirectory.open(new File("/data/tmp/guid"))));
}
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/**
 * Indexes common names from CoL and ANBG for use in the Common name search.
 *
 * @param iw The index writer to write the common documents to
 * @param exportDir The directory that contains the common name export files.
 * @param indexDir The directory in which to create the index.
 * @throws Exception
 */
private void indexCommonNames(IndexWriter iw, String exportDir, String indexDir) throws Exception {
    log.info("Creating Common Names Index ...");
    //TODO think about adding additional sources for common names

    IndexSearcher currentNameSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "cb"))));
    IndexSearcher extraSearcher = new IndexSearcher(
            DirectoryReader.open(FSDirectory.open(new File(indexDir + File.separator + "id"))));

    addCoLCommonNames(iw, currentNameSearcher);
    addAnbgCommonNames(afdFile, iw, currentNameSearcher, extraSearcher, '\t');
    addAnbgCommonNames(apniFile, iw, currentNameSearcher, extraSearcher, ',');

    iw.commit();
    iw.forceMerge(1);
    iw.close();
}
From source file:au.org.ala.names.search.ALANameIndexer.java
License:Open Source License
/**
 * Creates a temporary index that will provide a lookup up of lsid to "real lsid".
 * <p/>
 * This deals with the following situations:
 * - common names that are sourced from CoL (LSIDs will be mapped to corresponding ANBG LSID)
 * - Multiple ANBG LSIDs exist for the same scientific name and more than 1 are mapped to the same common name.
 *
 * @param idFile
 * @throws Exception
 */
private void createExtraIdIndex(String idxLocation, File idFile) throws Exception {
    CSVReader reader = new CSVReader(new FileReader(idFile), '\t', '"', '~');
    File indexDir = new File(idxLocation);
    IndexWriter iw = createIndexWriter(indexDir, new KeywordAnalyzer(), true);

    String[] values = null;
    while ((values = reader.readNext()) != null) {
        if (values.length >= 3) {
            Document doc = new Document();
            doc.add(new StringField("lsid", values[2], Store.NO));
            doc.add(new StoredField("reallsid", values[1]));
            iw.addDocument(doc);
        }
    }
    iw.commit();
    iw.forceMerge(1);
    iw.close();

    idSearcher = new IndexSearcher(DirectoryReader.open(FSDirectory.open(indexDir)));
}