Usage examples for the constructor of org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader
public DirectoryTaxonomyReader(DirectoryTaxonomyWriter taxoWriter) throws IOException
From source file:com.chimpler.example.FacetLuceneAdvancedSearcher.java
License:Apache License
public static void main(String args[]) throws Exception { if (args.length != 5) { System.err.println(/* w w w . j a va 2s. co m*/ "Parameters: [index directory] [taxonomy directory] [query] [field drilldown] [value drilldown]"); System.exit(1); } String indexDirectory = args[0]; String taxonomyDirectory = args[1]; String query = args[2]; String fieldDrilldown = args[3]; String valueDrilldown = args[4]; IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); CategoryPath drillDownCategoryPath = new CategoryPath(fieldDrilldown + "/" + valueDrilldown, '/'); FacetSearchParams searchParams = new FacetSearchParams(); searchParams.facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); searchParams.facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); searchParams.facetRequests.add(new CountFacetRequest(drillDownCategoryPath, 100)); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); //luceneQuery = DrillDownQuery.query(luceneQuery, drillDownCategoryPath); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), 
document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); }
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception { // if (args.length != 3) { // System.err.println("Parameters: [index directory] [taxonomy directory] [json file]"); // System.exit(1); // }/* w w w. j av a 2s . c om*/ String indexDirectory = "index"; String taxonomyDirectory = "taxonomy"; String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json"; IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION, new WhitespaceAnalyzer(LUCENE_VERSION)); writerConfig.setOpenMode(OpenMode.APPEND); IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig); TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)), OpenMode.APPEND); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); String content = IOUtils.toString(new FileInputStream(jsonFileName)); JSONArray bookArray = new JSONArray(content); Field idField = new IntField("id", 0, Store.YES); Field titleField = new TextField("title", "", Store.YES); Field authorsField = new TextField("authors", "", Store.YES); Field bookCategoryField = new TextField("book_category", "", Store.YES); indexWriter.deleteAll(); FacetFields facetFields = new FacetFields(taxonomyWriter); for (int i = 0; i < bookArray.length(); i++) { Document document = new Document(); JSONObject book = bookArray.getJSONObject(i); int id = book.getInt("id"); String title = book.getString("title"); String bookCategory = book.getString("book_category"); List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>(); String authorsString = ""; JSONArray authors = book.getJSONArray("authors"); for (int j = 0; j < authors.length(); j++) { String author = authors.getString(j); if (j > 0) { authorsString += ", "; } categoryPaths.add(new CategoryPath("author", author)); authorsString += author; } categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/')); 
idField.setIntValue(id); titleField.setStringValue(title); authorsField.setStringValue(authorsString); bookCategoryField.setStringValue(bookCategory); facetFields.addFields(document, categoryPaths); document.add(idField); document.add(titleField); document.add(authorsField); document.add(bookCategoryField); indexWriter.addDocument(document); System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n", id, title, bookCategory, authors); } taxonomyWriter.prepareCommit(); try { taxonomyWriter.commit(); } catch (Exception e) { taxonomyWriter.rollback(); } // taxonomyWriter.close(); // // indexWriter.commit(); // indexWriter.close(); String query = "story"; IndexReader indexReader = DirectoryReader.open(indexWriter, false); IndexReader indexReader2 = DirectoryReader.open(indexWriter, false); System.out.println(indexReader == indexReader2); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader); if (newTaxonomyReader != null) { TaxonomyReader tmp = taxonomyReader; taxonomyReader = newTaxonomyReader; tmp.close(); } else { System.out.println("null"); } ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); FacetSearchParams searchParams = new FacetSearchParams(facetRequests); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); 
System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); taxonomyWriter.commit(); taxonomyWriter.close(); indexWriter.commit(); indexWriter.close(); }
From source file:com.chimpler.example.FacetLuceneSearcher.java
License:Apache License
public static void main(String args[]) throws Exception { // if (args.length != 3) { // System.err.println("Parameters: [index directory] [taxonomy directory] [query]"); // System.exit(1); // }//from w w w . j av a 2s . c o m String indexDirectory = "index"; String taxonomyDirectory = "taxonomy"; String query = "story"; IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory))); IndexSearcher indexSearcher = new IndexSearcher(indexReader); TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory))); ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>(); facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100)); facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100)); FacetSearchParams searchParams = new FacetSearchParams(facetRequests); ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title", new StandardAnalyzer(LUCENE_VERSION)); Query luceneQuery = queryParser.parse(query); // Collectors to get top results and facets TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true); FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader); indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector)); System.out.println("Found:"); for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) { Document document = indexReader.document(scoreDoc.doc); System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n", document.get("id"), document.get("title"), document.get("book_category"), document.get("authors"), scoreDoc.score); } System.out.println("Facets:"); for (FacetResult facetResult : facetsCollector.getFacetResults()) { System.out.println("- " + facetResult.getFacetResultNode().label); for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) { 
System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value); for (FacetResultNode subFacetResultNode : facetResultNode.subResults) { System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(), subFacetResultNode.value); } } } taxonomyReader.close(); indexReader.close(); }
From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java
License:Apache License
/** User runs a query and aggregates facets by summing their association values. */ private List<FacetResult> sumAssociations() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); FacetsCollector fc = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc); Facets tags = new TaxonomyFacetSumIntAssociations("$tags", taxoReader, config, fc); Facets genre = new TaxonomyFacetSumFloatAssociations("$genre", taxoReader, config, fc); // Retrieve results List<FacetResult> results = new ArrayList<>(); results.add(tags.getTopChildren(10, "tags")); results.add(genre.getTopChildren(10, "genre")); indexReader.close();//from w ww . j a v a 2s.c om taxoReader.close(); return results; }
From source file:com.czw.search.lucene.example.facet.AssociationsFacetsExample.java
License:Apache License
/** User drills down on 'tags/solr'. */ private FacetResult drillDown() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); // Passing no baseQuery means we drill down on all // documents ("browse only"): DrillDownQuery q = new DrillDownQuery(config); // Now user drills down on Publish Date/2010: q.add("tags", "solr"); FacetsCollector fc = new FacetsCollector(); FacetsCollector.search(searcher, q, 10, fc); // Retrieve results Facets facets = new TaxonomyFacetSumFloatAssociations("$genre", taxoReader, config, fc); FacetResult result = facets.getTopChildren(10, "genre"); indexReader.close();//from ww w .j ava 2 s .c o m taxoReader.close(); return result; }
From source file:com.czw.search.lucene.example.facet.ExpressionAggregationFacetsExample.java
License:Apache License
/** User runs a query and aggregates facets. */ private FacetResult search() throws IOException, ParseException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); // Aggregate categories by an expression that combines the document's score // and its popularity field Expression expr = JavascriptCompiler.compile("_score * sqrt(popularity)"); SimpleBindings bindings = new SimpleBindings(); bindings.add(new SortField("_score", SortField.Type.SCORE)); // the score of the document bindings.add(new SortField("popularity", SortField.Type.LONG)); // the value of the 'popularity' field // Aggregates the facet values FacetsCollector fc = new FacetsCollector(true); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc); // Retrieve results Facets facets = new TaxonomyFacetSumValueSource(taxoReader, config, fc, expr.getDoubleValuesSource(bindings)); FacetResult result = facets.getTopChildren(10, "A"); indexReader.close();/*from w w w .j a v a 2 s. co m*/ taxoReader.close(); return result; }
From source file:com.czw.search.lucene.example.facet.MultiCategoryListsFacetsExample.java
License:Apache License
/** User runs a query and counts facets. */ private List<FacetResult> search() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); FacetsCollector fc = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc); // Retrieve results List<FacetResult> results = new ArrayList<>(); // Count both "Publish Date" and "Author" dimensions Facets author = new FastTaxonomyFacetCounts("author", taxoReader, config, fc); results.add(author.getTopChildren(10, "Author")); Facets pubDate = new FastTaxonomyFacetCounts("pubdate", taxoReader, config, fc); results.add(pubDate.getTopChildren(10, "Publish Date")); indexReader.close();/*from w w w. j a va2 s .c om*/ taxoReader.close(); return results; }
From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java
License:Apache License
/** * User runs a query and counts facets.//from ww w . ja va 2 s. c o m */ private List<FacetResult> facetsWithSearch() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); FacetsCollector fc = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: FacetsCollector.search(searcher, new MatchAllDocsQuery(), 10, fc); // Retrieve results List<FacetResult> results = new ArrayList<>(); // Count both "Publish Date" and "Author" dimensions Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc); results.add(facets.getTopChildren(10, "Author")); results.add(facets.getTopChildren(10, "Publish Date")); results.add(facets.getTopChildren(10, "Category")); indexReader.close(); taxoReader.close(); return results; }
From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java
License:Apache License
/** * User runs a query and counts facets only without collecting the matching documents. */// www .j a v a2 s . c om private List<FacetResult> facetsOnly() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); FacetsCollector fc = new FacetsCollector(); // MatchAllDocsQuery is for "browsing" (counts facets // for all non-deleted docs in the index); normally // you'd use a "normal" query: searcher.search(new MatchAllDocsQuery(), fc); // Retrieve results List<FacetResult> results = new ArrayList<>(); // Count both "Publish Date" and "Author" dimensions Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc); results.add(facets.getTopChildren(10, "Author")); results.add(facets.getTopChildren(10, "Publish Date")); results.add(facets.getTopChildren(10, "Category")); indexReader.close(); taxoReader.close(); return results; }
From source file:com.czw.search.lucene.example.facet.SimpleFacetsExample.java
License:Apache License
/** * User drills down on 'Publish Date/2011', and we * return facets for 'Author'//from w ww . ja v a 2 s .c om */ private FacetResult drillDown() throws IOException { DirectoryReader indexReader = DirectoryReader.open(indexDir); IndexSearcher searcher = new IndexSearcher(indexReader); TaxonomyReader taxoReader = new DirectoryTaxonomyReader(taxoDir); // Passing no baseQuery means we drill down on all // documents ("browse only"): DrillDownQuery q = new DrillDownQuery(config); // Now user drills down on Publish Date/2010: q.add("Publish Date", "2010"); FacetsCollector fc = new FacetsCollector(); FacetsCollector.search(searcher, q, 10, fc); // Retrieve results Facets facets = new FastTaxonomyFacetCounts(taxoReader, config, fc); FacetResult result = facets.getTopChildren(10, "Author"); indexReader.close(); taxoReader.close(); return result; }