List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
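Before the per-project examples below, a minimal self-contained sketch of the call in isolation: it opens an index with DirectoryReader, walks every document id up to maxDoc(), and prints the stored fields returned by document(int). The class name and index path are placeholders, and the snippet assumes a recent (Lucene 5.x+) API in which FSDirectory.open takes a java.nio.file.Path, as in the first example on this page.

import java.nio.file.Paths;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class DumpStoredDocuments {
    public static void main(String[] args) throws Exception {
        // Placeholder path; point this at an existing index directory.
        IndexReader reader = DirectoryReader.open(FSDirectory.open(Paths.get("/tmp/index")));
        try {
            for (int docID = 0; docID < reader.maxDoc(); docID++) {
                // document(int) returns the stored fields of the docID-th document;
                // deleted documents may still be returned, so callers often check
                // the live-docs bits before trusting the result.
                Document doc = reader.document(docID);
                System.out.println(docID + ": " + doc);
            }
        } finally {
            reader.close();
        }
    }
}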
From source file:cn.util.test.java
License:Open Source License
public static void main(String[] args) throws IOException {
    String lirePath = "F:\\Lire-0.9.5\\demo\\";
    String indexPath = lirePath + "index";
    String picsPath = lirePath + "pics";
    String reco = lirePath + "test.jpg";

    // Checking if arg[0] is there and if it is an image.
    BufferedImage img = null;
    File f = new File(reco);
    if (f.exists()) {
        try {
            img = ImageIO.read(f);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    IndexReader ir = DirectoryReader.open(FSDirectory.open(Paths.get(indexPath)));
    ImageSearcher searcher = new GenericFastImageSearcher(5, CEDD.class);
    // ImageSearcher searcher = new GenericFastImageSearcher(30, AutoColorCorrelogram.class);

    // searching with an image file ...
    ImageSearchHits hits = searcher.search(img, ir);
    // searching with a Lucene document instance ...
    // ImageSearchHits hits = searcher.search(ir.document(0), ir);
    for (int i = 0; i < hits.length(); i++) {
        String fileName = ir.document(hits.documentID(i)).getValues(DocumentBuilder.FIELD_NAME_IDENTIFIER)[0];
        if (hits.score(i) > 10) //???
        {
            System.out.println(hits.score(i) + ": \t" + fileName);
        }
    }
}
From source file:com.bdaum.zoom.lal.internal.lucene.Lucene.java
License:Open Source License
private static Document getDocumentById(IndexReader indexReader, String searchString) throws IOException {
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    Analyzer analyzer = new KeywordAnalyzer();
    QueryParser queryParser = new QueryParser(DocumentBuilder.FIELD_NAME_IDENTIFIER, analyzer);
    try {
        Query query = queryParser.parse(searchString);
        TopDocs topdocs = indexSearcher.search(query, 1);
        if (topdocs.totalHits > 0)
            return indexReader.document(topdocs.scoreDocs[0].doc);
    } catch (org.apache.lucene.queryparser.classic.ParseException e) {
        // should never happen
    }
    return null;
}
From source file:com.browseengine.bobo.tools.CarDataDigest.java
License:Open Source License
public void digest(DataHandler handler) throws IOException {
    int numcars = getMaxDocs();
    Random rand = new Random();
    IndexReader reader = null;
    try {
        reader = IndexReader.open(FSDirectory.open(getDataFile()), true);
        int carcount = reader.maxDoc();
        Document[] docCache = new Document[carcount];
        for (int i = 0; i < carcount; ++i) {
            docCache[i] = reader.document(i);
        }
        for (int i = 0; i < numcars; ++i) {
            if (i != 0 && i % 1000 == 0) {
                System.out.println(i + " cars indexed.");
            }
            Document doc = new Document();
            int n = rand.nextInt(10);
            if (n == 0) {
                makeCar(doc, cars[rand.nextInt(cars.length)]);
            } else {
                Document srcDoc = docCache[rand.nextInt(carcount)];
                makeCar(doc, srcDoc);
            }
            populateDocument(doc, null);
            handler.handleDocument(doc);
        }
    } finally {
        if (reader != null) {
            reader.close();
        }
    }
}
From source file:com.browseengine.local.service.geoindex.GeoResourceWriter.java
License:Open Source License
public synchronized void optimize() throws IOException, GeoIndexingException {
    if (_writer != null) {
        if (_path != null) {
            _writer.optimize();
            File path2 = new File(_path.getParentFile(), _path.getName() + ".tmp");
            _writer.close();
            _writer = null;
            if (_path.renameTo(path2)) {
                IndexReader reader = null;
                TermEnum termEnum = null;
                TermDocs termDocs = null;
                try {
                    reader = IndexReader.open(path2);
                    int maxDoc = reader.maxDoc();
                    if (maxDoc <= 0) {
                        throw new GeoIndexingException("can't optimize an index with " + maxDoc + " docs");
                    }
                    LonDocid[] lonDocids = new LonDocid[maxDoc];
                    String fld = GeoSearchFields.LON.getField().intern();
                    Term term = new Term(fld, "");
                    termEnum = reader.terms(term);
                    termDocs = reader.termDocs();
                    while ((term = termEnum.term()) != null && term.field() == fld) {
                        double lon = Double.parseDouble(term.text());
                        termDocs.seek(term);
                        while (termDocs.next()) {
                            int docid = termDocs.doc();
                            lonDocids[docid] = new LonDocid(docid, lon);
                        }
                        termEnum.next();
                    }
                    termDocs.close();
                    termDocs = null;
                    termEnum.close();
                    termEnum = null;
                    Arrays.sort(lonDocids);
                    init(_path, true);
                    for (int i = 0; i < lonDocids.length; i++) {
                        int docid = lonDocids[i].docid;
                        Document doc = reader.document(docid);
                        // all fields are stored
                        String name = doc.get(GeoSearchFields.NAME.getField());
                        String description = doc.get(GeoSearchFields.DESCRIPTION.getField());
                        String addressStr = doc.get(GeoSearchFields.ADDRESS.getField());
                        String phoneStr = doc.get(GeoSearchFields.PHONE.getField());
                        long phoneNumber = LocalResource.NO_PHONE_NUMBER;
                        if (phoneStr != null && phoneStr.length() > 0) {
                            phoneNumber = Long.parseLong(phoneStr);
                        }
                        String lonStr = doc.get(GeoSearchFields.LON.getField());
                        double lon = Double.parseDouble(lonStr);
                        String latStr = doc.get(GeoSearchFields.LAT.getField());
                        double lat = Double.parseDouble(latStr);
                        LocalResource resource = new LocalResource(name, description, addressStr,
                                phoneNumber, lon, lat);
                        addResource(resource);
                    }
                    reader.close();
                    reader = null;
                    _writer.optimize();
                    LOGGER.info("successfully completed optimization of index at " + _path.getAbsolutePath());
                } finally {
                    try {
                        // erase the tmp dir
                        recursiveDelete(path2);
                    } finally {
                        try {
                            if (reader != null) {
                                reader.close();
                            }
                        } finally {
                            try {
                                if (termEnum != null) {
                                    termEnum.close();
                                }
                            } finally {
                                try {
                                    if (termDocs != null) {
                                        termDocs.close();
                                    }
                                } finally {
                                    reader = null;
                                    termDocs = null;
                                    termEnum = null;
                                }
                            }
                        }
                    }
                }
            } else {
                init(_path, false);
                throw new GeoIndexingException("trouble doing the rename from " + _path.getAbsolutePath()
                        + " to " + path2.getAbsolutePath() + "; check permissions");
            }
        } else {
            _writer.optimize();
        }
    } else {
        throw new GeoIndexingException("attempt to optimize a closed " + GeoResourceWriter.class.getName());
    }
}
From source file:com.chimpler.example.FacetLuceneAdvancedSearcher.java
License:Apache License
public static void main(String args[]) throws Exception {
    if (args.length != 5) {
        System.err.println(
                "Parameters: [index directory] [taxonomy directory] [query] [field drilldown] [value drilldown]");
        System.exit(1);
    }

    String indexDirectory = args[0];
    String taxonomyDirectory = args[1];
    String query = args[2];
    String fieldDrilldown = args[3];
    String valueDrilldown = args[4];

    IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory)));
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    CategoryPath drillDownCategoryPath = new CategoryPath(fieldDrilldown + "/" + valueDrilldown, '/');

    FacetSearchParams searchParams = new FacetSearchParams();
    searchParams.facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    searchParams.facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));
    searchParams.facetRequests.add(new CountFacetRequest(drillDownCategoryPath, 100));

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);
    //luceneQuery = DrillDownQuery.query(luceneQuery, drillDownCategoryPath);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();
}
From source file:com.chimpler.example.FacetLuceneIndexer.java
License:Apache License
public static void main(String args[]) throws Exception {
    // if (args.length != 3) {
    //     System.err.println("Parameters: [index directory] [taxonomy directory] [json file]");
    //     System.exit(1);
    // }
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String jsonFileName = "/home/qiuqiang/workspace/facet-lucene-example/books.json";

    IndexWriterConfig writerConfig = new IndexWriterConfig(LUCENE_VERSION,
            new WhitespaceAnalyzer(LUCENE_VERSION));
    writerConfig.setOpenMode(OpenMode.APPEND);
    IndexWriter indexWriter = new IndexWriter(FSDirectory.open(new File(indexDirectory)), writerConfig);

    TaxonomyWriter taxonomyWriter = new DirectoryTaxonomyWriter(MMapDirectory.open(new File(taxonomyDirectory)),
            OpenMode.APPEND);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    String content = IOUtils.toString(new FileInputStream(jsonFileName));
    JSONArray bookArray = new JSONArray(content);

    Field idField = new IntField("id", 0, Store.YES);
    Field titleField = new TextField("title", "", Store.YES);
    Field authorsField = new TextField("authors", "", Store.YES);
    Field bookCategoryField = new TextField("book_category", "", Store.YES);

    indexWriter.deleteAll();

    FacetFields facetFields = new FacetFields(taxonomyWriter);
    for (int i = 0; i < bookArray.length(); i++) {
        Document document = new Document();
        JSONObject book = bookArray.getJSONObject(i);
        int id = book.getInt("id");
        String title = book.getString("title");
        String bookCategory = book.getString("book_category");

        List<CategoryPath> categoryPaths = new ArrayList<CategoryPath>();
        String authorsString = "";
        JSONArray authors = book.getJSONArray("authors");
        for (int j = 0; j < authors.length(); j++) {
            String author = authors.getString(j);
            if (j > 0) {
                authorsString += ", ";
            }
            categoryPaths.add(new CategoryPath("author", author));
            authorsString += author;
        }
        categoryPaths.add(new CategoryPath("book_category" + bookCategory, '/'));

        idField.setIntValue(id);
        titleField.setStringValue(title);
        authorsField.setStringValue(authorsString);
        bookCategoryField.setStringValue(bookCategory);

        facetFields.addFields(document, categoryPaths);
        document.add(idField);
        document.add(titleField);
        document.add(authorsField);
        document.add(bookCategoryField);
        indexWriter.addDocument(document);

        System.out.printf("Book: id=%d, title=%s, book_category=%s, authors=%s\n",
                id, title, bookCategory, authors);
    }

    taxonomyWriter.prepareCommit();
    try {
        taxonomyWriter.commit();
    } catch (Exception e) {
        taxonomyWriter.rollback();
    }
    // taxonomyWriter.close();
    //
    // indexWriter.commit();
    // indexWriter.close();

    String query = "story";

    IndexReader indexReader = DirectoryReader.open(indexWriter, false);
    IndexReader indexReader2 = DirectoryReader.open(indexWriter, false);
    System.out.println(indexReader == indexReader2);

    IndexSearcher indexSearcher = new IndexSearcher(indexReader);

    TaxonomyReader newTaxonomyReader = DirectoryTaxonomyReader.openIfChanged(taxonomyReader);
    if (newTaxonomyReader != null) {
        TaxonomyReader tmp = taxonomyReader;
        taxonomyReader = newTaxonomyReader;
        tmp.close();
    } else {
        System.out.println("null");
    }

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));
    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();

    taxonomyWriter.commit();
    taxonomyWriter.close();

    indexWriter.commit();
    indexWriter.close();
}
From source file:com.chimpler.example.FacetLuceneSearcher.java
License:Apache License
public static void main(String args[]) throws Exception {
    // if (args.length != 3) {
    //     System.err.println("Parameters: [index directory] [taxonomy directory] [query]");
    //     System.exit(1);
    // }
    String indexDirectory = "index";
    String taxonomyDirectory = "taxonomy";
    String query = "story";

    IndexReader indexReader = DirectoryReader.open(FSDirectory.open(new File(indexDirectory)));
    IndexSearcher indexSearcher = new IndexSearcher(indexReader);
    TaxonomyReader taxonomyReader = new DirectoryTaxonomyReader(FSDirectory.open(new File(taxonomyDirectory)));

    ArrayList<FacetRequest> facetRequests = new ArrayList<FacetRequest>();
    facetRequests.add(new CountFacetRequest(new CategoryPath("author"), 100));
    facetRequests.add(new CountFacetRequest(new CategoryPath("book_category"), 100));
    FacetSearchParams searchParams = new FacetSearchParams(facetRequests);

    ComplexPhraseQueryParser queryParser = new ComplexPhraseQueryParser(LUCENE_VERSION, "title",
            new StandardAnalyzer(LUCENE_VERSION));
    Query luceneQuery = queryParser.parse(query);

    // Collectors to get top results and facets
    TopScoreDocCollector topScoreDocCollector = TopScoreDocCollector.create(10, true);
    FacetsCollector facetsCollector = FacetsCollector.create(searchParams, indexReader, taxonomyReader);
    indexSearcher.search(luceneQuery, MultiCollector.wrap(topScoreDocCollector, facetsCollector));

    System.out.println("Found:");
    for (ScoreDoc scoreDoc : topScoreDocCollector.topDocs().scoreDocs) {
        Document document = indexReader.document(scoreDoc.doc);
        System.out.printf("- book: id=%s, title=%s, book_category=%s, authors=%s, score=%f\n",
                document.get("id"), document.get("title"), document.get("book_category"),
                document.get("authors"), scoreDoc.score);
    }

    System.out.println("Facets:");
    for (FacetResult facetResult : facetsCollector.getFacetResults()) {
        System.out.println("- " + facetResult.getFacetResultNode().label);
        for (FacetResultNode facetResultNode : facetResult.getFacetResultNode().subResults) {
            System.out.printf(" - %s (%f)\n", facetResultNode.label.toString(), facetResultNode.value);
            for (FacetResultNode subFacetResultNode : facetResultNode.subResults) {
                System.out.printf(" - %s (%f)\n", subFacetResultNode.label.toString(),
                        subFacetResultNode.value);
            }
        }
    }

    taxonomyReader.close();
    indexReader.close();
}
From source file:com.datasalt.pangool.solr.TestSolrOutputFormat.java
License:Apache License
@Test
public void test() throws Exception {
    trash(OUTPUT);

    TupleSolrOutputFormatExample example = new TupleSolrOutputFormatExample();
    example.run(INPUT, OUTPUT, getConf());

    // Assert that indexes have been created
    assertTrue(new File(OUTPUT + "/part-00000/data/index").exists());
    assertTrue(new File(OUTPUT + "/FR/part-00000/data/index").exists());
    assertTrue(new File(OUTPUT + "/ES/part-00000/data/index").exists());

    // Validate data inside index
    IndexReader r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/part-00000/data/index")));
    assertEquals(2, r.maxDoc());

    int contentAssertions = 0;
    Set<String> distinctMessages = new HashSet<String>();
    for (int i = 0; i < 2; i++) {
        String document = r.document(i).toString();
        distinctMessages.add(document);
        if (document.contains("user_id:user1")) {
            assertTrue(document.contains("Hi, this is a message from user1."));
            contentAssertions++;
        } else if (document.contains("user_id:user4")) {
            assertTrue(document.contains("Hi, this is another message from user4."));
            contentAssertions++;
        }
    }
    assertEquals(2, distinctMessages.size());
    assertEquals(2, contentAssertions);

    r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/FR/part-00000/data/index")));
    assertEquals(1, r.maxDoc());
    String document = r.document(0).toString();
    assertTrue(document.contains("user_id:user3"));
    assertTrue(document.contains("Oh la là!"));

    r = IndexReader.open(FSDirectory.open(new File(OUTPUT + "/ES/part-00000/data/index")));
    assertEquals(1, r.maxDoc());
    document = r.document(0).toString();
    assertTrue(document.contains("user_id:user2"));
    assertTrue(document.contains("Yo no hablo inglés."));
    document = r.document(0).toString();

    trash(OUTPUT);
}
From source file:com.doculibre.constellio.services.ImportExportServicesImpl.java
License:Open Source License
@SuppressWarnings("unchecked") @Override// w w w. ja v a 2s .c o m public void importData(Directory directory, RecordCollection collection, ProgressInfo progressInfo) { try { ConnectorInstance connectorInstance = collection.getConnectorInstances().iterator().next(); RecordServices recordServices = ConstellioSpringUtils.getRecordServices(); String uniqueKeyMetaName = null; IndexField uniqueKeyIndexField = collection.getUniqueKeyIndexField(); for (ConnectorInstanceMeta connectorInstanceMeta : uniqueKeyIndexField.getConnectorInstanceMetas()) { if (connectorInstance.equals(connectorInstanceMeta.getConnectorInstance())) { uniqueKeyMetaName = connectorInstanceMeta.getName(); break; } } Pattern invalidDatePattern = Pattern .compile("^[0-9]{4}-[0-9]{2}-[0-9]{2}T[0-9]{2}:[0-9]{2}:[0-9]{2}(\\.[0-9]*)?$"); IndexReader indexReader = DirectoryReader.open(directory); if (progressInfo != null) { progressInfo.setTotal(indexReader.numDocs()); } for (int i = 0; i < indexReader.numDocs(); i++) { Document document = indexReader.document(i); Record record = new Record(); record.setLastModified(new Date()); record.setConnectorInstance(connectorInstance); for (IndexableField field : document.getFields()) { // for (String fieldName : (Collection<String>) // indexReader.getFieldNames(FieldOption.ALL)) { if (field != null && field.fieldType().stored() && field.binaryValue() == null) { String metaName = field.name(); String metaContent = field.stringValue(); Matcher invalidDateMatcher = invalidDatePattern.matcher(metaContent); if (invalidDateMatcher.matches()) { metaContent = metaContent + "Z"; } if (uniqueKeyMetaName.equals(metaName)) { record.setUrl(metaContent); } RecordMeta meta = new RecordMeta(); ConnectorInstanceMeta connectorInstanceMeta = connectorInstance.getOrCreateMeta(metaName); meta.setConnectorInstanceMeta(connectorInstanceMeta); meta.setRecord(record); meta.setContent(metaContent); record.addContentMeta(meta); } } try { recordServices.makePersistent(record); // if (i % 500 == 0) { // EntityManager entityManager = // ConstellioPersistenceContext.getCurrentEntityManager(); // entityManager.getTransaction().commit(); // entityManager.getTransaction().begin(); // } } catch (Exception e) { e.printStackTrace(); } if (progressInfo != null) { progressInfo.setCurrentIndex(i); } } indexReader.close(); } catch (IOException e) { throw new RuntimeException(e); } }
From source file:com.doculibre.constellio.services.ImportExportServicesImpl.java
License:Open Source License
@SuppressWarnings("unchecked") @Override/*from ww w.j a va2s . c o m*/ public void convertData(Directory directory, OutputStream output) { try { WritableWorkbook workbook = Workbook.createWorkbook(output); WritableSheet sheet = workbook.createSheet("fields", 0); WritableFont arial10font = new WritableFont(WritableFont.ARIAL, 10); WritableCellFormat arial10format = new WritableCellFormat(arial10font); IndexReader indexReader = DirectoryReader.open(directory); // { // int column = 0; // for (String fieldName : (Collection<String>) indexReader.getFieldNames()) { // Label label = new Label(column, 0, fieldName, arial10format); // sheet.addCell(label); // column++; // } // } int row = 1; for (int i = 0; i < indexReader.numDocs() /* && i != 502 */; i++) { Document document = indexReader.document(i); int column = 0; for (IndexableField field : document.getFields()) { if (row == 1) { Label label = new Label(column, 0, field.name(), arial10format); sheet.addCell(label); } if (field != null && field.fieldType().stored() && field.binaryValue() == null) { String indexedContent = field.stringValue(); indexedContent = convertText(indexedContent); Label label = new Label(column, row, indexedContent, arial10format); sheet.addCell(label); } column++; } row++; // if (i == 502) { // break; // } } indexReader.close(); workbook.write(); workbook.close(); } catch (IOException e) { throw new RuntimeException(e); } catch (RowsExceededException e) { throw new RuntimeException(e); } catch (WriteException e) { throw new RuntimeException(e); } }