List of usage examples for org.apache.lucene.index IndexReader document
public final Document document(int docID) throws IOException
Returns the stored fields of the nth Document in this index.
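A minimal usage sketch of this method (the index path and the "title" field below are hypothetical; the reader API shown matches the 3.x-era examples on this page):

import java.io.File;
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.store.FSDirectory;

public class DocumentExample {
    public static void main(String[] args) throws IOException {
        // Open a reader over an existing index directory (path is a placeholder).
        IndexReader reader = IndexReader.open(FSDirectory.open(new File("/path/to/index")));
        try {
            // Valid docIDs run from 0 (inclusive) to maxDoc() (exclusive),
            // and that range may include deleted documents.
            for (int i = 0; i < reader.maxDoc(); i++) {
                if (reader.isDeleted(i)) {
                    continue;
                }
                Document doc = reader.document(i);
                // "title" is a hypothetical stored field.
                System.out.println(doc.get("title"));
            }
        } finally {
            reader.close();
        }
    }
}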
From source file:com.downtree.tourbus.search.LocationFilter.java
@Override
public BitSet bits(IndexReader reader) throws IOException {
    BitSet bits = new BitSet(reader.maxDoc());
    TermDocs docs = reader.termDocs(new Term("ferret_class", m_type));
    while (docs.next()) {
        Document doc = reader.document(docs.doc());
        String value = doc.get("latitude");
        if (value == null)
            continue;
        try {
            double latitude = Double.parseDouble(value) * DEG2RAD;
            double longitude = Double.parseDouble(doc.get("longitude")) * DEG2RAD;
            double x = (Math.sin(latitude) * Math.sin(m_centerLat))
                    + (Math.cos(latitude) * Math.cos(m_centerLat) * Math.cos(longitude - m_centerLong));
            double distance = 0;
            if (x > -1 && x < 1) {
                distance = Math.acos(x) * EARTH_RADIUS;
            }
            if (distance <= m_radius) {
                bits.set(docs.doc());
            }
        } catch (Exception e) {
            SolrException.logOnce(SolrCore.log, "Error in location filter", e);
            continue;
        }
    }
    return bits;
}
From source file:com.esri.gpt.catalog.lucene.LuceneQueryAdapter.java
License:Apache License
/**
 * Executes a query against a Lucene index.
 * @param discoveryQuery the query to execute
 */
protected void executeQuery(DiscoveryQuery discoveryQuery)
        throws DiscoveryException, ParseException, CorruptIndexException, IOException {
    IndexSearcher searcher = null;
    try {

        // initialize
        searcher = getIndexAdapter().newSearcher();
        this.maxDoc = searcher.maxDoc();
        boolean bExecuteQuery = true;
        boolean bProcessHits = true;
        RequestContext reqContext = this.getIndexAdapter().getRequestContext();
        BooleanQuery rootQuery = new BooleanQuery();
        DiscoveryFilter discoveryFilter = discoveryQuery.getFilter();
        DiscoveryResult discoveryResult = discoveryQuery.getResult();
        Discoverables returnables = discoveryQuery.getReturnables();
        if ((returnables == null) || (returnables.size() == 0) || (discoveryFilter.getMaxRecords() <= 0)) {
            bProcessHits = false;
        }

        // CSW query provider options
        boolean isDublinCoreResponse = true;
        boolean isBriefResponse = false;
        boolean isSummaryResponse = false;
        QueryOptions cswQueryOptions = (QueryOptions) reqContext.getObjectMap()
                .get("com.esri.gpt.server.csw.provider.components.QueryOptions");

        // build the query (if no query was supplied, we'll query everything)
        LogicalClauseAdapter logicalAdapter = new LogicalClauseAdapter(this);
        LogicalClause rootClause = discoveryFilter.getRootClause();
        if ((rootClause == null) || (rootClause.getClauses().size() == 0)) {
            if (discoveryFilter.getMaxRecords() <= QUERYALL_THRESHOLD) {
                LOGGER.finer("No filter was supplied, querying all...");
                logicalAdapter.appendSelectAll(rootQuery);
            } else {
                LOGGER.finer("No filter was supplied, query will not be executed.");
                bExecuteQuery = false;
            }
        } else {
            logicalAdapter.adaptLogicalClause(rootQuery, rootClause);
            if ((rootQuery.clauses() == null) || (rootQuery.clauses().size() == 0)) {
                bExecuteQuery = false;
            }
        }
        if (!bExecuteQuery)
            return;

        // execute the query and process the hits if required

        // set the sort option
        Sort sortOption = null;
        if (bProcessHits && (searcher.maxDoc() > 0)) {
            sortOption = makeSortOption(discoveryQuery);
        }

        // filters
        Filter filter = null;

        // make the access control filter
        MetadataAcl acl = new MetadataAcl(reqContext);
        AuthenticationStatus auth = reqContext.getUser().getAuthenticationStatus();
        boolean bAdmin = auth.getAuthenticatedRoles().hasRole("gptAdministrator");
        if (!bAdmin && !acl.isPolicyUnrestricted()) {
            String[] aclValues = acl.makeUserAcl();
            filter = new AclFilter(Storeables.FIELD_ACL, aclValues);
        }

        // isPartOf filter
        filter = IsPartOfFilter.make(reqContext, filter);

        // make the schema filter
        if (cswQueryOptions != null) {
            String schemaName = Val.chkStr(cswQueryOptions.getSchemaFilter());
            if (schemaName.length() > 0) {
                filter = new SchemaFilter(schemaName, filter);
                isDublinCoreResponse = cswQueryOptions.isDublinCoreResponse();
                if (!isDublinCoreResponse) {
                    String elementSetType = Val.chkStr(cswQueryOptions.getElementSetType());
                    if (elementSetType.equalsIgnoreCase("brief")) {
                        isBriefResponse = true;
                    } else if (elementSetType.equalsIgnoreCase("summary")) {
                        isSummaryResponse = true;
                    }
                }
            }
        }

        // determine the start/end positions
        int startRecord = discoveryFilter.getStartRecord() - 1;
        int maxRecords = discoveryFilter.getMaxRecords();
        if (startRecord < 0)
            startRecord = 0;
        int recordsPerPage = maxRecords;
        if (recordsPerPage <= 0)
            recordsPerPage = 1;
        int hitsToReturn = startRecord + recordsPerPage;
        int nextRecord = 0;
        int numDocs = 0;

        // execute the query
        LOGGER.finer("Executing Lucene Query:\n" + rootQuery);
        TopDocs topDocs = null;
        if (filter != null) {
            if (sortOption != null) {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn, sortOption);
            } else {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn);
            }
        } else {
            if (sortOption != null) {
                topDocs = searcher.search(rootQuery, filter, hitsToReturn, sortOption);
            } else {
                topDocs = searcher.search(rootQuery, hitsToReturn);
            }
        }

        // determine the hit count
        int totalHits = topDocs.totalHits;
        ScoreDoc[] scoreDocs = topDocs.scoreDocs;
        if ((scoreDocs != null) && (scoreDocs.length > 0)) {
            numDocs = scoreDocs.length;
            if (totalHits > numDocs) {
                nextRecord = numDocs + 1;
            }
        }
        discoveryResult.setNumberOfHits(totalHits);
        LOGGER.finer("Total query hits: " + totalHits);
        if (startRecord > (totalHits - 1))
            bProcessHits = false;
        if (maxRecords <= 0)
            bProcessHits = false;
        int nTotal = numDocs - startRecord;
        if (!bProcessHits)
            return;

        // warn if many records were requested
        if (nTotal >= TOOMANY_WARNING_THRESHOLD) {
            LOGGER.warning("A request to process " + nTotal
                    + " discovery records was received and will be executed.\n" + discoveryQuery.toString());
        }

        // process the hits, build the results
        LOGGER.finer("Processing " + nTotal + " records from: " + (startRecord + 1) + " to: " + numDocs);
        Storeable storeable;
        DiscoveredRecords records = discoveryResult.getRecords();
        IndexReader reader = searcher.getIndexReader();
        for (int i = startRecord; i < numDocs; i++) {
            ScoreDoc scoreDoc = scoreDocs[i];
            Document document = reader.document(scoreDoc.doc);
            DiscoveredRecord record = new DiscoveredRecord();

            // Dublin Core based responses
            if (isDublinCoreResponse) {
                for (Discoverable target : returnables) {
                    ArrayList<Object> values = new ArrayList<Object>();
                    storeable = (Storeable) target.getStorable();
                    if (storeable instanceof AnyTextProperty) {
                        values = null;
                    } else if (storeable instanceof GeometryProperty) {
                        GeometryProperty geom = (GeometryProperty) storeable;
                        values.add(geom.readEnvelope(document));
                    } else if (target.getMeaning().getMeaningType().equals(PropertyMeaningType.XMLURL)) {
                        String uuid = document.get(Storeables.FIELD_UUID);
                        uuid = URLEncoder.encode(uuid, "UTF-8");
                        values.add("?getxml=" + uuid);
                    } else {
                        DatastoreField retrievalField = storeable.getRetrievalField();
                        Field[] fields = document.getFields(retrievalField.getName());
                        if (fields != null) {
                            for (Field f : fields) {
                                Object value = retrievalField.makeValueToReturn(f.stringValue());
                                values.add(value);
                            }
                        }
                    }
                    if (values != null) {
                        Object[] oValues = null;
                        if (values.size() >= 0)
                            oValues = values.toArray();
                        record.addField(target, oValues);
                    }
                }

            // non Dublin Core based responses
            } else {
                String responseXml = null;
                if (isBriefResponse && (responseXml == null)) {
                    Field field = document.getField(Storeables.FIELD_XML_BRIEF);
                    if (field != null) {
                        responseXml = field.stringValue();
                    }
                } else if (isSummaryResponse && (responseXml == null)) {
                    Field field = document.getField(Storeables.FIELD_XML_SUMMARY);
                    if (field != null) {
                        responseXml = field.stringValue();
                    }
                } else if (responseXml == null) {
                    Field field = document.getField(Storeables.FIELD_XML);
                    if (field != null) {
                        responseXml = field.stringValue();
                    }
                }
                record.setResponseXml(responseXml);
            }

            records.add(record);
        }
        int nPopulated = records.size();
        LOGGER.finer("Populated " + nPopulated + " records.");

    } finally {
        getIndexAdapter().closeSearcher(searcher);
    }
}
From source file:com.esri.gpt.server.assertion.handler.AsnCommentHandler.java
License:Apache License
/**
 * Queries comments.
 * @param context the assertion operation context
 * @throws Exception if an exception occurs
 */
private void query(AsnContext context) throws Exception {

    // initialize
    AsnOperation operation = context.getOperation();
    AsnAssertionSet asnSet = operation.getAssertionSet();
    AsnValueType vType = asnSet.getValueType();
    String subject = operation.getSubject().getURN();
    String predicate = vType.getRdfPredicate();

    // build a query to match all occurrences of the subject/predicate pair
    BooleanQuery query = new BooleanQuery();
    Query qSubject = new TermQuery(new Term(AsnConstants.FIELD_RDF_SUBJECT, subject));
    Query qPredicate = new TermQuery(new Term(AsnConstants.FIELD_RDF_PREDICATE, predicate));
    query.add(qSubject, BooleanClause.Occur.MUST);
    query.add(qPredicate, BooleanClause.Occur.MUST);

    // sort on descending timestamp
    String tsField = AsnConstants.FIELD_SYS_TIMESTAMP;
    Sort sortOption = new Sort(new SortField(tsField, SortField.STRING, true));

    // determine the start and end positions
    int startRecord = context.getRequestOptions().getStartRecord() - 1;
    int maxRecords = context.getRequestOptions().getMaxRecords();
    if (startRecord < 0)
        startRecord = 0;
    int recordsPerPage = maxRecords;
    if (recordsPerPage <= 0)
        recordsPerPage = 1;
    int hitsToReturn = startRecord + recordsPerPage;
    int nextRecord = 0;
    int numDocs = 0;

    IndexReader reader = null;
    IndexSearcher searcher = null;
    try {

        // make the reader and searcher, execute the search
        reader = this.getIndexAdapter().makeIndexReader();
        searcher = new IndexSearcher(reader);
        TopDocs topDocs = searcher.search(query, null, hitsToReturn, sortOption);
        ScoreDoc[] scoreDocs = null;
        int totalHits = topDocs.totalHits;
        if (maxRecords > 0) {
            scoreDocs = topDocs.scoreDocs;
            if ((scoreDocs != null) && (scoreDocs.length > 0)) {
                numDocs = scoreDocs.length;
                if (totalHits > numDocs) {
                    nextRecord = numDocs + 1;
                }
            }
        }

        // root property for the response
        String rootSubject = subject;
        String rootPredicate = operation.getPredicate().getURN() + "response";
        AsnProperty rootProp = new AsnProperty(rootSubject, rootPredicate, null);

        // hit count and next record
        String queryPfx = asnSet.getURNPrefix() + ":query";
        rootProp.getChildren().add(new AsnProperty(null, queryPfx + ":hits", "" + totalHits));
        if (nextRecord > 0) {
            rootProp.getChildren().add(new AsnProperty(null, queryPfx + ":nextRecord", "" + nextRecord));
        }

        // canCreate capability for the active user
        String canCreatePred = asnSet.getURNPrefix() + ":activeUser:canCreate";
        String canCreateVal = "" + context.getAuthorizer().canCreate(context, asnSet.getAuthPolicy());
        rootProp.getChildren().add(new AsnProperty(null, canCreatePred, canCreateVal));

        // process the documents, generate the response
        AsnAssertionRenderer renderer = new AsnAssertionRenderer();
        for (int i = startRecord; i < numDocs; i++) {
            Document document = reader.document(scoreDocs[i].doc);
            Assertion assertion = asnSet.newAssertion(context, false);
            assertion.load(document);
            rootProp.getChildren().add(renderer.makeProperty(context, assertion));
        }
        context.getOperationResponse().generateResponse(context, rootProp.getChildren());

    } finally {
        this.getIndexAdapter().closeReader(reader);
        this.getIndexAdapter().closeSearcher(searcher);
    }
}
From source file:com.esri.gpt.server.assertion.index.AsnIndexAdapter.java
License:Apache License
/**
 * Loads an assertion based upon the subject id of the active operation.
 * @param context the assertion operation context
 * @param mustExist <code>true</code> if the assertion must exist
 * @return the assertion (null if not found)
 * @throws CorruptIndexException if the index is corrupt
 * @throws IOException if an I/O exception occurs
 * @throws AsnInvalidOperationException if mustExist and the assertion was not found
 */
public Assertion loadAssertionById(AsnContext context, boolean mustExist)
        throws CorruptIndexException, IOException, AsnInvalidOperationException {
    TermDocs termDocs = null;
    IndexReader reader = null;
    AsnOperation operation = context.getOperation();
    try {
        String assertionId = Val.chkStr(operation.getSubject().getValuePart());
        if (assertionId.length() > 0) {
            reader = this.makeIndexReader();
            termDocs = reader.termDocs();
            termDocs.seek(new Term(AsnConstants.FIELD_SYS_ASSERTIONID, assertionId));
            if (termDocs.next()) {
                Document document = reader.document(termDocs.doc());
                Assertion assertion = operation.getAssertionSet().newAssertion(context, false);
                assertion.load(document);
                return assertion;
            }
        }
    } finally {
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
            // ignore close failures
        }
        this.closeReader(reader);
    }
    if (mustExist) {
        String msg = "This assertion id was not found - " + operation.getSubject().getURN();
        throw new AsnInvalidOperationException(msg);
    }
    return null;
}
From source file:com.flaptor.hounder.indexer.IndexManager.java
License:Apache License
/**
 * Searches the whole index to find the largest AddId.
 * @return the largest AddId found in the index.
 */
private long findLargestAddId() {
    long max = 1;
    IndexReader reader = null;
    try {
        reader = workIndex.getReader();
        int num = reader.maxDoc();
        for (int i = 0; i < num; i++) {
            if (!reader.isDeleted(i)) {
                String val = reader.document(i).get("AddId");
                if (null != val) {
                    long n = Long.parseLong(val);
                    if (max < n) {
                        max = n;
                    }
                }
            }
        }
    } catch (IOException e) {
        logger.fatal("Could not read from the index to get the last AddId." + e);
        throw new RuntimeException("Error reading the index when looking for initial AddId.", e);
    } finally {
        Execute.close(reader, logger);
    }
    logger.debug("Largest AddId found: " + max);
    return max;
}
From source file:com.flaptor.hounder.indexer.LuceneUnicodeTest.java
License:Apache License
/**
 * The only test case.
 * Generates a bunch of interesting test strings, indexes them and then
 * gets them from the index to compare with the original.
 */
@TestInfo(testType = TestInfo.TestType.UNIT)
public void testStoredContent() {
    try {
        String testString = getTestString();
        logger.debug("Using test string: " + testString);
        Document doc = new Document();
        doc.add(new Field("field1", testString, Field.Store.YES, Field.Index.NOT_ANALYZED));
        writer.addDocument(doc);
        writer.optimize();
        writer.close();
        IndexReader reader = IndexReader.open(dir);
        Document doc2 = reader.document(0);
        String recoveredString = doc2.get("field1");
        logger.debug("Recovered String: " + recoveredString);
        assertTrue("Strings do not match", testString.equals(recoveredString));
    } catch (Exception e) {
        logger.error("Exception caught:" + e);
        assertTrue("exception", false);
    }
}
From source file:com.flaptor.hounder.util.Idx.java
License:Apache License
public static void main(String arg[]) throws Exception {
    check(arg.length > 1, null);
    String cmd = arg[0];
    File idx = new File(arg[1]);
    if ("list".equals(cmd)) {
        int num = (arg.length > 2) ? Integer.parseInt(arg[2]) : -1;
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        int docs = reader.numDocs();
        int max = reader.maxDoc();
        System.err.println("Index contains " + docs + " documents plus " + (max - docs) + " deleted.");
        if (num > -1) {
            if (num == 0)
                num = docs;
            for (int i = 0; i < max && i < num; i++) {
                System.out.println("----------------------------------------");
                if (!reader.isDeleted(i)) {
                    Document doc = reader.document(i);
                    List flds = doc.getFields();
                    Iterator iter = flds.iterator();
                    while (iter.hasNext()) {
                        Field fld = (Field) iter.next();
                        String attr = (fld.isIndexed() ? ",i" : "") + (fld.isStored() ? ",s" : "")
                                + (fld.isTokenized() ? ",t" : "");
                        System.out.println(fld.name() + attr + ": " + fld.stringValue());
                    }
                }
            }
            reader.close();
            System.out.println();
        }
    } else if ("search".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        ScorelessHitCollector collector = new HashSetScorelessHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        Set<Integer> docIds = collector.getMatchingDocuments();
        System.out.println("\nNumber of hits: " + docIds.size() + "\n");
        for (Integer docId : docIds) {
            Document doc = searcher.doc(docId);
            List flds = doc.getFields();
            Iterator iter = flds.iterator();
            while (iter.hasNext()) {
                Field fld = (Field) iter.next();
                System.out.println(fld.name() + ": " + fld.stringValue());
            }
        }
        searcher.close();
        System.out.println();
    } else if ("delete".equals(cmd)) {
        check(idx.exists(), "Index dir not found");
        check(arg.length > 3, "Not enough arguments");
        String field = arg[2];
        String value = arg[3];
        IndexReader reader = IndexReader.open(idx);
        reader.deleteDocuments(new Term(field, value));
        reader.close();
    } else if ("optimize".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.optimize();
        writer.close();
    } else if ("merge".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        File idx2 = new File(arg[2]);
        check(idx.exists(), "Index dir 1 not found");
        check(idx2.exists(), "Index dir 2 not found");
        IndexReader reader = IndexReader.open(idx2);
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.addIndexes(new IndexReader[] { reader });
        writer.close();
        reader.close();
    } else if ("term-count".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        IndexReader reader = IndexReader.open(idx);
        String field = arg[2];
        int count = 0;
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term term = terms.term();
            if (term.field().equals(field))
                count++;
        }
        terms.close();
        reader.close();
        System.out.println("Found " + count + " different values for field " + field);
    } else if ("hit-count".equals(cmd)) {
        check(arg.length > 3, "Not enough arguments");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        String value = arg[3];
        IndexSearcher searcher = new IndexSearcher(IndexReader.open(idx));
        CountingHitCollector collector = new CountingHitCollector();
        searcher.search(new TermQuery(new Term(field, value)), collector);
        System.out.println("\nNumber of hits: " + collector.getDocCount() + "\n");
        searcher.close();
    } else if ("uncompound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(false);
        writer.optimize();
        writer.close();
    } else if ("compound".equals(cmd)) {
        IndexWriter writer = new IndexWriter(idx, new StandardAnalyzer(), false,
                IndexWriter.MaxFieldLength.UNLIMITED);
        writer.setUseCompoundFile(true);
        writer.optimize();
        writer.close();
    } else if ("terms".equals(cmd)) {
        check(arg.length == 3, "not enough parameters");
        check(idx.exists(), "Index dir not found");
        String field = arg[2];
        IndexReader reader = IndexReader.open(idx);
        TermEnum terms = reader.terms();
        while (terms.next()) {
            Term t = terms.term();
            if (t.field().equals(field)) {
                System.out.println(t.text());
            }
        }
    }
}
From source file:com.github.flaxsearch.resources.DocumentResource.java
License:Apache License
@GET
public DocumentData getDocument(@QueryParam("segment") Integer segment, @PathParam("docId") int doc)
        throws IOException {
    IndexReader reader = segment == null ? readerManager.getIndexReader()
            : readerManager.getLeafReader(segment);
    // valid docIDs run from 0 (inclusive) to maxDoc() (exclusive)
    if (doc < 0 || doc >= reader.maxDoc()) {
        throw new WebApplicationException("Unknown document " + doc, Response.Status.NOT_FOUND);
    }
    Document document = reader.document(doc);
    return new DocumentData(document);
}
From source file:com.github.rnewson.couchdb.lucene.Progress.java
License:Apache License
public void load(final IndexReader reader) throws IOException {
    progress = newDocument();
    final TermDocs termDocs = reader.termDocs(PROGRESS_TERM);
    try {
        while (termDocs.next()) {
            final int doc = termDocs.doc();
            if (!reader.isDeleted(doc)) {
                progress = reader.document(doc);
            }
        }
    } finally {
        termDocs.close();
    }
}
From source file:com.github.tteofili.looseen.MinHashClassifier.java
License:Apache License
public MinHashClassifier(IndexReader reader, String textField, String categoryField, int min, int hashCount,
        int hashSize) {
    this.min = min;
    this.hashCount = hashCount;
    this.hashSize = hashSize;
    try {
        Analyzer analyzer = createMinHashAnalyzer(min, hashCount, hashSize);
        IndexWriterConfig config = new IndexWriterConfig(analyzer);
        directory = new RAMDirectory();
        IndexWriter writer = new IndexWriter(directory, config);
        for (int i = 0; i < reader.maxDoc(); i++) {
            Document document = new Document();
            Document d = reader.document(i);
            String textValue = d.getField(textField).stringValue();
            String categoryValue = d.getField(categoryField).stringValue();
            document.add(new TextField(TEXT_FIELD, textValue, Field.Store.NO));
            document.add(new StringField(CLASS_FIELD, categoryValue, Field.Store.YES));
            writer.addDocument(document);
        }
        writer.commit();
        writer.close();
    } catch (IOException e) {
        throw new RuntimeException(e);
    }
    BooleanQuery.setMaxClauseCount(Integer.MAX_VALUE);
}