List of usage examples for the `org.apache.lucene.index.Term` constructor:
public Term(String fld) — constructs a Term for the given field name.
From source file:com.dasasian.chok.lucene.FilterWritableTest.java
License:Apache License
@Test public void testSerializeFilter() throws Exception { TermQuery termQuery = new TermQuery(new Term("chok")); QueryWrapperFilter filter = new QueryWrapperFilter(termQuery); FilterWritable writable = new FilterWritable(filter); DataOutputBuffer buffer = writeWritable(writable); FilterWritable writable2 = new FilterWritable(); readWritable(buffer, writable2);// w w w .java 2s . c o m assertTrue(writable.equals(writable2)); }
From source file:com.dasasian.chok.lucene.QueryWritableTest.java
License:Apache License
/**
 * Round-trips a {@link TermQuery} through {@link QueryWritable}
 * serialization and verifies the deserialized copy equals the original.
 */
@Test
public void testSerializeQuery() throws Exception {
    TermQuery query = new TermQuery(new Term("chok"));
    QueryWritable original = new QueryWritable(query);
    DataOutputBuffer buffer = writeWritable(original);
    QueryWritable roundTripped = new QueryWritable();
    readWritable(buffer, roundTripped);
    assertTrue(original.equals(roundTripped));
}
From source file:com.epam.wilma.message.search.lucene.delete.LuceneDeleteEngineTest.java
License:Open Source License
/**
 * Per-test fixture: initializes the Mockito annotations, injects the field
 * name into the object under test, and prepares a reusable {@link Term}.
 */
@BeforeMethod
public void setUp() {
    MockitoAnnotations.initMocks(this);
    Whitebox.setInternalState(underTest, "fieldName", FIELD_NAME);
    term = new Term(FIELD_NAME);
}
From source file:com.epam.wilma.message.search.lucene.index.FileIndexerTest.java
License:Open Source License
/**
 * Per-test fixture: initializes mocks, injects the field name into the
 * object under test, and stubs the factory collaborators so indexing a
 * file resolves to the mocked stream, reader, and document.
 * @throws IOException declared for the stubbed stream factory call
 */
@BeforeMethod
public void setUp() throws IOException {
    MockitoAnnotations.initMocks(this);
    Whitebox.setInternalState(underTest, "fieldName", FIELD_NAME);
    term = new Term(FIELD_NAME);
    document = new Document();
    // stub the collaborator chain: file -> stream -> reader, and document creation
    given(fileInputStreamFactory.createFileInputStream(file)).willReturn(fileInputStream);
    given(file.getAbsolutePath()).willReturn("path");
    given(bufferedReaderFactory.createReader(fileInputStream)).willReturn(bufferedReader);
    given(documentFactory.createDocument()).willReturn(document);
}
From source file:com.esri.gpt.catalog.lucene.AclFilter.java
License:Apache License
/** * Queries for documents that have no values associated with the field. * @param reader the index reader//w w w. j a v a2s . c o m * @return the OpenBitSet (documents with no values set to true) * @throws IOException if an exception is encountered while reading the index */ private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException { int nBits = reader.maxDoc(); OpenBitSet bitSet = new OpenBitSet(nBits); TermEnum termEnum = null; TermDocs termDocs = null; if ((field != null) && (field.trim().length() > 0)) { try { // find all documents that have a term for the field, then flip the bit set termEnum = reader.terms(new Term(field)); termDocs = reader.termDocs(); do { Term term = termEnum.term(); if ((term != null) && term.field().equals(field)) { termDocs.seek(term); while (termDocs.next()) { bitSet.fastSet(termDocs.doc()); } } } while (termEnum.next()); bitSet.flip(0, nBits); if (reader.hasDeletions()) { for (int i = 0; i < nBits; i++) { if (bitSet.get(i) && reader.isDeleted(i)) { bitSet.fastFlip(i); } } } } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } } } return bitSet; }
From source file:com.esri.gpt.catalog.lucene.AclFilter.java
License:Apache License
/**
 * Queries for documents that match one or more of the supplied values.
 * <p>Each value is trimmed and lower-cased before the term lookup,
 * mirroring how the field was indexed.
 * @param reader the index reader
 * @param field the field to query
 * @param values the values to match (null or empty yields an empty bit set)
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValues(IndexReader reader, String field, String[] values) throws IOException {
    OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    if ((values == null) || (values.length == 0)) {
        return matches;
    }
    TermDocs docs = null;
    try {
        Term template = new Term(field);
        docs = reader.termDocs();
        for (String value : values) {
            // normalize the value the same way the terms were written
            docs.seek(template.createTerm(value.trim().toLowerCase()));
            while (docs.next()) {
                matches.set(docs.doc());
            }
        }
    } finally {
        // best-effort close
        try {
            if (docs != null) docs.close();
        } catch (Exception ignored) {
        }
    }
    return matches;
}
From source file:com.esri.gpt.catalog.lucene.LuceneIndexSynchronizer.java
License:Apache License
/** * Walks the documents within the database modifying the index as required. * @param info synchronization step information * @throws IOException if an exception occurs while communicating with the index * @throws SQLException if an exception occurs while communicating with the database * @throws CatalogIndexException if an exception occurs while modifying the index *//* ww w .j a va2 s. c o m*/ private void walkDatabase(WalkDatabaseInfo info) throws IOException, SQLException, CatalogIndexException { LOGGER.fine("Checking database records..."); PreparedStatement st = null; PreparedStatement stCol = null; TermDocs termDocs = null; try { // determine the metadata acl policy MetadataAcl acl = new MetadataAcl(this.context); boolean bCheckAcl = !acl.isPolicyUnrestricted(); // determine if collections are being used List<String[]> collections = null; CollectionDao colDao = new CollectionDao(this.context); boolean hasCollections = false; boolean useCollections = colDao.getUseCollections(); String sColMemberTable = colDao.getCollectionMemberTableName(); String sqlCol = "SELECT COLUUID FROM " + sColMemberTable + " WHERE DOCUUID=?"; if (useCollections) { collections = colDao.queryCollections(); hasCollections = (collections.size() > 0); } // initialize index related variables boolean bCheckIndex = (info.numOriginallyIndexed > 0); String fldUuid = Storeables.FIELD_UUID; String fldModified = Storeables.FIELD_DATEMODIFIED; String fldAcl = Storeables.FIELD_ACL; ArrayList<String> alFields = new ArrayList<String>(); alFields.add(fldModified); if (bCheckAcl) alFields.add(fldAcl); if (useCollections) alFields.add("isPartOf"); FieldSelector selector = new MapFieldSelector(alFields.toArray(new String[0])); Term termUuid = new Term(fldUuid); if (bCheckIndex) { termDocs = this.reader.termDocs(); } StringSet delUuids = new StringSet(); // build the database query StringBuffer sb = new StringBuffer("SELECT"); sb.append(" ").append(this.resourceTable).append(".DOCUUID"); 
sb.append(",").append(this.resourceTable).append(".APPROVALSTATUS"); sb.append(",").append(this.resourceTable).append(".PROTOCOL_TYPE"); sb.append(",").append(this.resourceTable).append(".FINDABLE"); sb.append(",").append(this.resourceTable).append(".UPDATEDATE"); sb.append(",").append(this.resourceTable).append(".ACL"); sb.append(" FROM ").append(this.resourceTable); String sql = sb.toString(); LOGGER.finest(sql); // execute the query, walk through the database records Connection con = this.context.getConnectionBroker().returnConnection("").getJdbcConnection(); st = con.prepareStatement(sql); ResultSet rs = st.executeQuery(); if (this.checkInterrupted()) return; if (useCollections && hasCollections) { stCol = con.prepareStatement(sqlCol); } while (rs.next()) { info.numProcessed++; info.loopCount++; long nDbTimeModified = 0; Timestamp tsDbModified = null; String sDbAcl = null; boolean bIndexable = false; // read the database uuid and approval status String uuid = rs.getString(1); String status = rs.getString(2); String protocolType = Val.chkStr(rs.getString(3)); boolean findable = Val.chkBool(rs.getString(4), false); bIndexable = (status != null) && (status.equalsIgnoreCase("approved") || status.equalsIgnoreCase("reviewed")); if (bIndexable && protocolType.length() > 0 && !findable) { bIndexable = false; } // read the database modification date if (bIndexable) { tsDbModified = rs.getTimestamp(5); if (tsDbModified != null) { nDbTimeModified = tsDbModified.getTime(); } bIndexable = (nDbTimeModified > 0); } // for non-indexable documents, delete if (!bIndexable) { info.numNonIndexable++; if (bCheckIndex) { termDocs.seek(termUuid.createTerm(uuid)); if (termDocs.next()) { info.numNonIndexableFound++; info.numRequiringDelete++; delUuids.add(uuid); if (delUuids.size() >= this.maxDeleteTokens) { if (this.checkInterrupted()) return; this.deleteDocuments(delUuids); info.numDocsDeleted += delUuids.size(); delUuids.clear(); if (this.checkInterrupted()) return; } } } } // for 
indexable documents, check to ensure that they are in sync if (bIndexable) { info.numIndexable++; boolean bRequiresUpdate = true; // find the document within the index if (bCheckIndex) { termDocs.seek(termUuid.createTerm(uuid)); if (termDocs.next()) { info.numIndexableFound++; Document doc = this.reader.document(termDocs.doc(), selector); if (doc != null) { bRequiresUpdate = false; // check the modification date long nIdxTimeModified = 0; String sModified = doc.get(fldModified); if (sModified != null) { try { nIdxTimeModified = Long.valueOf(sModified); } catch (NumberFormatException e) { nIdxTimeModified = 0; } } bRequiresUpdate = (nIdxTimeModified == 0) || (nDbTimeModified > nIdxTimeModified); if (bRequiresUpdate) info.numWithInconsistentDates++; // check the acl if (!bRequiresUpdate && bCheckAcl) { long tAclStartMillis = System.currentTimeMillis(); bRequiresUpdate = true; String[] aclsDb = null; sDbAcl = rs.getString(6); try { // use an internal method for quick parsing //aclsDb = acl.makeDocumentAcl(sDbAcl); aclsDb = this.parseAcl(sDbAcl); } catch (Exception eacl) { String sMsg = "Error parsing acl"; sMsg += ", uuid=" + uuid + "\n" + Val.chkStr(eacl.getMessage()); LOGGER.log(Level.WARNING, sMsg, eacl); } if (aclsDb == null) aclsDb = new String[0]; ArrayList<String> aclsIdx = new ArrayList<String>(); Field[] aclFields = doc.getFields(fldAcl); if ((aclFields != null) && (aclFields.length > 0)) { for (Field aclField : aclFields) { aclsIdx.add(aclField.stringValue()); } } if (aclsDb.length == aclsIdx.size()) { int nMatched = 0; if (aclsDb.length > 0) { for (String s1 : aclsDb) { for (String s2 : aclsIdx) { if (s1.equalsIgnoreCase(s2)) { nMatched++; break; } } } } bRequiresUpdate = (nMatched != aclsDb.length); } if (bRequiresUpdate) info.numWithInconsistentAcls++; info.aclMillis += (System.currentTimeMillis() - tAclStartMillis); } // check collection membership if (!bRequiresUpdate && useCollections) { long tColStartMillis = System.currentTimeMillis(); 
bRequiresUpdate = true; ArrayList<String> colDb = new ArrayList<String>(); if (useCollections && hasCollections) { stCol.clearParameters(); stCol.setString(1, uuid); ResultSet rsCol = stCol.executeQuery(); while (rsCol.next()) { String sCUuid = rsCol.getString(1); for (String[] col : collections) { if (sCUuid.equals(col[0])) { colDb.add(col[1]); break; } } } rsCol.close(); } ArrayList<String> colIdx = new ArrayList<String>(); Field[] colFields = doc.getFields("isPartOf"); if ((colFields != null) && (colFields.length > 0)) { for (Field colField : colFields) { colIdx.add(colField.stringValue()); } } if (colDb.size() == colIdx.size()) { int nMatched = 0; if (colDb.size() > 0) { for (String s1 : colDb) { for (String s2 : colIdx) { if (s1.equalsIgnoreCase(s2)) { nMatched++; break; } } } } bRequiresUpdate = (nMatched != colDb.size()); } if (bRequiresUpdate) info.numWithInconsistentColMembership++; info.colMillis += (System.currentTimeMillis() - tColStartMillis); } } } } // execute the update if required if (bRequiresUpdate) { if (this.checkInterrupted()) return; try { if (bCheckAcl) { if (sDbAcl == null) sDbAcl = rs.getString(6); } String sXml = Val.chkStr(this.readXml(uuid)); if (sXml.length() > 0) { info.numRequiringUpdate++; MetadataDocument mdDoc = new MetadataDocument(); Schema schema = mdDoc.prepareForView(this.context, sXml); this.adapter.publishDocument(uuid, tsDbModified, schema, sDbAcl); info.numDocsUpdated++; } } catch (SchemaException se) { // dont' allow the entire process to fail over one bad xml String sMsg = "Error indexing document during synchronization"; sMsg += ", uuid=" + uuid + "\n" + Val.chkStr(se.getMessage()); LOGGER.log(Level.WARNING, sMsg, se); } if (this.checkInterrupted()) return; } } // cache the synchronized uuids if (this.synchedUuidCache != null) { this.synchedUuidCache.put(uuid, ""); if (this.synchedUuidCache.size() > this.maxUuidCache) { this.synchedUuidCache = null; } } // log a status message if the feedback threshold was reached if 
(this.checkInterrupted()) return; if ((System.currentTimeMillis() - info.loopStartMillis) >= this.feedbackMillis) { LOGGER.info(info.getLoopMessage()); } } // delete any documents left over in the buffer if (delUuids.size() >= 0) { if (this.checkInterrupted()) return; this.deleteDocuments(delUuids); info.numDocsDeleted += delUuids.size(); } LOGGER.info(info.getStepMessage()); } finally { try { if (st != null) st.close(); } catch (Exception ef) { } try { if (stCol != null) stCol.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } } }
From source file:com.esri.gpt.catalog.lucene.LuceneIndexSynchronizer.java
License:Apache License
/** * Walks the documents within the index removing documents that are no longer indexable. * @param info synchronization step information * @throws IOException if an exception occurs while communicating with the index * @throws SQLException if an exception occurs while communicating with the database * @throws CatalogIndexException if an exception occurs while modifying the index *///from w w w.j av a 2s . c o m private void walkIndex(WalkIndexInfo info) throws IOException, SQLException, CatalogIndexException { LOGGER.fine("Checking indexed documents..."); TermEnum termEnum = null; try { StringSet chkUuids = new StringSet(); StringSet delUuids = new StringSet(); String fldUuid = Storeables.FIELD_UUID; termEnum = this.reader.terms(new Term(fldUuid)); do { Term term = termEnum.term(); if ((term == null) || !term.field().equals(fldUuid)) { break; } info.numProcessed++; info.loopCount++; // check the cache to see if the uuid was already synchronized, // otherwise add it to the set of uuids to check String uuid = term.text(); if (this.synchedUuidCache != null) { if (this.synchedUuidCache.containsKey(uuid)) { info.numFoundInCache++; } else { chkUuids.add(uuid); } } else { chkUuids.add(uuid); } // check to ensure that these documents are indexable if (chkUuids.size() >= this.maxSqlTokens) { if (this.checkInterrupted()) return; this.ensureIndexable(info, chkUuids, delUuids); chkUuids.clear(); if (this.checkInterrupted()) return; if ((System.currentTimeMillis() - info.loopStartMillis) >= this.feedbackMillis) { LOGGER.info(info.getLoopMessage()); } } // log a status message if the loop threshold was reached if (info.loopCount >= info.loopThreshold) { if (this.checkInterrupted()) return; if ((System.currentTimeMillis() - info.loopStartMillis) >= this.feedbackMillis) { LOGGER.info(info.getLoopMessage()); } } } while (termEnum.next()); // check any documents left over in the buffers if (chkUuids.size() > 0) { if (this.checkInterrupted()) return; this.ensureIndexable(info, 
chkUuids, delUuids); if (this.checkInterrupted()) return; } if (delUuids.size() >= 0) { if (this.checkInterrupted()) return; this.deleteDocuments(delUuids); info.numDocsDeleted += delUuids.size(); if (this.checkInterrupted()) return; } LOGGER.info(info.getStepMessage()); } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } } }
From source file:com.esri.gpt.catalog.lucene.stats.GlobalFieldStats.java
License:Apache License
/** * Executes the collection of statistics. * @param request the active statistics request * @param reader the index reader// w w w . j ava 2 s . c om * @throws IOException if an error occurs while communicating with the index */ public void collectStats(StatsRequest request, IndexReader reader) throws IOException { long t1 = System.currentTimeMillis(); TermEnum termEnum = null; TermDocs termDocs = null; try { OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet(); boolean isUnfiltered = (documentFilterBitSet == null); // return if there are no stats to collect String[] fieldNames = request.getCollectableFieldNames(reader); if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) { return; } else if ((fieldNames == null) || (fieldNames.length == 0)) { return; } // accumulate field frequencies per document termDocs = reader.termDocs(); for (String fieldName : fieldNames) { termEnum = reader.terms(new Term(fieldName)); OpenBitSet docsWithFieldBitSet = new OpenBitSet(reader.maxDoc()); do { Term term = termEnum.term(); if (term != null && term.field().equals(fieldName)) { termDocs.seek(term); while (termDocs.next()) { int docId = termDocs.doc(); boolean bSet = isUnfiltered || documentFilterBitSet.fastGet(docId); if (bSet) { docsWithFieldBitSet.fastSet(docId); } } } else { break; } } while (termEnum.next()); termEnum.close(); termEnum = null; if (docsWithFieldBitSet.cardinality() > 0) { this.fieldAccumulator.add(fieldName, docsWithFieldBitSet.cardinality()); } } // sort if (this.getSortByFrequency()) { this.fieldAccumulator.sortByFrequency(); } else { this.fieldAccumulator.sortByName(); } } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } this.setTimeMillis(System.currentTimeMillis() - t1); } // print if (request.getResponseWriter() != null) { this.print(request); } }
From source file:com.esri.gpt.catalog.lucene.stats.GlobalTermStats.java
License:Apache License
/** * Executes the collection of statistics. * @param request the active statistics request * @param reader the index reader/*from ww w. ja v a 2 s . c om*/ * @throws IOException if an error occurs while communicating with the index */ public void collectStats(StatsRequest request, IndexReader reader) throws IOException { long t1 = System.currentTimeMillis(); TermEnum termEnum = null; TermDocs termDocs = null; try { OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet(); boolean isUnfiltered = (documentFilterBitSet == null); // return if there are no stats to collect String[] fieldNames = request.getCollectableFieldNames(reader); if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) { return; } else if ((fieldNames == null) || (fieldNames.length == 0)) { return; } // accumulate term frequencies termDocs = reader.termDocs(); for (String fieldName : fieldNames) { termEnum = reader.terms(new Term(fieldName)); do { Term term = termEnum.term(); if (term != null && term.field().equals(fieldName)) { termDocs.seek(term); long count = 0; while (termDocs.next()) { int docId = termDocs.doc(); boolean bSet = isUnfiltered || documentFilterBitSet.get(docId); if (bSet) { count++; //this.termAccumulator.add(term.text(),termDocs.freq()); } } this.termAccumulator.add(term.text(), count); } else { break; } } while (termEnum.next()); termEnum.close(); termEnum = null; } // purge based on min frequence and min records // sort if (Val.chkStr(request.getSortBy()).equalsIgnoreCase("name")) { this.termAccumulator.sortByName(); } else { this.termAccumulator.sortByFrequency(); } } finally { try { if (termEnum != null) termEnum.close(); } catch (Exception ef) { } try { if (termDocs != null) termDocs.close(); } catch (Exception ef) { } this.setTimeMillis(System.currentTimeMillis() - t1); } // print if (request.getResponseWriter() != null) { this.print(request); } }