Example usage for org.apache.lucene.index Term Term

List of usage examples for org.apache.lucene.index Term Term

Introduction

On this page you can find example usage for the org.apache.lucene.index Term constructor Term(String fld).

Prototype

public Term(String fld) 

Source Link

Document

Constructs a Term with the given field and empty text.

Usage

From source file:com.dasasian.chok.lucene.FilterWritableTest.java

License:Apache License

@Test
public void testSerializeFilter() throws Exception {
    // Round-trip a QueryWrapperFilter through FilterWritable serialization
    // and verify that the deserialized copy equals the original.
    TermQuery query = new TermQuery(new Term("chok"));
    FilterWritable original = new FilterWritable(new QueryWrapperFilter(query));
    DataOutputBuffer serialized = writeWritable(original);

    FilterWritable roundTripped = new FilterWritable();
    readWritable(serialized, roundTripped);

    assertTrue(original.equals(roundTripped));
}

From source file:com.dasasian.chok.lucene.QueryWritableTest.java

License:Apache License

@Test
public void testSerializeQuery() throws Exception {
    // Round-trip a TermQuery through QueryWritable serialization and
    // verify that the deserialized copy equals the original.
    TermQuery query = new TermQuery(new Term("chok"));
    QueryWritable original = new QueryWritable(query);
    DataOutputBuffer serialized = writeWritable(original);

    QueryWritable roundTripped = new QueryWritable();
    readWritable(serialized, roundTripped);

    assertTrue(original.equals(roundTripped));
}

From source file:com.epam.wilma.message.search.lucene.delete.LuceneDeleteEngineTest.java

License:Open Source License

@BeforeMethod
public void setUp() {
    // Fresh mocks for every test; inject the field name the engine uses
    // and prepare the matching delete term.
    MockitoAnnotations.initMocks(this);
    Whitebox.setInternalState(underTest, "fieldName", FIELD_NAME);
    term = new Term(FIELD_NAME);
}

From source file:com.epam.wilma.message.search.lucene.index.FileIndexerTest.java

License:Open Source License

@BeforeMethod
public void setUp() throws IOException {
    // Fresh mocks for every test; inject the configured field name.
    MockitoAnnotations.initMocks(this);
    Whitebox.setInternalState(underTest, "fieldName", FIELD_NAME);

    term = new Term(FIELD_NAME);
    document = new Document();

    // Stub the collaborators involved in indexing a single file.
    given(file.getAbsolutePath()).willReturn("path");
    given(fileInputStreamFactory.createFileInputStream(file)).willReturn(fileInputStream);
    given(bufferedReaderFactory.createReader(fileInputStream)).willReturn(bufferedReader);
    given(documentFactory.createDocument()).willReturn(document);
}

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Queries for documents that have no values associated with the field.
 * @param reader the index reader
 * @param field the field to check (a null or blank field yields an empty bit set)
 * @return the OpenBitSet (documents with no values set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryNulls(IndexReader reader, String field) throws IOException {
    int nBits = reader.maxDoc();
    OpenBitSet bitSet = new OpenBitSet(nBits);
    TermEnum termEnum = null;
    TermDocs termDocs = null;
    if ((field != null) && (field.trim().length() > 0)) {
        try {

            // find all documents that have a term for the field, then flip the bit set;
            // reader.terms(...) positions the enumeration at the first term of the field
            termEnum = reader.terms(new Term(field));
            termDocs = reader.termDocs();
            do {
                Term term = termEnum.term();
                if ((term != null) && term.field().equals(field)) {
                    termDocs.seek(term);
                    while (termDocs.next()) {
                        bitSet.fastSet(termDocs.doc());
                    }
                } else {
                    // terms are ordered by field first, so once the field no longer
                    // matches there is nothing left to collect (avoids scanning
                    // every remaining term in the index)
                    break;
                }
            } while (termEnum.next());

            // flip: bits now mark documents WITHOUT a value for the field
            bitSet.flip(0, nBits);
            // deleted documents were flipped on as well; clear them again
            if (reader.hasDeletions()) {
                for (int i = 0; i < nBits; i++) {
                    if (bitSet.get(i) && reader.isDeleted(i)) {
                        bitSet.fastFlip(i);
                    }
                }
            }

        } finally {
            // best-effort close; cleanup failures are deliberately ignored
            try {
                if (termEnum != null)
                    termEnum.close();
            } catch (Exception ef) {
            }
            try {
                if (termDocs != null)
                    termDocs.close();
            } catch (Exception ef) {
            }
        }
    }
    return bitSet;
}

From source file:com.esri.gpt.catalog.lucene.AclFilter.java

License:Apache License

/**
 * Queries for documents that match one or more of the supplied values.
 * @param reader the index reader
 * @param field the field to query
 * @param values the values to match (trimmed and lower-cased before lookup)
 * @return the OpenBitSet (documents with matches are set to true)
 * @throws IOException if an exception is encountered while reading the index
 */
private OpenBitSet queryValues(IndexReader reader, String field, String[] values) throws IOException {
    OpenBitSet matches = new OpenBitSet(reader.maxDoc());
    if ((values == null) || (values.length == 0)) {
        return matches;
    }
    TermDocs docs = null;
    try {
        Term template = new Term(field);
        docs = reader.termDocs();
        // set the bit of every document containing any of the supplied values
        for (String value : values) {
            docs.seek(template.createTerm(value.trim().toLowerCase()));
            while (docs.next()) {
                matches.set(docs.doc());
            }
        }
    } finally {
        // best-effort close; cleanup failures are deliberately ignored
        try {
            if (docs != null)
                docs.close();
        } catch (Exception ef) {
        }
    }
    return matches;
}

From source file:com.esri.gpt.catalog.lucene.LuceneIndexSynchronizer.java

License:Apache License

/**
 * Walks the documents within the database modifying the index as required.
 * @param info synchronization step information
 * @throws IOException if an exception occurs while communicating with the index
 * @throws SQLException if an exception occurs while communicating with the database
 * @throws CatalogIndexException if an exception occurs while modifying the index
 */
private void walkDatabase(WalkDatabaseInfo info) throws IOException, SQLException, CatalogIndexException {
    LOGGER.fine("Checking database records...");
    PreparedStatement st = null;
    PreparedStatement stCol = null;
    TermDocs termDocs = null;
    try {

        // determine the metadata acl policy
        MetadataAcl acl = new MetadataAcl(this.context);
        boolean bCheckAcl = !acl.isPolicyUnrestricted();

        // determine if collections are being used
        List<String[]> collections = null;
        CollectionDao colDao = new CollectionDao(this.context);
        boolean hasCollections = false;
        boolean useCollections = colDao.getUseCollections();
        String sColMemberTable = colDao.getCollectionMemberTableName();
        String sqlCol = "SELECT COLUUID FROM " + sColMemberTable + " WHERE DOCUUID=?";
        if (useCollections) {
            collections = colDao.queryCollections();
            hasCollections = (collections.size() > 0);
        }

        // initialize index related variables
        boolean bCheckIndex = (info.numOriginallyIndexed > 0);
        String fldUuid = Storeables.FIELD_UUID;
        String fldModified = Storeables.FIELD_DATEMODIFIED;
        String fldAcl = Storeables.FIELD_ACL;

        ArrayList<String> alFields = new ArrayList<String>();
        alFields.add(fldModified);
        if (bCheckAcl)
            alFields.add(fldAcl);
        if (useCollections)
            alFields.add("isPartOf");
        FieldSelector selector = new MapFieldSelector(alFields.toArray(new String[0]));

        Term termUuid = new Term(fldUuid);
        if (bCheckIndex) {
            termDocs = this.reader.termDocs();
        }
        StringSet delUuids = new StringSet();

        // build the database query
        StringBuffer sb = new StringBuffer("SELECT");
        sb.append(" ").append(this.resourceTable).append(".DOCUUID");
        sb.append(",").append(this.resourceTable).append(".APPROVALSTATUS");
        sb.append(",").append(this.resourceTable).append(".PROTOCOL_TYPE");
        sb.append(",").append(this.resourceTable).append(".FINDABLE");
        sb.append(",").append(this.resourceTable).append(".UPDATEDATE");
        sb.append(",").append(this.resourceTable).append(".ACL");
        sb.append(" FROM ").append(this.resourceTable);
        String sql = sb.toString();
        LOGGER.finest(sql);

        // execute the query, walk through the database records
        Connection con = this.context.getConnectionBroker().returnConnection("").getJdbcConnection();
        st = con.prepareStatement(sql);
        ResultSet rs = st.executeQuery();
        if (this.checkInterrupted())
            return;
        if (useCollections && hasCollections) {
            stCol = con.prepareStatement(sqlCol);
        }

        while (rs.next()) {

            info.numProcessed++;
            info.loopCount++;
            long nDbTimeModified = 0;
            Timestamp tsDbModified = null;
            String sDbAcl = null;
            boolean bIndexable = false;

            // read the database uuid and approval status
            String uuid = rs.getString(1);
            String status = rs.getString(2);
            String protocolType = Val.chkStr(rs.getString(3));
            boolean findable = Val.chkBool(rs.getString(4), false);

            bIndexable = (status != null)
                    && (status.equalsIgnoreCase("approved") || status.equalsIgnoreCase("reviewed"));
            if (bIndexable && protocolType.length() > 0 && !findable) {
                bIndexable = false;
            }

            // read the database modification date
            if (bIndexable) {
                tsDbModified = rs.getTimestamp(5);
                if (tsDbModified != null) {
                    nDbTimeModified = tsDbModified.getTime();
                }
                bIndexable = (nDbTimeModified > 0);
            }

            // for non-indexable documents, delete
            if (!bIndexable) {
                info.numNonIndexable++;
                if (bCheckIndex) {
                    termDocs.seek(termUuid.createTerm(uuid));
                    if (termDocs.next()) {
                        info.numNonIndexableFound++;
                        info.numRequiringDelete++;
                        delUuids.add(uuid);
                        if (delUuids.size() >= this.maxDeleteTokens) {
                            if (this.checkInterrupted())
                                return;
                            this.deleteDocuments(delUuids);
                            info.numDocsDeleted += delUuids.size();
                            delUuids.clear();
                            if (this.checkInterrupted())
                                return;
                        }
                    }
                }
            }

            // for indexable documents, check to ensure that they are in sync
            if (bIndexable) {
                info.numIndexable++;
                boolean bRequiresUpdate = true;

                // find the document within the index
                if (bCheckIndex) {
                    termDocs.seek(termUuid.createTerm(uuid));
                    if (termDocs.next()) {
                        info.numIndexableFound++;
                        Document doc = this.reader.document(termDocs.doc(), selector);
                        if (doc != null) {
                            bRequiresUpdate = false;

                            // check the modification date
                            long nIdxTimeModified = 0;
                            String sModified = doc.get(fldModified);
                            if (sModified != null) {
                                try {
                                    nIdxTimeModified = Long.valueOf(sModified);
                                } catch (NumberFormatException e) {
                                    nIdxTimeModified = 0;
                                }
                            }
                            bRequiresUpdate = (nIdxTimeModified == 0) || (nDbTimeModified > nIdxTimeModified);
                            if (bRequiresUpdate)
                                info.numWithInconsistentDates++;

                            // check the acl
                            if (!bRequiresUpdate && bCheckAcl) {
                                long tAclStartMillis = System.currentTimeMillis();
                                bRequiresUpdate = true;
                                String[] aclsDb = null;
                                sDbAcl = rs.getString(6);
                                try {
                                    // use an internal method for quick parsing
                                    //aclsDb = acl.makeDocumentAcl(sDbAcl);
                                    aclsDb = this.parseAcl(sDbAcl);
                                } catch (Exception eacl) {
                                    String sMsg = "Error parsing acl";
                                    sMsg += ", uuid=" + uuid + "\n" + Val.chkStr(eacl.getMessage());
                                    LOGGER.log(Level.WARNING, sMsg, eacl);
                                }

                                if (aclsDb == null)
                                    aclsDb = new String[0];
                                ArrayList<String> aclsIdx = new ArrayList<String>();
                                Field[] aclFields = doc.getFields(fldAcl);
                                if ((aclFields != null) && (aclFields.length > 0)) {
                                    for (Field aclField : aclFields) {
                                        aclsIdx.add(aclField.stringValue());
                                    }
                                }
                                if (aclsDb.length == aclsIdx.size()) {
                                    int nMatched = 0;
                                    if (aclsDb.length > 0) {
                                        for (String s1 : aclsDb) {
                                            for (String s2 : aclsIdx) {
                                                if (s1.equalsIgnoreCase(s2)) {
                                                    nMatched++;
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    bRequiresUpdate = (nMatched != aclsDb.length);
                                }
                                if (bRequiresUpdate)
                                    info.numWithInconsistentAcls++;
                                info.aclMillis += (System.currentTimeMillis() - tAclStartMillis);
                            }

                            // check collection membership
                            if (!bRequiresUpdate && useCollections) {
                                long tColStartMillis = System.currentTimeMillis();
                                bRequiresUpdate = true;

                                ArrayList<String> colDb = new ArrayList<String>();
                                if (useCollections && hasCollections) {
                                    stCol.clearParameters();
                                    stCol.setString(1, uuid);
                                    ResultSet rsCol = stCol.executeQuery();
                                    while (rsCol.next()) {
                                        String sCUuid = rsCol.getString(1);
                                        for (String[] col : collections) {
                                            if (sCUuid.equals(col[0])) {
                                                colDb.add(col[1]);
                                                break;
                                            }
                                        }
                                    }
                                    rsCol.close();
                                }

                                ArrayList<String> colIdx = new ArrayList<String>();
                                Field[] colFields = doc.getFields("isPartOf");
                                if ((colFields != null) && (colFields.length > 0)) {
                                    for (Field colField : colFields) {
                                        colIdx.add(colField.stringValue());
                                    }
                                }
                                if (colDb.size() == colIdx.size()) {
                                    int nMatched = 0;
                                    if (colDb.size() > 0) {
                                        for (String s1 : colDb) {
                                            for (String s2 : colIdx) {
                                                if (s1.equalsIgnoreCase(s2)) {
                                                    nMatched++;
                                                    break;
                                                }
                                            }
                                        }
                                    }
                                    bRequiresUpdate = (nMatched != colDb.size());
                                }
                                if (bRequiresUpdate)
                                    info.numWithInconsistentColMembership++;
                                info.colMillis += (System.currentTimeMillis() - tColStartMillis);
                            }

                        }
                    }
                }

                // execute the update if required
                if (bRequiresUpdate) {
                    if (this.checkInterrupted())
                        return;
                    try {
                        if (bCheckAcl) {
                            if (sDbAcl == null)
                                sDbAcl = rs.getString(6);
                        }
                        String sXml = Val.chkStr(this.readXml(uuid));
                        if (sXml.length() > 0) {
                            info.numRequiringUpdate++;
                            MetadataDocument mdDoc = new MetadataDocument();
                            Schema schema = mdDoc.prepareForView(this.context, sXml);
                            this.adapter.publishDocument(uuid, tsDbModified, schema, sDbAcl);
                            info.numDocsUpdated++;
                        }
                    } catch (SchemaException se) {

                        // dont' allow the entire process to fail over one bad xml
                        String sMsg = "Error indexing document during synchronization";
                        sMsg += ", uuid=" + uuid + "\n" + Val.chkStr(se.getMessage());
                        LOGGER.log(Level.WARNING, sMsg, se);
                    }
                    if (this.checkInterrupted())
                        return;
                }

            }

            // cache the synchronized uuids
            if (this.synchedUuidCache != null) {
                this.synchedUuidCache.put(uuid, "");
                if (this.synchedUuidCache.size() > this.maxUuidCache) {
                    this.synchedUuidCache = null;
                }
            }

            // log a status message if the feedback threshold was reached
            if (this.checkInterrupted())
                return;
            if ((System.currentTimeMillis() - info.loopStartMillis) >= this.feedbackMillis) {
                LOGGER.info(info.getLoopMessage());
            }

        }

        // delete any documents left over in the buffer
        // (fixed: was "delUuids.size() >= 0", which is always true and triggered
        // a delete pass even when the buffer was empty)
        if (delUuids.size() > 0) {
            if (this.checkInterrupted())
                return;
            this.deleteDocuments(delUuids);
            info.numDocsDeleted += delUuids.size();
        }

        LOGGER.info(info.getStepMessage());
    } finally {
        // best-effort close of statements and term docs; cleanup failures are ignored
        try {
            if (st != null)
                st.close();
        } catch (Exception ef) {
        }
        try {
            if (stCol != null)
                stCol.close();
        } catch (Exception ef) {
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
        }
    }
}

From source file:com.esri.gpt.catalog.lucene.LuceneIndexSynchronizer.java

License:Apache License

/**
 * Walks the documents within the index removing documents that are no longer indexable.
 * @param info synchronization step information
 * @throws IOException if an exception occurs while communicating with the index
 * @throws SQLException if an exception occurs while communicating with the database
 * @throws CatalogIndexException if an exception occurs while modifying the index
 */
private void walkIndex(WalkIndexInfo info) throws IOException, SQLException, CatalogIndexException {
    LOGGER.fine("Checking indexed documents...");
    TermEnum termEnum = null;
    try {
        StringSet chkUuids = new StringSet();
        StringSet delUuids = new StringSet();
        String fldUuid = Storeables.FIELD_UUID;
        termEnum = this.reader.terms(new Term(fldUuid));
        do {
            Term term = termEnum.term();
            // terms are ordered by field first; stop once we leave the uuid field
            if ((term == null) || !term.field().equals(fldUuid)) {
                break;
            }
            info.numProcessed++;
            info.loopCount++;

            // check the cache to see if the uuid was already synchronized,
            // otherwise add it to the set of uuids to check
            String uuid = term.text();
            if (this.synchedUuidCache != null) {
                if (this.synchedUuidCache.containsKey(uuid)) {
                    info.numFoundInCache++;
                } else {
                    chkUuids.add(uuid);
                }
            } else {
                chkUuids.add(uuid);
            }

            // check to ensure that these documents are indexable
            if (chkUuids.size() >= this.maxSqlTokens) {
                if (this.checkInterrupted())
                    return;
                this.ensureIndexable(info, chkUuids, delUuids);
                chkUuids.clear();
                if (this.checkInterrupted())
                    return;
                if ((System.currentTimeMillis() - info.loopStartMillis) >= this.feedbackMillis) {
                    LOGGER.info(info.getLoopMessage());
                }
            }

            // log a status message if the loop threshold was reached
            if (info.loopCount >= info.loopThreshold) {
                if (this.checkInterrupted())
                    return;
                if ((System.currentTimeMillis() - info.loopStartMillis) >= this.feedbackMillis) {
                    LOGGER.info(info.getLoopMessage());
                }
            }

        } while (termEnum.next());

        // check any documents left over in the buffers
        if (chkUuids.size() > 0) {
            if (this.checkInterrupted())
                return;
            this.ensureIndexable(info, chkUuids, delUuids);
            if (this.checkInterrupted())
                return;
        }
        // fixed: was "delUuids.size() >= 0", which is always true and triggered
        // a delete pass even when there was nothing to delete
        if (delUuids.size() > 0) {
            if (this.checkInterrupted())
                return;
            this.deleteDocuments(delUuids);
            info.numDocsDeleted += delUuids.size();
            if (this.checkInterrupted())
                return;
        }

        LOGGER.info(info.getStepMessage());
    } finally {
        // best-effort close; cleanup failures are deliberately ignored
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ef) {
        }
    }
}

From source file:com.esri.gpt.catalog.lucene.stats.GlobalFieldStats.java

License:Apache License

/**
 * Executes the collection of statistics.
 * <p>
 * For each collectable field, counts the number of (optionally filtered)
 * documents that contain at least one term for that field, accumulating the
 * per-field counts into {@code fieldAccumulator}.
 * @param request the active statistics request
 * @param reader the index reader
 * @throws IOException if an error occurs while communicating with the index
 */
public void collectStats(StatsRequest request, IndexReader reader) throws IOException {
    long t1 = System.currentTimeMillis();
    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
        OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet();
        boolean isUnfiltered = (documentFilterBitSet == null);

        // return if there are no stats to collect
        String[] fieldNames = request.getCollectableFieldNames(reader);
        if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) {
            return;
        } else if ((fieldNames == null) || (fieldNames.length == 0)) {
            return;
        }

        // accumulate field frequencies per document
        termDocs = reader.termDocs();
        for (String fieldName : fieldNames) {
            // position the enumeration at the first term of this field
            termEnum = reader.terms(new Term(fieldName));
            OpenBitSet docsWithFieldBitSet = new OpenBitSet(reader.maxDoc());
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {

                    // mark every (filter-accepted) document containing this term
                    termDocs.seek(term);
                    while (termDocs.next()) {
                        int docId = termDocs.doc();
                        boolean bSet = isUnfiltered || documentFilterBitSet.fastGet(docId);
                        if (bSet) {
                            docsWithFieldBitSet.fastSet(docId);
                        }
                    }

                } else {
                    // terms are ordered by field first; leaving the field ends the scan
                    break;
                }
            } while (termEnum.next());
            // close eagerly per field; null prevents a double close in the finally block
            termEnum.close();
            termEnum = null;

            if (docsWithFieldBitSet.cardinality() > 0) {
                this.fieldAccumulator.add(fieldName, docsWithFieldBitSet.cardinality());
            }
        }

        // sort
        if (this.getSortByFrequency()) {
            this.fieldAccumulator.sortByFrequency();
        } else {
            this.fieldAccumulator.sortByName();
        }

    } finally {
        // best-effort close; cleanup failures are deliberately ignored
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ef) {
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
        }
        this.setTimeMillis(System.currentTimeMillis() - t1);
    }

    // print
    if (request.getResponseWriter() != null) {
        this.print(request);
    }
}

From source file:com.esri.gpt.catalog.lucene.stats.GlobalTermStats.java

License:Apache License

/**
 * Executes the collection of statistics.
 * <p>
 * For each collectable field, accumulates the number of (optionally filtered)
 * documents containing each term of that field into {@code termAccumulator}.
 * @param request the active statistics request
 * @param reader the index reader
 * @throws IOException if an error occurs while communicating with the index
 */
public void collectStats(StatsRequest request, IndexReader reader) throws IOException {
    long t1 = System.currentTimeMillis();
    TermEnum termEnum = null;
    TermDocs termDocs = null;
    try {
        OpenBitSet documentFilterBitSet = request.getDocumentFilterBitSet();
        boolean isUnfiltered = (documentFilterBitSet == null);

        // return if there are no stats to collect
        String[] fieldNames = request.getCollectableFieldNames(reader);
        if (this.determineNumberOfDocsConsidered(reader, documentFilterBitSet) <= 0) {
            return;
        } else if ((fieldNames == null) || (fieldNames.length == 0)) {
            return;
        }

        // accumulate term frequencies

        termDocs = reader.termDocs();
        for (String fieldName : fieldNames) {
            // position the enumeration at the first term of this field
            termEnum = reader.terms(new Term(fieldName));
            do {
                Term term = termEnum.term();
                if (term != null && term.field().equals(fieldName)) {

                    // count the filter-accepted documents containing this term
                    termDocs.seek(term);
                    long count = 0;
                    while (termDocs.next()) {
                        int docId = termDocs.doc();
                        boolean bSet = isUnfiltered || documentFilterBitSet.get(docId);
                        if (bSet) {
                            count++;
                            //this.termAccumulator.add(term.text(),termDocs.freq());
                        }
                    }
                    // NOTE(review): terms whose documents were all filtered out are
                    // still added with a count of 0 — confirm this is intended
                    this.termAccumulator.add(term.text(), count);

                } else {
                    // terms are ordered by field first; leaving the field ends the scan
                    break;
                }
            } while (termEnum.next());
            // close eagerly per field; null prevents a double close in the finally block
            termEnum.close();
            termEnum = null;

        }

        // purge based on min frequency and min records

        // sort
        if (Val.chkStr(request.getSortBy()).equalsIgnoreCase("name")) {
            this.termAccumulator.sortByName();
        } else {
            this.termAccumulator.sortByFrequency();
        }

    } finally {
        // best-effort close; cleanup failures are deliberately ignored
        try {
            if (termEnum != null)
                termEnum.close();
        } catch (Exception ef) {
        }
        try {
            if (termDocs != null)
                termDocs.close();
        } catch (Exception ef) {
        }
        this.setTimeMillis(System.currentTimeMillis() - t1);
    }

    // print
    if (request.getResponseWriter() != null) {
        this.print(request);
    }
}