Example usage for org.apache.lucene.search IndexSearcher getIndexReader

List of usage examples for org.apache.lucene.search IndexSearcher getIndexReader

Introduction

On this page you can find example usages of org.apache.lucene.search.IndexSearcher.getIndexReader().

Prototype

public IndexReader getIndexReader() 

Source Link

Document

Return the IndexReader this searcher searches.
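
Before the examples below, here is a minimal, self-contained sketch of calling getIndexReader() to read basic index statistics. The index path, class name, and directory setup are illustrative assumptions, not taken from the files listed on this page, and the FSDirectory.open(Path) overload assumes Lucene 5 or later.

import java.nio.file.Paths;

import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;

public class GetIndexReaderSketch {
    public static void printStats(String indexPath) throws Exception {
        // Hypothetical index location: open a reader and wrap it in a searcher.
        Directory dir = FSDirectory.open(Paths.get(indexPath));
        DirectoryReader reader = DirectoryReader.open(dir);
        try {
            IndexSearcher searcher = new IndexSearcher(reader);
            // getIndexReader() returns the reader this searcher was built on.
            IndexReader r = searcher.getIndexReader();
            System.out.println("numDocs=" + r.numDocs() + ", maxDoc=" + r.maxDoc());
        } finally {
            reader.close();
            dir.close();
        }
    }
}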

Usage

From source file:com.google.gerrit.lucene.WrappableSearcherManager.java

License:Apache License

@Override
protected void decRef(IndexSearcher reference) throws IOException {
    reference.getIndexReader().decRef();
}

From source file:com.google.gerrit.lucene.WrappableSearcherManager.java

License:Apache License

@Override
protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
    final IndexReader r = referenceToRefresh.getIndexReader();
    assert r instanceof DirectoryReader : "searcher's IndexReader should be a DirectoryReader, but got " + r;
    final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r);
    if (newReader == null) {
        return null;
    } else {
        return getSearcher(searcherFactory, newReader);
    }
}

From source file:com.google.gerrit.lucene.WrappableSearcherManager.java

License:Apache License

@Override
protected boolean tryIncRef(IndexSearcher reference) {
    return reference.getIndexReader().tryIncRef();
}

From source file:com.google.gerrit.lucene.WrappableSearcherManager.java

License:Apache License

@Override
protected int getRefCount(IndexSearcher reference) {
    return reference.getIndexReader().getRefCount();
}

From source file:com.google.gerrit.lucene.WrappableSearcherManager.java

License:Apache License

/**
 * Returns <code>true</code> if no changes have occurred since this searcher
 * (i.e. its reader) was opened, otherwise <code>false</code>.
 * @see DirectoryReader#isCurrent()
 */
public boolean isSearcherCurrent() throws IOException {
    final IndexSearcher searcher = acquire();
    try {
        final IndexReader r = searcher.getIndexReader();
        assert r instanceof DirectoryReader : "searcher's IndexReader should be a DirectoryReader, but got "
                + r;
        return ((DirectoryReader) r).isCurrent();
    } finally {
        release(searcher);
    }
}

From source file:com.google.gerrit.lucene.WrappableSearcherManager.java

License:Apache License

/** Expert: creates a searcher from the provided {@link
 *  IndexReader} using the provided {@link
 *  SearcherFactory}.  NOTE: this decRefs incoming reader
 * on throwing an exception. */
@SuppressWarnings("resource")
public static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader)
        throws IOException {
    boolean success = false;
    final IndexSearcher searcher;
    try {
        searcher = searcherFactory.newSearcher(reader, null);
        // Modification for Gerrit: Allow searcherFactory to transitively wrap the
        // provided reader.
        IndexReader unwrapped = searcher.getIndexReader();
        while (true) {
            if (unwrapped == reader) {
                break;
            } else if (unwrapped instanceof FilterDirectoryReader) {
                unwrapped = ((FilterDirectoryReader) unwrapped).getDelegate();
            } else if (unwrapped instanceof FilterLeafReader) {
                unwrapped = ((FilterLeafReader) unwrapped).getDelegate();
            } else {
                break;
            }
        }

        if (unwrapped != reader) {
            throw new IllegalStateException("SearcherFactory must wrap the provided reader (got "
                    + searcher.getIndexReader() + " but expected " + reader + ")");
        }
        success = true;
    } finally {
        if (!success) {
            reader.decRef();
        }
    }
    return searcher;
}

From source file:com.ibm.jaql.lang.expr.index.ProbeLuceneFn.java

License:Apache License

@Override
public JsonIterator iter(Context context) throws Exception {
    JsonRecord fd = (JsonRecord) exprs[0].eval(context);
    if (fd == null) {
        return JsonIterator.NULL;
    }
    JsonString loc = (JsonString) fd.get(new JsonString("location"));
    if (loc == null) {
        return JsonIterator.NULL;
    }
    JsonString jquery = (JsonString) exprs[1].eval(context);
    if (jquery == null) {
        return JsonIterator.NULL;
    }

    HashSet<String> fields = null;
    JsonIterator iter = exprs[2].iter(context);
    for (JsonValue sv : iter) {
        JsonString s = (JsonString) sv;
        if (s != null) {
            if (fields == null) {
                fields = new HashSet<String>();
            }
            fields.add(s.toString());
        }
    }
    final FieldSelector fieldSelector = (fields == null) ? null
            : new SetBasedFieldSelector(fields, new HashSet<String>());

    final IndexSearcher searcher = new IndexSearcher(loc.toString());
    Analyzer analyzer = new StandardAnalyzer();
    QueryParser qp = new QueryParser("key", analyzer);
    Query query = qp.parse(jquery.toString());

    query = searcher.rewrite(query);
    final Scorer scorer = query.weight(searcher).scorer(searcher.getIndexReader());
    final BufferedJsonRecord rec = new BufferedJsonRecord();
    final JsonString jdoc = new JsonString("doc");
    final MutableJsonLong jdocid = new MutableJsonLong();

    return new JsonIterator(rec) {
        @Override
        public boolean moveNext() throws Exception {
            if (!scorer.next()) {
                return false;
            }
            rec.clear();
            int i = scorer.doc();
            jdocid.set(i);
            rec.add(jdoc, jdocid);
            if (fieldSelector != null) {
                Document doc = searcher.doc(i, fieldSelector);
                for (Object x : doc.getFields()) {
                    Field f = (Field) x;
                    String name = f.name();
                    byte[] val = f.binaryValue();
                    ByteArrayInputStream bais = new ByteArrayInputStream(val); // TODO: reuse
                    DataInputStream in = new DataInputStream(bais); // TODO: reuse
                    JsonValue ival = serializer.read(in, null);
                    rec.add(new JsonString(name), ival);
                }
            }
            return true; // currentValue == rec
        }
    };
}

From source file:com.jaeksoft.searchlib.parser.HtmlParser.java

License:Open Source License

@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum forcedLang)
        throws IOException, SearchLibException {

    titleBoost = getFloatProperty(ClassPropertyEnum.TITLE_BOOST);
    boostTagMap = new TreeMap<String, BoostTag>();
    boostTagMap.put("h1", new BoostTag(ClassPropertyEnum.H1_BOOST));
    boostTagMap.put("h2", new BoostTag(ClassPropertyEnum.H2_BOOST));
    boostTagMap.put("h3", new BoostTag(ClassPropertyEnum.H3_BOOST));
    boostTagMap.put("h4", new BoostTag(ClassPropertyEnum.H4_BOOST));
    boostTagMap.put("h5", new BoostTag(ClassPropertyEnum.H5_BOOST));
    boostTagMap.put("h6", new BoostTag(ClassPropertyEnum.H6_BOOST));
    ignoreMetaNoIndex = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOINDEX);
    ignoreMetaNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOFOLLOW);
    ignoreLinkNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_LINK_NOFOLLOW);
    ignoreUntitledDocuments = getBooleanProperty(ClassPropertyEnum.IGNORE_UNTITLED_DOCUMENTS);
    ignoreNonCanonical = getBooleanProperty(ClassPropertyEnum.IGNORE_NON_CANONICAL);

    String currentCharset = null;
    String headerCharset = null;
    String detectedCharset = null;

    IndexDocument sourceDocument = getSourceDocument();

    if (sourceDocument != null) {
        FieldValueItem fieldValueItem = sourceDocument
                .getFieldValue(UrlItemFieldEnum.INSTANCE.contentTypeCharset.getName(), 0);
        if (fieldValueItem != null)
            headerCharset = fieldValueItem.getValue();
        if (headerCharset == null) {
            fieldValueItem = sourceDocument.getFieldValue(UrlItemFieldEnum.INSTANCE.contentEncoding.getName(),
                    0);
            if (fieldValueItem != null)
                headerCharset = fieldValueItem.getValue();
        }
        currentCharset = headerCharset;
    }

    if (currentCharset == null) {
        detectedCharset = streamLimiter.getDetectedCharset();
        currentCharset = detectedCharset;
    }

    if (currentCharset == null) {
        currentCharset = getProperty(ClassPropertyEnum.DEFAULT_CHARSET).getValue();
    }

    String xPathExclusions = getProperty(ClassPropertyEnum.XPATH_EXCLUSION).getValue();
    Set<Object> xPathExclusionsSet = null;
    if (!StringUtils.isEmpty(xPathExclusions))
        xPathExclusionsSet = new HashSet<Object>();

    HtmlParserEnum htmlParserEnum = HtmlParserEnum.find(getProperty(ClassPropertyEnum.HTML_PARSER).getValue());

    HtmlDocumentProvider htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
            xPathExclusions, xPathExclusionsSet);
    if (htmlProvider == null)
        return;

    URL currentURL = htmlProvider.getBaseHref();
    IndexDocument srcDoc = getSourceDocument();
    String streamOriginalUrl = streamLimiter.getOriginURL();
    try {
        if (currentURL == null && !StringUtils.isEmpty(streamOriginalUrl))
            currentURL = LinkUtils.newEncodedURL(streamOriginalUrl);
        if (currentURL == null && srcDoc != null) {
            FieldValueItem fvi = srcDoc.getFieldValue(UrlItemFieldEnum.INSTANCE.url.getName(), 0);
            if (fvi != null)
                currentURL = LinkUtils.newEncodedURL(fvi.getValue());
        }
    } catch (URISyntaxException e) {
        throw new IOException(e);
    }

    URL canonicalURL = htmlProvider.getCanonicalLink(currentURL);
    if (canonicalURL != null) {
        String canUrl = canonicalURL.toExternalForm();
        addDetectedLink(canUrl);
        if (ignoreNonCanonical) {
            String curUrl = currentURL.toExternalForm();
            if (!canUrl.equals(curUrl)) {
                isCanonical = false;
                return;
            }
        }
    }
    isCanonical = true;

    String title = htmlProvider.getTitle();
    if (ignoreUntitledDocuments)
        if (title == null || title.length() == 0)
            return;

    ParserResultItem result = getNewParserResultItem();

    addFieldTitle(result, title);

    result.addField(ParserFieldEnum.htmlProvider, htmlProvider.getName());

    // Check ContentType charset in meta http-equiv
    String metaCharset = htmlProvider.getMetaCharset();

    String selectedCharset = selectCharset(headerCharset, metaCharset, detectedCharset);

    if (selectedCharset != null) {
        if (!selectedCharset.equals(currentCharset)) {
            currentCharset = selectedCharset;
            htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
                    xPathExclusions, xPathExclusionsSet);
        }
    }

    StringWriter writer = new StringWriter();
    IOUtils.copy(streamLimiter.getNewInputStream(), writer, currentCharset);
    result.addField(ParserFieldEnum.htmlSource, writer.toString());
    writer.close();

    HtmlNodeAbstract<?> rootNode = htmlProvider.getRootNode();
    if (rootNode == null)
        return;

    for (HtmlNodeAbstract<?> metaNode : htmlProvider.getMetas()) {
        String metaName = metaNode.getAttributeText("name");
        if (metaName != null && metaName.startsWith(OPENSEARCHSERVER_FIELD)) {
            String field = metaName.substring(OPENSEARCHSERVER_FIELD_LENGTH);
            String[] fields = field.split("\\.");
            if (fields != null) {
                String content = metaNode.getAttributeText("content");
                result.addDirectFields(fields, content);
            }
        }
    }

    result.addField(ParserFieldEnum.charset, currentCharset);

    String metaRobots = null;

    String metaDcLanguage = null;

    String metaContentLanguage = null;

    for (HtmlNodeAbstract<?> node : htmlProvider.getMetas()) {
        String attr_name = node.getAttributeText("name");
        String attr_http_equiv = node.getAttributeText("http-equiv");
        if ("keywords".equalsIgnoreCase(attr_name))
            result.addField(ParserFieldEnum.meta_keywords, HtmlDocumentProvider.getMetaContent(node));
        else if ("description".equalsIgnoreCase(attr_name))
            result.addField(ParserFieldEnum.meta_description, HtmlDocumentProvider.getMetaContent(node));
        else if ("robots".equalsIgnoreCase(attr_name))
            metaRobots = HtmlDocumentProvider.getMetaContent(node);
        else if ("dc.language".equalsIgnoreCase(attr_name))
            metaDcLanguage = HtmlDocumentProvider.getMetaContent(node);
        else if ("content-language".equalsIgnoreCase(attr_http_equiv))
            metaContentLanguage = HtmlDocumentProvider.getMetaContent(node);
    }

    boolean metaRobotsFollow = true;
    boolean metaRobotsNoIndex = false;
    if (metaRobots != null) {
        metaRobots = metaRobots.toLowerCase();
        if (metaRobots.contains("noindex") && !ignoreMetaNoIndex) {
            metaRobotsNoIndex = true;
            result.addField(ParserFieldEnum.meta_robots, "noindex");
        }
        if (metaRobots.contains("nofollow") && !ignoreMetaNoFollow) {
            metaRobotsFollow = false;
            result.addField(ParserFieldEnum.meta_robots, "nofollow");
        }
    }

    UrlFilterItem[] urlFilterList = getUrlFilterList();

    boolean removeFragment = ClassPropertyEnum.KEEP_REMOVE_LIST[1]
            .equalsIgnoreCase(getProperty(ClassPropertyEnum.URL_FRAGMENT).getValue());

    List<HtmlNodeAbstract<?>> nodes = rootNode.getAllNodes("a", "frame", "img");
    if (srcDoc != null && nodes != null && metaRobotsFollow) {
        for (HtmlNodeAbstract<?> node : nodes) {
            String href = null;
            String rel = null;
            String nodeName = node.getNodeName();
            if ("a".equals(nodeName)) {
                href = node.getAttributeText("href");
                rel = node.getAttributeText("rel");
            } else if ("frame".equals(nodeName) || "img".equals(nodeName)) {
                href = node.getAttributeText("src");
            }
            boolean follow = true;
            if (rel != null)
                if (rel.contains("nofollow") && !ignoreLinkNoFollow)
                    follow = false;
            URL newUrl = null;
            if (href != null)
                if (!href.startsWith("javascript:"))
                    if (currentURL != null) {
                        href = StringEscapeUtils.unescapeXml(href);
                        newUrl = LinkUtils.getLink(currentURL, href, urlFilterList, removeFragment);
                    }
            if (newUrl != null) {
                ParserFieldEnum field = null;
                if (newUrl.getHost().equalsIgnoreCase(currentURL.getHost())) {
                    if (follow)
                        field = ParserFieldEnum.internal_link;
                    else
                        field = ParserFieldEnum.internal_link_nofollow;
                } else {
                    if (follow)
                        field = ParserFieldEnum.external_link;
                    else
                        field = ParserFieldEnum.external_link_nofollow;
                }
                String link = newUrl.toExternalForm();
                result.addField(field, link);
                if (follow)
                    addDetectedLink(link);
            }
        }
    }

    if (!metaRobotsNoIndex) {
        nodes = rootNode.getNodes("html", "body");
        if (nodes == null || nodes.size() == 0)
            nodes = rootNode.getNodes("html");
        if (nodes != null && nodes.size() > 0) {
            StringBuilder sb = new StringBuilder();
            getBodyTextContent(result, sb, nodes.get(0), true, null, 1024, xPathExclusionsSet);
            result.addField(ParserFieldEnum.body, sb);
        }
    }

    // Language identification:
    Locale lang = null;
    String langMethod = null;
    String[] pathHtml = { "html" };
    nodes = rootNode.getNodes(pathHtml);
    if (nodes != null && nodes.size() > 0) {
        langMethod = "html lang attribute";
        String l = nodes.get(0).getAttributeText("lang");
        if (l != null)
            lang = Lang.findLocaleISO639(l);
    }
    if (lang == null && metaContentLanguage != null) {
        langMethod = "meta http-equiv content-language";
        lang = Lang.findLocaleISO639(metaContentLanguage);
    }
    if (lang == null && metaDcLanguage != null) {
        langMethod = "meta dc.language";
        lang = Lang.findLocaleISO639(metaDcLanguage);
    }

    if (lang != null) {
        result.addField(ParserFieldEnum.lang, lang.getLanguage());
        result.addField(ParserFieldEnum.lang_method, langMethod);
    } else if (!metaRobotsNoIndex)
        lang = result.langDetection(10000, ParserFieldEnum.body);

    if (getFieldMap().isMapped(ParserFieldEnum.generated_title)) {

        StringBuilder sb = new StringBuilder();
        try {
            if (!StringUtils.isEmpty(streamOriginalUrl))
                sb.append(new URI(streamOriginalUrl).getHost());
        } catch (URISyntaxException e) {
            Logging.error(e);
        }

        String generatedTitle = null;
        for (Map.Entry<String, BoostTag> entry : boostTagMap.entrySet()) {
            BoostTag boostTag = entry.getValue();
            if (boostTag.firstContent != null) {
                generatedTitle = boostTag.firstContent;
                break;
            }
        }

        if (generatedTitle == null) {
            final String FIELD_TITLE = "contents";

            MemoryIndex bodyMemoryIndex = new MemoryIndex();
            Analyzer bodyAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
            String bodyText = result.getMergedBodyText(100000, " ", ParserFieldEnum.body);
            bodyMemoryIndex.addField(FIELD_TITLE, bodyText, bodyAnalyzer);

            IndexSearcher indexSearcher = bodyMemoryIndex.createSearcher();
            IndexReader indexReader = indexSearcher.getIndexReader();
            MoreLikeThis mlt = new MoreLikeThis(indexReader);
            mlt.setAnalyzer(bodyAnalyzer);
            mlt.setFieldNames(new String[] { FIELD_TITLE });
            mlt.setMinWordLen(3);
            mlt.setMinTermFreq(1);
            mlt.setMinDocFreq(1);

            String[] words = mlt.retrieveInterestingTerms(0);
            if (words != null && words.length > 0)
                generatedTitle = words[0];
        }

        if (generatedTitle != null) {
            if (sb.length() > 0)
                sb.append(" - ");
            sb.append(generatedTitle);
        }

        if (sb.length() > 67) {
            int pos = sb.indexOf(" ", 60);
            if (pos == -1)
                pos = 67;
            sb.setLength(pos);
            sb.append("...");
        }
        result.addField(ParserFieldEnum.generated_title, sb.toString());
    }

}

From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java

License:Open Source License

public LuceneQueryHighlighter(JochreQuery jochreQuery, IndexSearcher indexSearcher) {
    try {
        this.indexSearcher = indexSearcher;
        this.jochreQuery = jochreQuery;
        query = rewrite(jochreQuery.getLuceneQuery());
        queryTerms = new TreeSet<Term>();
        query.extractTerms(queryTerms);
        if (LOG.isTraceEnabled())
            queryTermList = new ArrayList<Term>(queryTerms);

        final IndexReader reader = indexSearcher.getIndexReader();
        // add 1 to doc count to ensure even terms in all docs get a very small weight
        docCountLog = Math.log(reader.numDocs() + 1);

        IndexReaderContext readerContext = reader.getContext();
        leaves = readerContext.leaves();

        // since the same terms might be contained in the query multiple times (e.g. once per field)
        // we only consider them once each by using a HashSet
        terms = new HashSet<BytesRef>();
        Map<BytesRef, Integer> termFreqs = new HashMap<BytesRef, Integer>();
        for (Term term : queryTerms) {
            terms.add(term.bytes());
            termFreqs.put(term.bytes(), 0);
        }

        termLogs = new HashMap<BytesRef, Double>();
        for (Term term : queryTerms) {
            int freq = termFreqs.get(term.bytes());
            freq += reader.docFreq(term);
            termFreqs.put(term.bytes(), freq);
        }
        for (BytesRef term : terms) {
            int freq = termFreqs.get(term);
            termLogs.put(term, Math.log(freq));
        }
    } catch (IOException e) {
        LogUtils.logError(LOG, e);
        throw new RuntimeException(e);
    }
}

From source file:com.leavesfly.lia.admin.SearcherManager.java

License:Apache License

public synchronized void release(IndexSearcher searcher) //G
        throws IOException {
    searcher.getIndexReader().decRef();
}