Usage examples for org.apache.lucene.search.IndexSearcher.getIndexReader()
public IndexReader getIndexReader()
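IndexSearcher.getIndexReader() exposes the reader the searcher was built over, which is useful for reading index statistics or for tying the searcher's lifetime to the reader's reference count, as the examples below do. A minimal sketch, assuming a Lucene 4.x-style API and an already-open Directory named directory (the names are illustrative):

// Minimal sketch: inspect the reader backing an existing IndexSearcher.
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(directory));
IndexReader reader = searcher.getIndexReader();
int live = reader.numDocs();   // documents visible to searches
int total = reader.maxDoc();   // document-id space, including deleted documents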
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
@Override
protected void decRef(IndexSearcher reference) throws IOException {
  reference.getIndexReader().decRef();
}
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
@Override
protected IndexSearcher refreshIfNeeded(IndexSearcher referenceToRefresh) throws IOException {
  final IndexReader r = referenceToRefresh.getIndexReader();
  assert r instanceof DirectoryReader : "searcher's IndexReader should be a DirectoryReader, but got " + r;
  final IndexReader newReader = DirectoryReader.openIfChanged((DirectoryReader) r);
  if (newReader == null) {
    return null;
  } else {
    return getSearcher(searcherFactory, newReader);
  }
}
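The same reopen pattern can be used outside a ReferenceManager subclass. A minimal single-threaded sketch, assuming a Lucene 4.x-or-later DirectoryReader and an already-open Directory named dir (illustrative names); concurrent code should use reference counting as the surrounding class does:

// Reopen only if the index has changed since the current reader was opened.
DirectoryReader reader = DirectoryReader.open(dir);
IndexSearcher searcher = new IndexSearcher(reader);
// ... later ...
DirectoryReader newReader = DirectoryReader.openIfChanged(reader);
if (newReader != null) {   // null means nothing changed; keep the old reader
  reader.close();          // release the stale reader
  reader = newReader;
  searcher = new IndexSearcher(reader);
}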
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
@Override
protected boolean tryIncRef(IndexSearcher reference) {
  return reference.getIndexReader().tryIncRef();
}
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
@Override
protected int getRefCount(IndexSearcher reference) {
  return reference.getIndexReader().getRefCount();
}
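Together with the decRef and tryIncRef overrides above, this delegates the searcher's lifecycle entirely to the reader's reference counting. A minimal sketch of those IndexReader calls used directly, assuming an already-open reader named reader:

// Pin the reader while using it, then release it; at zero references it closes.
if (reader.tryIncRef()) {
  try {
    int holders = reader.getRefCount();  // current number of references, e.g. for diagnostics
    // ... search against the reader ...
  } finally {
    reader.decRef();
  }
}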
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
/**
 * Returns <code>true</code> if no changes have occurred since this searcher
 * (i.e. its reader) was opened, otherwise <code>false</code>.
 *
 * @see DirectoryReader#isCurrent()
 */
public boolean isSearcherCurrent() throws IOException {
  final IndexSearcher searcher = acquire();
  try {
    final IndexReader r = searcher.getIndexReader();
    assert r instanceof DirectoryReader : "searcher's IndexReader should be a DirectoryReader, but got " + r;
    return ((DirectoryReader) r).isCurrent();
  } finally {
    release(searcher);
  }
}
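Callers can combine this check with the manager's refresh method. A minimal sketch, assuming the manager extends Lucene's ReferenceManager (which supplies maybeRefresh(), acquire(), and release()); the variable name manager is illustrative:

// Refresh only when the underlying DirectoryReader is stale, then search.
if (!manager.isSearcherCurrent()) {
  manager.maybeRefresh();
}
IndexSearcher searcher = manager.acquire();
try {
  // ... run queries ...
} finally {
  manager.release(searcher);
}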
From source file:com.google.gerrit.lucene.WrappableSearcherManager.java
License:Apache License
/**
 * Expert: creates a searcher from the provided {@link IndexReader} using the
 * provided {@link SearcherFactory}. NOTE: this decRefs incoming reader on
 * throwing an exception.
 */
@SuppressWarnings("resource")
public static IndexSearcher getSearcher(SearcherFactory searcherFactory, IndexReader reader)
    throws IOException {
  boolean success = false;
  final IndexSearcher searcher;
  try {
    searcher = searcherFactory.newSearcher(reader, null);
    // Modification for Gerrit: Allow searcherFactory to transitively wrap the
    // provided reader.
    IndexReader unwrapped = searcher.getIndexReader();
    while (true) {
      if (unwrapped == reader) {
        break;
      } else if (unwrapped instanceof FilterDirectoryReader) {
        unwrapped = ((FilterDirectoryReader) unwrapped).getDelegate();
      } else if (unwrapped instanceof FilterLeafReader) {
        unwrapped = ((FilterLeafReader) unwrapped).getDelegate();
      } else {
        break;
      }
    }
    if (unwrapped != reader) {
      throw new IllegalStateException("SearcherFactory must wrap the provided reader (got "
          + searcher.getIndexReader() + " but expected " + reader + ")");
    }
    success = true;
  } finally {
    if (!success) {
      reader.decRef();
    }
  }
  return searcher;
}
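The unwrapping loop exists because getSearcher insists that the searcher returned by the factory is still backed, possibly through FilterDirectoryReader or FilterLeafReader wrappers, by the reader that was passed in. A minimal sketch of a conforming factory, assuming a Lucene version whose SearcherFactory has the two-argument newSearcher called above; the BM25Similarity configuration is purely illustrative:

// Hypothetical factory: configures the searcher but keeps the provided reader,
// so searcher.getIndexReader() == reader and the check above passes.
SearcherFactory factory = new SearcherFactory() {
  @Override
  public IndexSearcher newSearcher(IndexReader reader, IndexReader previousReader) throws IOException {
    IndexSearcher searcher = new IndexSearcher(reader);
    searcher.setSimilarity(new BM25Similarity()); // illustrative configuration
    return searcher;
  }
};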
From source file:com.ibm.jaql.lang.expr.index.ProbeLuceneFn.java
License:Apache License
@Override
public JsonIterator iter(Context context) throws Exception {
  JsonRecord fd = (JsonRecord) exprs[0].eval(context);
  if (fd == null) {
    return JsonIterator.NULL;
  }
  JsonString loc = (JsonString) fd.get(new JsonString("location"));
  if (loc == null) {
    return JsonIterator.NULL;
  }
  JsonString jquery = (JsonString) exprs[1].eval(context);
  if (jquery == null) {
    return JsonIterator.NULL;
  }
  HashSet<String> fields = null;
  JsonIterator iter = exprs[2].iter(context);
  for (JsonValue sv : iter) {
    JsonString s = (JsonString) sv;
    if (s != null) {
      if (fields == null) {
        fields = new HashSet<String>();
      }
      fields.add(s.toString());
    }
  }
  final FieldSelector fieldSelector = (fields == null) ? null
      : new SetBasedFieldSelector(fields, new HashSet<String>());
  final IndexSearcher searcher = new IndexSearcher(loc.toString());
  Analyzer analyzer = new StandardAnalyzer();
  QueryParser qp = new QueryParser("key", analyzer);
  Query query = qp.parse(jquery.toString());
  query = searcher.rewrite(query);
  final Scorer scorer = query.weight(searcher).scorer(searcher.getIndexReader());
  final BufferedJsonRecord rec = new BufferedJsonRecord();
  final JsonString jdoc = new JsonString("doc");
  final MutableJsonLong jdocid = new MutableJsonLong();
  return new JsonIterator(rec) {
    @Override
    public boolean moveNext() throws Exception {
      if (!scorer.next()) {
        return false;
      }
      rec.clear();
      int i = scorer.doc();
      jdocid.set(i);
      rec.add(jdoc, jdocid);
      if (fieldSelector != null) {
        Document doc = searcher.doc(i, fieldSelector);
        for (Object x : doc.getFields()) {
          Field f = (Field) x;
          String name = f.name();
          byte[] val = f.binaryValue();
          ByteArrayInputStream bais = new ByteArrayInputStream(val); // TODO: reuse
          DataInputStream in = new DataInputStream(bais); // TODO: reuse
          JsonValue ival = serializer.read(in, null);
          rec.add(new JsonString(name), ival);
        }
      }
      return true; // currentValue == rec
    }
  };
}
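This snippet targets a pre-3.0 Lucene API (Query.weight(Searcher) and Scorer.next()). Distilled to just the match-iteration pattern it relies on, using only calls that appear above plus a plain stored-field lookup, a sketch might look like this (searcher and query are assumed to exist):

// Iterate all matching documents with a Scorer over the searcher's reader (old API).
Query rewritten = searcher.rewrite(query);
Scorer scorer = rewritten.weight(searcher).scorer(searcher.getIndexReader());
while (scorer.next()) {                 // advance to the next hit
  int docId = scorer.doc();             // internal Lucene document id
  Document doc = searcher.doc(docId);   // load the stored fields for this hit
  // ... process doc ...
}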
From source file:com.jaeksoft.searchlib.parser.HtmlParser.java
License:Open Source License
@Override
protected void parseContent(StreamLimiter streamLimiter, LanguageEnum forcedLang)
    throws IOException, SearchLibException {
  titleBoost = getFloatProperty(ClassPropertyEnum.TITLE_BOOST);
  boostTagMap = new TreeMap<String, BoostTag>();
  boostTagMap.put("h1", new BoostTag(ClassPropertyEnum.H1_BOOST));
  boostTagMap.put("h2", new BoostTag(ClassPropertyEnum.H2_BOOST));
  boostTagMap.put("h3", new BoostTag(ClassPropertyEnum.H3_BOOST));
  boostTagMap.put("h4", new BoostTag(ClassPropertyEnum.H4_BOOST));
  boostTagMap.put("h5", new BoostTag(ClassPropertyEnum.H5_BOOST));
  boostTagMap.put("h6", new BoostTag(ClassPropertyEnum.H6_BOOST));
  ignoreMetaNoIndex = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOINDEX);
  ignoreMetaNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_META_NOFOLLOW);
  ignoreLinkNoFollow = getBooleanProperty(ClassPropertyEnum.IGNORE_LINK_NOFOLLOW);
  ignoreUntitledDocuments = getBooleanProperty(ClassPropertyEnum.IGNORE_UNTITLED_DOCUMENTS);
  ignoreNonCanonical = getBooleanProperty(ClassPropertyEnum.IGNORE_NON_CANONICAL);

  String currentCharset = null;
  String headerCharset = null;
  String detectedCharset = null;
  IndexDocument sourceDocument = getSourceDocument();
  if (sourceDocument != null) {
    FieldValueItem fieldValueItem = sourceDocument
        .getFieldValue(UrlItemFieldEnum.INSTANCE.contentTypeCharset.getName(), 0);
    if (fieldValueItem != null)
      headerCharset = fieldValueItem.getValue();
    if (headerCharset == null) {
      fieldValueItem = sourceDocument.getFieldValue(UrlItemFieldEnum.INSTANCE.contentEncoding.getName(), 0);
      if (fieldValueItem != null)
        headerCharset = fieldValueItem.getValue();
    }
    currentCharset = headerCharset;
  }
  if (currentCharset == null) {
    detectedCharset = streamLimiter.getDetectedCharset();
    currentCharset = detectedCharset;
  }
  if (currentCharset == null) {
    currentCharset = getProperty(ClassPropertyEnum.DEFAULT_CHARSET).getValue();
  }

  String xPathExclusions = getProperty(ClassPropertyEnum.XPATH_EXCLUSION).getValue();
  Set<Object> xPathExclusionsSet = null;
  if (!StringUtils.isEmpty(xPathExclusions))
    xPathExclusionsSet = new HashSet<Object>();
  HtmlParserEnum htmlParserEnum = HtmlParserEnum.find(getProperty(ClassPropertyEnum.HTML_PARSER).getValue());
  HtmlDocumentProvider htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter,
      xPathExclusions, xPathExclusionsSet);
  if (htmlProvider == null)
    return;

  URL currentURL = htmlProvider.getBaseHref();
  IndexDocument srcDoc = getSourceDocument();
  String streamOriginalUrl = streamLimiter.getOriginURL();
  try {
    if (currentURL == null && !StringUtils.isEmpty(streamOriginalUrl))
      currentURL = LinkUtils.newEncodedURL(streamOriginalUrl);
    if (currentURL == null && srcDoc != null) {
      FieldValueItem fvi = srcDoc.getFieldValue(UrlItemFieldEnum.INSTANCE.url.getName(), 0);
      if (fvi != null)
        currentURL = LinkUtils.newEncodedURL(fvi.getValue());
    }
  } catch (URISyntaxException e) {
    throw new IOException(e);
  }

  URL canonicalURL = htmlProvider.getCanonicalLink(currentURL);
  if (canonicalURL != null) {
    String canUrl = canonicalURL.toExternalForm();
    addDetectedLink(canUrl);
    if (ignoreNonCanonical) {
      String curUrl = currentURL.toExternalForm();
      if (!canUrl.equals(curUrl)) {
        isCanonical = false;
        return;
      }
    }
  }
  isCanonical = true;

  String title = htmlProvider.getTitle();
  if (ignoreUntitledDocuments)
    if (title == null || title.length() == 0)
      return;

  ParserResultItem result = getNewParserResultItem();
  addFieldTitle(result, title);
  result.addField(ParserFieldEnum.htmlProvider, htmlProvider.getName());

  // Check ContentType charset in meta http-equiv
  String metaCharset = htmlProvider.getMetaCharset();
  String selectedCharset = selectCharset(headerCharset, metaCharset, detectedCharset);
  if (selectedCharset != null) {
    if (!selectedCharset.equals(currentCharset)) {
      currentCharset = selectedCharset;
      htmlProvider = getHtmlDocumentProvider(htmlParserEnum, currentCharset, streamLimiter, xPathExclusions,
          xPathExclusionsSet);
    }
  }

  StringWriter writer = new StringWriter();
  IOUtils.copy(streamLimiter.getNewInputStream(), writer, currentCharset);
  result.addField(ParserFieldEnum.htmlSource, writer.toString());
  writer.close();

  HtmlNodeAbstract<?> rootNode = htmlProvider.getRootNode();
  if (rootNode == null)
    return;

  for (HtmlNodeAbstract<?> metaNode : htmlProvider.getMetas()) {
    String metaName = metaNode.getAttributeText("name");
    if (metaName != null && metaName.startsWith(OPENSEARCHSERVER_FIELD)) {
      String field = metaName.substring(OPENSEARCHSERVER_FIELD_LENGTH);
      String[] fields = field.split("\\.");
      if (fields != null) {
        String content = metaNode.getAttributeText("content");
        result.addDirectFields(fields, content);
      }
    }
  }
  result.addField(ParserFieldEnum.charset, currentCharset);

  String metaRobots = null;
  String metaDcLanguage = null;
  String metaContentLanguage = null;
  for (HtmlNodeAbstract<?> node : htmlProvider.getMetas()) {
    String attr_name = node.getAttributeText("name");
    String attr_http_equiv = node.getAttributeText("http-equiv");
    if ("keywords".equalsIgnoreCase(attr_name))
      result.addField(ParserFieldEnum.meta_keywords, HtmlDocumentProvider.getMetaContent(node));
    else if ("description".equalsIgnoreCase(attr_name))
      result.addField(ParserFieldEnum.meta_description, HtmlDocumentProvider.getMetaContent(node));
    else if ("robots".equalsIgnoreCase(attr_name))
      metaRobots = HtmlDocumentProvider.getMetaContent(node);
    else if ("dc.language".equalsIgnoreCase(attr_name))
      metaDcLanguage = HtmlDocumentProvider.getMetaContent(node);
    else if ("content-language".equalsIgnoreCase(attr_http_equiv))
      metaContentLanguage = HtmlDocumentProvider.getMetaContent(node);
  }

  boolean metaRobotsFollow = true;
  boolean metaRobotsNoIndex = false;
  if (metaRobots != null) {
    metaRobots = metaRobots.toLowerCase();
    if (metaRobots.contains("noindex") && !ignoreMetaNoIndex) {
      metaRobotsNoIndex = true;
      result.addField(ParserFieldEnum.meta_robots, "noindex");
    }
    if (metaRobots.contains("nofollow") && !ignoreMetaNoFollow) {
      metaRobotsFollow = false;
      result.addField(ParserFieldEnum.meta_robots, "nofollow");
    }
  }

  UrlFilterItem[] urlFilterList = getUrlFilterList();
  boolean removeFragment = ClassPropertyEnum.KEEP_REMOVE_LIST[1]
      .equalsIgnoreCase(getProperty(ClassPropertyEnum.URL_FRAGMENT).getValue());

  List<HtmlNodeAbstract<?>> nodes = rootNode.getAllNodes("a", "frame", "img");
  if (srcDoc != null && nodes != null && metaRobotsFollow) {
    for (HtmlNodeAbstract<?> node : nodes) {
      String href = null;
      String rel = null;
      String nodeName = node.getNodeName();
      if ("a".equals(nodeName)) {
        href = node.getAttributeText("href");
        rel = node.getAttributeText("rel");
      } else if ("frame".equals(nodeName) || "img".equals(nodeName)) {
        href = node.getAttributeText("src");
      }
      boolean follow = true;
      if (rel != null)
        if (rel.contains("nofollow") && !ignoreLinkNoFollow)
          follow = false;
      URL newUrl = null;
      if (href != null)
        if (!href.startsWith("javascript:"))
          if (currentURL != null) {
            href = StringEscapeUtils.unescapeXml(href);
            newUrl = LinkUtils.getLink(currentURL, href, urlFilterList, removeFragment);
          }
      if (newUrl != null) {
        ParserFieldEnum field = null;
        if (newUrl.getHost().equalsIgnoreCase(currentURL.getHost())) {
          if (follow)
            field = ParserFieldEnum.internal_link;
          else
            field = ParserFieldEnum.internal_link_nofollow;
        } else {
          if (follow)
            field = ParserFieldEnum.external_link;
          else
            field = ParserFieldEnum.external_link_nofollow;
        }
        String link = newUrl.toExternalForm();
        result.addField(field, link);
        if (follow)
          addDetectedLink(link);
      }
    }
  }

  if (!metaRobotsNoIndex) {
    nodes = rootNode.getNodes("html", "body");
    if (nodes == null || nodes.size() == 0)
      nodes = rootNode.getNodes("html");
    if (nodes != null && nodes.size() > 0) {
      StringBuilder sb = new StringBuilder();
      getBodyTextContent(result, sb, nodes.get(0), true, null, 1024, xPathExclusionsSet);
      result.addField(ParserFieldEnum.body, sb);
    }
  }

  // Language identification:
  Locale lang = null;
  String langMethod = null;
  String[] pathHtml = { "html" };
  nodes = rootNode.getNodes(pathHtml);
  if (nodes != null && nodes.size() > 0) {
    langMethod = "html lang attribute";
    String l = nodes.get(0).getAttributeText("lang");
    if (l != null)
      lang = Lang.findLocaleISO639(l);
  }
  if (lang == null && metaContentLanguage != null) {
    langMethod = "meta http-equiv content-language";
    lang = Lang.findLocaleISO639(metaContentLanguage);
  }
  if (lang == null && metaDcLanguage != null) {
    langMethod = "meta dc.language";
    lang = Lang.findLocaleISO639(metaDcLanguage);
  }
  if (lang != null) {
    result.addField(ParserFieldEnum.lang, lang.getLanguage());
    result.addField(ParserFieldEnum.lang_method, langMethod);
  } else if (!metaRobotsNoIndex)
    lang = result.langDetection(10000, ParserFieldEnum.body);

  if (getFieldMap().isMapped(ParserFieldEnum.generated_title)) {
    StringBuilder sb = new StringBuilder();
    try {
      if (!StringUtils.isEmpty(streamOriginalUrl))
        sb.append(new URI(streamOriginalUrl).getHost());
    } catch (URISyntaxException e) {
      Logging.error(e);
    }
    String generatedTitle = null;
    for (Map.Entry<String, BoostTag> entry : boostTagMap.entrySet()) {
      BoostTag boostTag = entry.getValue();
      if (boostTag.firstContent != null) {
        generatedTitle = boostTag.firstContent;
        break;
      }
    }
    if (generatedTitle == null) {
      final String FIELD_TITLE = "contents";
      MemoryIndex bodyMemoryIndex = new MemoryIndex();
      Analyzer bodyAnalyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
      String bodyText = result.getMergedBodyText(100000, " ", ParserFieldEnum.body);
      bodyMemoryIndex.addField(FIELD_TITLE, bodyText, bodyAnalyzer);
      IndexSearcher indexSearcher = bodyMemoryIndex.createSearcher();
      IndexReader indexReader = indexSearcher.getIndexReader();
      MoreLikeThis mlt = new MoreLikeThis(indexReader);
      mlt.setAnalyzer(bodyAnalyzer);
      mlt.setFieldNames(new String[] { FIELD_TITLE });
      mlt.setMinWordLen(3);
      mlt.setMinTermFreq(1);
      mlt.setMinDocFreq(1);
      String[] words = mlt.retrieveInterestingTerms(0);
      if (words != null && words.length > 0)
        generatedTitle = words[0];
    }
    if (generatedTitle != null) {
      if (sb.length() > 0)
        sb.append(" - ");
      sb.append(generatedTitle);
    }
    if (sb.length() > 67) {
      int pos = sb.indexOf(" ", 60);
      if (pos == -1)
        pos = 67;
      sb.setLength(pos);
      sb.append("...");
    }
    result.addField(ParserFieldEnum.generated_title, sb.toString());
  }
}
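The generated-title fallback at the end is where getIndexReader() matters: the page body is indexed into a single-document in-memory index, and MoreLikeThis mines that reader for a representative term. A standalone sketch of the same technique, assuming Lucene 3.6 to match the Version.LUCENE_36 constant above; the field name and input text are illustrative:

// Pick a "representative" term from a block of text via MemoryIndex + MoreLikeThis.
String fieldName = "contents";                     // illustrative
String bodyText = "extracted page body text ...";  // illustrative
MemoryIndex memoryIndex = new MemoryIndex();
Analyzer analyzer = new WhitespaceAnalyzer(Version.LUCENE_36);
memoryIndex.addField(fieldName, bodyText, analyzer);
IndexSearcher searcher = memoryIndex.createSearcher();
IndexReader reader = searcher.getIndexReader();    // reader over the single in-memory document
MoreLikeThis mlt = new MoreLikeThis(reader);
mlt.setAnalyzer(analyzer);
mlt.setFieldNames(new String[] { fieldName });
mlt.setMinWordLen(3);
mlt.setMinTermFreq(1);
mlt.setMinDocFreq(1);
String[] terms = mlt.retrieveInterestingTerms(0);  // doc 0 is the only document
String generatedTitle = (terms != null && terms.length > 0) ? terms[0] : null;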
From source file:com.joliciel.jochre.search.highlight.LuceneQueryHighlighter.java
License:Open Source License
public LuceneQueryHighlighter(JochreQuery jochreQuery, IndexSearcher indexSearcher) {
  try {
    this.indexSearcher = indexSearcher;
    this.jochreQuery = jochreQuery;
    query = rewrite(jochreQuery.getLuceneQuery());
    queryTerms = new TreeSet<Term>();
    query.extractTerms(queryTerms);
    if (LOG.isTraceEnabled())
      queryTermList = new ArrayList<Term>(queryTerms);

    final IndexReader reader = indexSearcher.getIndexReader();
    // add 1 to doc count to ensure even terms in all docs get a very small weight
    docCountLog = Math.log(reader.numDocs() + 1);
    IndexReaderContext readerContext = reader.getContext();
    leaves = readerContext.leaves();

    // since the same terms might be contained in the query multiple times (e.g. once per field)
    // we only consider them once each by using a HashSet
    terms = new HashSet<BytesRef>();
    Map<BytesRef, Integer> termFreqs = new HashMap<BytesRef, Integer>();
    for (Term term : queryTerms) {
      terms.add(term.bytes());
      termFreqs.put(term.bytes(), 0);
    }
    termLogs = new HashMap<BytesRef, Double>();
    for (Term term : queryTerms) {
      int freq = termFreqs.get(term.bytes());
      freq += reader.docFreq(term);
      termFreqs.put(term.bytes(), freq);
    }
    for (BytesRef term : terms) {
      int freq = termFreqs.get(term);
      termLogs.put(term, Math.log(freq));
    }
  } catch (IOException e) {
    LogUtils.logError(LOG, e);
    throw new RuntimeException(e);
  }
}
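The reader is used here only for corpus statistics: numDocs() for the document count and docFreq(term) for each query term, which yields IDF-style weights for highlighting. A minimal sketch of that lookup, assuming a Lucene 4.x-style reader, an existing searcher, and an already-built Term; the log weighting mirrors the approach above but is not this project's exact formula:

// IDF-style weight for one term, computed from the searcher's reader.
IndexReader reader = searcher.getIndexReader();
double docCountLog = Math.log(reader.numDocs() + 1); // +1 so terms present in every doc keep a small weight
int docFreq = reader.docFreq(term);                  // number of documents containing the term
double termLog = docFreq > 0 ? Math.log(docFreq) : 0.0;
double weight = docCountLog - termLog;               // rarer terms get larger weights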
From source file:com.leavesfly.lia.admin.SearcherManager.java
License:Apache License
public synchronized void release(IndexSearcher searcher) throws IOException {
  searcher.getIndexReader().decRef();
}
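This is the release half of the Lucene in Action SearcherManager: every searcher handed out by the manager must be released so the reader's reference count can drop back to zero. A minimal usage sketch; the acquire-style method name get() is an assumption about the surrounding class:

// Hypothetical pairing of acquire and release around a search.
IndexSearcher searcher = searcherManager.get();   // assumed acquire-style method
try {
  TopDocs hits = searcher.search(new TermQuery(new Term("field", "value")), 10);
  // ... use hits ...
} finally {
  searcherManager.release(searcher);              // decRefs searcher.getIndexReader()
}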