List of usage examples for org.apache.lucene.util.automaton Automata makeString
public static Automaton makeString(String s)
From source file:net.yacy.search.query.QueryParams.java
License:Open Source License
public QueryParams(final QueryGoal queryGoal, final QueryModifier modifier, final int maxDistance, final String prefer, final ContentDomain contentdom, final String language, final int timezoneOffset, final Collection<Tagging.Metatag> metatags, final CacheStrategy snippetCacheStrategy, final int itemsPerPage, final int offset, final String urlMask, final String tld, final String inlink, final Searchdom domType, final Bitfield constraint, final boolean allofconstraint, final Set<String> siteexcludes, final int domainzone, final String host, final boolean specialRights, final Segment indexSegment, final RankingProfile ranking, final String userAgent, final boolean filterfailurls, final boolean filterscannerfail, final double lat, final double lon, final double radius, final String[] search_navigation) { this.queryGoal = queryGoal; this.modifier = modifier; this.ranking = ranking; this.maxDistance = maxDistance; this.contentdom = contentdom; this.timezoneOffset = timezoneOffset; this.itemsPerPage = Math.min((specialRights) ? 10000 : 1000, itemsPerPage); this.offset = Math.max(0, Math.min((specialRights) ? 10000 - this.itemsPerPage : 1000 - this.itemsPerPage, offset)); try {// w w w. ja v a 2 s . co m this.urlMaskString = urlMask; // solr doesn't like slashes, backslashes or doublepoints; remove them // urlmask = ".*\\." + ft + "(\\?.*)?"; int p; while ((p = this.urlMaskString.indexOf(':')) >= 0) this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 1); while ((p = this.urlMaskString.indexOf('/')) >= 0) this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 1); while ((p = this.urlMaskString.indexOf('\\')) >= 0) this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 2); this.urlMaskAutomaton = Automata.makeString(this.urlMaskString); this.urlMaskPattern = Pattern.compile(this.urlMaskString); } catch (final Throwable ex) { throw new IllegalArgumentException("Not a valid regular expression: " + urlMask, ex); } this.urlMask_isCatchall = this.urlMaskString.equals(catchall_pattern.toString()); if (this.urlMask_isCatchall) { String protocolfilter = modifier.protocol == null ? ".*" : modifier.protocol; String defaulthostprefix = modifier.protocol == null ? "www" : modifier.protocol; String hostfilter = modifier.sitehost == null && tld == null ? ".*" : modifier.sitehost == null ? ".*\\." + tld : modifier.sitehost.startsWith(defaulthostprefix + ".") ? "(" + defaulthostprefix + "\\.)?" + modifier.sitehost.substring(4) : "(" + defaulthostprefix + "\\.)?" + modifier.sitehost; String filefilter = modifier.filetype == null ? ".*" : ".*" + modifier.filetype + ".*"; String filter = protocolfilter + "..." + hostfilter + "." + filefilter; if (!filter.equals(".*....*..*")) { Pattern r = Pattern.compile("(\\.|(\\.\\*))\\.\\*"); Matcher m; while ((m = r.matcher(filter)).find()) filter = m.replaceAll(".*"); this.urlMaskString = filter; this.urlMaskAutomaton = Automata.makeString(filter); this.urlMask_isCatchall = false; this.urlMaskPattern = Pattern.compile(filter); } } this.tld = tld; this.inlink = inlink; try { this.prefer = Pattern.compile(prefer); } catch (final PatternSyntaxException ex) { throw new IllegalArgumentException("Not a valid regular expression: " + prefer, ex); } this.prefer.toString().equals(matchnothing_pattern.toString()); assert language != null; this.targetlang = language; this.metatags = metatags; this.domType = domType; this.zonecode = domainzone; this.constraint = constraint; this.allofconstraint = allofconstraint; this.siteexcludes = siteexcludes != null && siteexcludes.isEmpty() ? null : siteexcludes; this.snippetCacheStrategy = snippetCacheStrategy; this.clienthost = host; this.remotepeer = null; this.starttime = Long.valueOf(System.currentTimeMillis()); this.maxtime = 10000; this.indexSegment = indexSegment; this.userAgent = userAgent; this.transmitcount = 0; this.filterfailurls = filterfailurls; this.filterscannerfail = filterscannerfail; // we normalize here the location and radius because that should cause a better caching // and as surplus it will increase privacy this.lat = Math.floor(lat * this.kmNormal) / this.kmNormal; this.lon = Math.floor(lon * this.kmNormal) / this.kmNormal; this.radius = Math.floor(radius * this.kmNormal + 1) / this.kmNormal; this.facetfields = new LinkedHashSet<String>(); this.solrSchema = indexSegment.fulltext().getDefaultConfiguration(); for (String navkey : search_navigation) { CollectionSchema f = defaultfacetfields.get(navkey); // handle special field, authors_sxt (add to facet w/o contains check, as authors_sxt is not enabled (is copyfield)) // dto. for coordinate_p_0_coordinate is not enabled but used for location facet (because coordinate_p not valid for facet field) if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt") || f.name().equals("coordinate_p_0_coordinate"))) this.facetfields.add(f.getSolrFieldName()); } if (LibraryProvider.autotagging != null) for (Tagging v : LibraryProvider.autotagging.getVocabularies()) { if (v.isFacet()) { this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName() + CollectionSchema.VOCABULARY_TERMS_SUFFIX); } } this.cachedQuery = null; }
From source file:org.codelibs.elasticsearch.common.regex.Regex.java
License:Apache License
/** Return an {Automaton} that matches the given pattern. */ public static Automaton simpleMatchToAutomaton(String pattern) { List<Automaton> automata = new ArrayList<>(); int previous = 0; for (int i = pattern.indexOf('*'); i != -1; i = pattern.indexOf('*', i + 1)) { automata.add(Automata.makeString(pattern.substring(previous, i))); automata.add(Automata.makeAnyString()); previous = i + 1;//from w ww.j ava 2 s.co m } automata.add(Automata.makeString(pattern.substring(previous))); return Operations.concatenate(automata); }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
public void testBoost() throws Exception { CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on")); Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords); QueryParser qp = getParserConfig(oneStopAnalyzer); Query q = getQuery("on^1.0", qp); assertNotNull(q);//from w w w.j ava2 s .co m q = getQuery("\"hello\"^2.0", qp); assertNotNull(q); assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); q = getQuery("hello^2.0", qp); assertNotNull(q); assertEquals(q.getBoost(), (float) 2.0, (float) 0.5); q = getQuery("\"on\"^1.0", qp); assertNotNull(q); Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET); QueryParser qp2 = getParserConfig(a2); q = getQuery("the^3", qp2); // "the" is a stop word so the result is an empty query: assertNotNull(q); assertEquals("", q.toString()); assertEquals(1.0f, q.getBoost(), 0.01f); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two string fields/*w ww. ja v a 2 s .c o m*/ */ public void testIndexed() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StringField("fieldA", "test", Field.Store.NO)); doc.add(new StringField("fieldB", "test", Field.Store.NO)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field LeafReader segmentReader = ir.leaves().get(0).reader(); Set<String> seenFields = new HashSet<>(); for (FieldInfo info : segmentReader.getFieldInfos()) { seenFields.add(info.name); } assertEquals(Collections.singleton("fieldA"), seenFields); assertNotNull(segmentReader.terms("fieldA")); assertNull(segmentReader.terms("fieldB")); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two int points/* w ww . j ava 2 s . com*/ */ public void testPoints() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 points Document doc = new Document(); doc.add(new IntPoint("fieldA", 1)); doc.add(new IntPoint("fieldB", 2)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field LeafReader segmentReader = ir.leaves().get(0).reader(); PointValues points = segmentReader.getPointValues("fieldA"); assertNull(segmentReader.getPointValues("fieldB")); // size statistic assertEquals(1, points.size()); // doccount statistic assertEquals(1, points.getDocCount()); // min statistic assertNotNull(points.getMinPackedValue()); // max statistic assertNotNull(points.getMaxPackedValue()); // bytes per dimension assertEquals(Integer.BYTES, points.getBytesPerDimension()); // number of dimensions assertEquals(1, points.getNumDimensions()); // walk the trees: we should see stuff in fieldA AtomicBoolean sawDoc = new AtomicBoolean(false); points.intersect(new IntersectVisitor() { @Override public void visit(int docID) throws IOException { throw new IllegalStateException("should not get here"); } @Override public void visit(int docID, byte[] packedValue) throws IOException { sawDoc.set(true); } @Override public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { return Relation.CELL_CROSSES_QUERY; } }); assertTrue(sawDoc.get()); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two stored fields (string) *///from w w w . ja va2 s . c o m public void testStoredFieldsString() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StoredField("fieldA", "testA")); doc.add(new StoredField("fieldB", "testB")); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field Document d2 = ir.document(0); assertEquals(1, d2.getFields().size()); assertEquals("testA", d2.get("fieldA")); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two stored fields (binary) *///from www . j a v a 2 s . c o m public void testStoredFieldsBinary() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StoredField("fieldA", new BytesRef("testA"))); doc.add(new StoredField("fieldB", new BytesRef("testB"))); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field Document d2 = ir.document(0); assertEquals(1, d2.getFields().size()); assertEquals(new BytesRef("testA"), d2.getBinaryValue("fieldA")); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two stored fields (int) *//*from w w w. j a v a 2s.c o m*/ public void testStoredFieldsInt() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StoredField("fieldA", 1)); doc.add(new StoredField("fieldB", 2)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field Document d2 = ir.document(0); assertEquals(1, d2.getFields().size()); assertEquals(1, d2.getField("fieldA").numericValue()); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two stored fields (long) *//*from ww w. ja v a 2s . com*/ public void testStoredFieldsLong() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StoredField("fieldA", 1L)); doc.add(new StoredField("fieldB", 2L)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field Document d2 = ir.document(0); assertEquals(1, d2.getFields().size()); assertEquals(1L, d2.getField("fieldA").numericValue()); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }
From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java
License:Open Source License
/** * test filtering two stored fields (float) *///w w w .j a v a2 s . co m public void testStoredFieldsFloat() throws Exception { Directory dir = newDirectory(); IndexWriterConfig iwc = new IndexWriterConfig(null); IndexWriter iw = new IndexWriter(dir, iwc); // add document with 2 fields Document doc = new Document(); doc.add(new StoredField("fieldA", 1F)); doc.add(new StoredField("fieldB", 2F)); iw.addDocument(doc); // open reader DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw), new CharacterRunAutomaton(Automata.makeString("fieldA"))); // see only one field Document d2 = ir.document(0); assertEquals(1, d2.getFields().size()); assertEquals(1F, d2.getField("fieldA").numericValue()); TestUtil.checkReader(ir); IOUtils.close(ir, iw, dir); }