List of usage examples for the org.apache.lucene.util.automaton.RegExp constructor RegExp(String)
public RegExp(String s) throws IllegalArgumentException
Constructs a new RegExp from a string. From source file:com.github.flaxsearch.resources.TermsResource.java
License:Apache License
/**
 * Obtains a {@link TermsEnum} over {@code terms}, optionally narrowed by a regular expression.
 *
 * @param terms  the terms to enumerate
 * @param filter a pattern in Lucene {@link RegExp} syntax, or {@code null} for no filtering
 * @return {@code terms.iterator()} when {@code filter} is {@code null}; otherwise the enumerator
 *         returned by a {@link CompiledAutomaton} built from the pattern
 * @throws IOException if obtaining the enumerator fails
 * @throws IllegalArgumentException if {@code filter} cannot be parsed by {@link RegExp}
 */
private TermsEnum getTermsEnum(Terms terms, String filter) throws IOException {
    if (filter != null) {
        RegExp pattern = new RegExp(filter);
        CompiledAutomaton compiled = new CompiledAutomaton(pattern.toAutomaton());
        return compiled.getTermsEnum(terms);
    }
    return terms.iterator();
}
From source file:io.crate.expression.operator.RegexpMatchOperator.java
License:Apache License
@Override public Boolean evaluate(Input<BytesRef>... args) { assert args.length == 2 : "invalid number of arguments"; BytesRef source = args[0].value();//from w w w . ja v a 2 s . c o m if (source == null) { return null; } BytesRef pattern = args[1].value(); if (pattern == null) { return null; } String sPattern = pattern.utf8ToString(); if (isPcrePattern(sPattern)) { return source.utf8ToString().matches(sPattern); } else { RegExp regexp = new RegExp(sPattern); ByteRunAutomaton regexpRunAutomaton = new ByteRunAutomaton(regexp.toAutomaton()); return regexpRunAutomaton.run(source.bytes, source.offset, source.length); } }
From source file:io.crate.operation.operator.RegexpMatchOperator.java
License:Apache License
@Override public Boolean evaluate(Input<BytesRef>... args) { assert args.length == 2 : "invalid number of arguments"; BytesRef source = args[0].value();//w ww. j av a 2s . c om if (source == null) { return null; } BytesRef pattern = args[1].value(); if (pattern == null) { return null; } if (isPcrePattern(pattern)) { return source.utf8ToString().matches(pattern.utf8ToString()); } else { RegExp regexp = new RegExp(pattern.utf8ToString()); ByteRunAutomaton regexpRunAutomaton = new ByteRunAutomaton(regexp.toAutomaton()); return regexpRunAutomaton.run(source.bytes, source.offset, source.length); } }
From source file:org.apache.solr.analysis.MockTokenFilterFactory.java
License:Apache License
/**
 * Creates a new MockTokenFilterFactory.
 *
 * <p>Exactly one of the {@code stopset} ("english" or "empty") and {@code stopregex}
 * parameters must be supplied; the chosen one determines the stop-word filter. The optional
 * {@code enablePositionIncrements} parameter defaults to {@code true}.
 *
 * @param args the factory configuration; any entry still present after the known parameters
 *             have been read is rejected (presumably the {@code get} helpers consume the
 *             entries they read)
 * @throws IllegalArgumentException if both or neither of {@code stopset}/{@code stopregex}
 *         are given, or if unknown parameters remain
 */
public MockTokenFilterFactory(Map<String, String> args) {
    super(args);
    final String stopsetName = get(args, "stopset", Arrays.asList("english", "empty"), null, false);
    final String stopPattern = get(args, "stopregex");

    if (stopsetName == null && stopPattern == null) {
        throw new IllegalArgumentException(
                "Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
    }
    if (stopsetName != null && stopPattern != null) {
        throw new IllegalArgumentException("Parameters stopset and stopregex cannot both be specified.");
    }

    if (stopsetName != null) {
        // Only "english" and "empty" are allowed values, so anything else here is "empty".
        filter = "english".equalsIgnoreCase(stopsetName)
                ? MockTokenFilter.ENGLISH_STOPSET
                : MockTokenFilter.EMPTY_STOPSET;
    } else {
        filter = new CharacterRunAutomaton(new RegExp(stopPattern).toAutomaton());
    }

    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);

    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}
From source file:org.apache.solr.core.MockTokenFilterFactory.java
License:Apache License
/**
 * Creates a new MockTokenFilterFactory.
 *
 * <p>Exactly one of the {@code stopset} ("english" or "empty") and {@code stopregex}
 * parameters must be supplied; the chosen one determines the stop-word filter.
 *
 * @param args the factory configuration; any entry still present after the known parameters
 *             have been read is rejected (presumably the {@code get} helper consumes the
 *             entries it reads)
 * @throws IllegalArgumentException if both or neither of {@code stopset}/{@code stopregex}
 *         are given, or if unknown parameters remain
 */
public MockTokenFilterFactory(Map<String, String> args) {
    super(args);
    final String stopsetName = get(args, "stopset", Arrays.asList("english", "empty"), null, false);
    final String stopPattern = get(args, "stopregex");

    final boolean hasStopset = stopsetName != null;
    final boolean hasStopPattern = stopPattern != null;

    if (hasStopset && hasStopPattern) {
        throw new IllegalArgumentException("Parameters stopset and stopregex cannot both be specified.");
    }
    if (!hasStopset && !hasStopPattern) {
        throw new IllegalArgumentException(
                "Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
    }

    if (hasStopPattern) {
        RegExp compiledPattern = new RegExp(stopPattern);
        filter = new CharacterRunAutomaton(compiledPattern.toAutomaton());
    } else if ("english".equalsIgnoreCase(stopsetName)) {
        filter = MockTokenFilter.ENGLISH_STOPSET;
    } else {
        // Only "english" and "empty" are allowed values, so this must be "empty".
        filter = MockTokenFilter.EMPTY_STOPSET;
    }

    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.java
License:Apache License
/**
 * Builds an instance from pattern strings by delegating to the {@link RegExp}-based constructor.
 *
 * @param include pattern source for included terms, or {@code null} for no include pattern
 * @param exclude pattern source for excluded terms, or {@code null} for no exclude pattern
 * @throws IllegalArgumentException if a non-null pattern cannot be parsed by {@link RegExp}
 */
public IncludeExclude(String include, String exclude) {
    this(
        include != null ? new RegExp(include) : null,
        exclude != null ? new RegExp(exclude) : null);
}
From source file:org.codelibs.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.java
License:Apache License
/**
 * Read from a stream.
 *
 * <p>A leading boolean selects the layout. When it is {@code true}, two optional pattern
 * strings (include, then exclude) follow and the value sets and partition fields are left
 * empty/zero. Otherwise each value set is preceded by a presence flag and a count, and the
 * partition fields are read only for streams on or after
 * {@code Version.V_5_2_0_UNRELEASED}.
 *
 * @param in the stream to read from
 * @throws IOException if reading from the stream fails
 */
public IncludeExclude(StreamInput in) throws IOException {
    final boolean regexBased = in.readBoolean();
    if (regexBased) {
        final String includePattern = in.readOptionalString();
        include = includePattern != null ? new RegExp(includePattern) : null;
        final String excludePattern = in.readOptionalString();
        exclude = excludePattern != null ? new RegExp(excludePattern) : null;

        includeValues = null;
        excludeValues = null;
        incZeroBasedPartition = 0;
        incNumPartitions = 0;
        return;
    }

    include = null;
    exclude = null;

    if (!in.readBoolean()) {
        includeValues = null;
    } else {
        includeValues = new TreeSet<>();
        for (int remaining = in.readVInt(); remaining > 0; remaining--) {
            includeValues.add(in.readBytesRef());
        }
    }

    if (!in.readBoolean()) {
        excludeValues = null;
    } else {
        excludeValues = new TreeSet<>();
        for (int remaining = in.readVInt(); remaining > 0; remaining--) {
            excludeValues.add(in.readBytesRef());
        }
    }

    final boolean hasPartitionInfo = in.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED);
    if (!hasPartitionInfo) {
        incNumPartitions = 0;
        incZeroBasedPartition = 0;
    } else {
        // Wire order: number of partitions first, then the zero-based partition index.
        incNumPartitions = in.readVInt();
        incZeroBasedPartition = in.readVInt();
    }
}
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
public void testStopwords() throws Exception { CharacterRunAutomaton stopSet = new CharacterRunAutomaton(new RegExp("the|foo").toAutomaton()); QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet)); Query result = getQuery("field:the OR field:foo", qp); assertNotNull("result is null and it shouldn't be", result); assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 0, ((BooleanQuery) result).clauses().size() == 0); result = getQuery("field:woo OR field:the", qp); assertNotNull("result is null and it shouldn't be", result); assertTrue("result is not a TermQuery", result instanceof TermQuery); result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp); assertNotNull("result is null and it shouldn't be", result); assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery); if (VERBOSE)//from w w w. j a v a 2 s. c o m System.out.println("Result: " + result); assertTrue(((BooleanQuery) result).clauses().size() + " does not equal: " + 2, ((BooleanQuery) result).clauses().size() == 2); }
From source file:org.easynet.resource.queryparser.QueryParserTestBase.java
License:Apache License
/**
 * Checks that, with position increments enabled, parsing the phrase {@code "1 stop 2"} through
 * an analyzer whose stop set matches "stop" in any letter case equals a {@link PhraseQuery}
 * holding term "1" followed by term "2" at explicit position 2.
 */
public void testPhraseQueryPositionIncrements() throws Exception {
    CharacterRunAutomaton stopStopList = new CharacterRunAutomaton(
            new RegExp("[sS][tT][oO][pP]").toAutomaton());

    // Build the parser once; a second, identical construction would only discard the first
    // parser and its analyzer without ever using them.
    QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
    qp.setEnablePositionIncrements(true);

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.add(new Term("field", "1"));
    // Term "2" is placed at position 2, leaving position 1 unoccupied.
    phraseQuery.add(new Term("field", "2"), 2);

    assertEquals(phraseQuery, getQuery("\"1 stop 2\"", qp));
}
From source file:org.elasticsearch.messy.tests.StringTermsTests.java
License:Apache License
/**
 * Runs a terms aggregation on the single-valued field of {@code high_card_type} with three
 * regex-based include/exclude combinations and verifies which {@code val00N} buckets remain,
 * each with a document count of 1.
 */
public void testSingleValueFieldWithRegexFiltering() throws Exception {
    // include without exclude
    // expected buckets: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    IncludeExclude includeOnly = new IncludeExclude("val00.+", null);
    SearchResponse searchResponse = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(includeOnly))
            .execute().actionGet();

    assertSearchResponse(searchResponse);

    Terms termsAgg = searchResponse.getAggregations().get("terms");
    assertThat(termsAgg, notNullValue());
    assertThat(termsAgg.getName(), equalTo("terms"));
    assertThat(termsAgg.getBuckets().size(), equalTo(10));

    for (int i = 0; i < 10; i++) {
        String expectedKey = "val00" + i;
        Terms.Bucket found = termsAgg.getBucketByKey(expectedKey);
        assertThat(found, notNullValue());
        assertThat(key(found), equalTo(expectedKey));
        assertThat(found.getDocCount(), equalTo(1L));
    }

    // include and exclude
    // expected buckets: val002, val003, val004, val005, val006, val007, val008, val009
    IncludeExclude includeAndExclude = new IncludeExclude("val00.+", "(val000|val001)");
    searchResponse = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(includeAndExclude))
            .execute().actionGet();

    assertSearchResponse(searchResponse);

    termsAgg = searchResponse.getAggregations().get("terms");
    assertThat(termsAgg, notNullValue());
    assertThat(termsAgg.getName(), equalTo("terms"));
    assertThat(termsAgg.getBuckets().size(), equalTo(8));

    for (int i = 2; i < 10; i++) {
        String expectedKey = "val00" + i;
        Terms.Bucket found = termsAgg.getBucketByKey(expectedKey);
        assertThat(found, notNullValue());
        assertThat(key(found), equalTo(expectedKey));
        assertThat(found.getDocCount(), equalTo(1L));
    }

    // exclude without include
    // expected buckets: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    IncludeExclude excludeOnly = new IncludeExclude(null, new RegExp("val0[1-9]+.+"));
    searchResponse = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms")
                    .executionHint(randomExecutionHint())
                    .field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(excludeOnly))
            .execute().actionGet();

    assertSearchResponse(searchResponse);

    termsAgg = searchResponse.getAggregations().get("terms");
    assertThat(termsAgg, notNullValue());
    assertThat(termsAgg.getName(), equalTo("terms"));
    assertThat(termsAgg.getBuckets().size(), equalTo(10));

    for (int i = 0; i < 10; i++) {
        String expectedKey = "val00" + i;
        Terms.Bucket found = termsAgg.getBucketByKey(expectedKey);
        assertThat(found, notNullValue());
        assertThat(key(found), equalTo(expectedKey));
        assertThat(found.getDocCount(), equalTo(1L));
    }
}