Example usage for the org.apache.lucene.util.automaton.RegExp constructor RegExp(String)

List of usage examples for the org.apache.lucene.util.automaton.RegExp constructor RegExp(String)

Introduction

On this page you can find usage examples for the RegExp(String) constructor of org.apache.lucene.util.automaton.RegExp.

Prototype

public RegExp(String s) throws IllegalArgumentException 

Source Link

Document

Constructs a new RegExp from a string.

Usage

From source file:com.github.flaxsearch.resources.TermsResource.java

License:Apache License

/**
 * Returns an enumeration over {@code terms}, optionally restricted by a regular expression.
 *
 * @param terms  the terms to enumerate
 * @param filter a Lucene {@link RegExp} pattern, or {@code null} to enumerate every term
 * @return {@code terms.iterator()} when {@code filter} is {@code null}; otherwise the
 *         enumeration produced by the automaton compiled from {@code filter}
 * @throws IOException propagated from the underlying enumeration calls
 */
private TermsEnum getTermsEnum(Terms terms, String filter) throws IOException {
    if (filter != null) {
        RegExp pattern = new RegExp(filter);
        CompiledAutomaton compiled = new CompiledAutomaton(pattern.toAutomaton());
        return compiled.getTermsEnum(terms);
    }
    return terms.iterator();
}

From source file:io.crate.expression.operator.RegexpMatchOperator.java

License:Apache License

/**
 * Tests whether the first argument matches the regular expression given as second argument.
 *
 * <p>Patterns recognised by {@code isPcrePattern} are evaluated with
 * {@link String#matches(String)}; every other pattern is compiled to a Lucene
 * {@link RegExp} automaton and run directly against the source bytes.
 *
 * @param args exactly two inputs: the source value and the pattern
 * @return the match result, or {@code null} if either the source or the pattern is {@code null}
 */
@Override
public Boolean evaluate(Input<BytesRef>... args) {
    assert args.length == 2 : "invalid number of arguments";

    final BytesRef source = args[0].value();
    if (source == null) {
        return null;
    }
    final BytesRef pattern = args[1].value();
    if (pattern == null) {
        return null;
    }

    final String regex = pattern.utf8ToString();
    if (!isPcrePattern(regex)) {
        // Plain Lucene syntax: match on the raw UTF-8 bytes without decoding the source.
        ByteRunAutomaton matcher = new ByteRunAutomaton(new RegExp(regex).toAutomaton());
        return matcher.run(source.bytes, source.offset, source.length);
    }
    return source.utf8ToString().matches(regex);
}

From source file:io.crate.operation.operator.RegexpMatchOperator.java

License:Apache License

/**
 * Tests whether the first argument matches the regular expression given as second argument.
 *
 * <p>Patterns recognised by {@code isPcrePattern} are evaluated with
 * {@link String#matches(String)}; every other pattern is compiled to a Lucene
 * {@link RegExp} automaton and run directly against the source bytes.
 *
 * @param args exactly two inputs: the source value and the pattern
 * @return the match result, or {@code null} if either the source or the pattern is {@code null}
 */
@Override
public Boolean evaluate(Input<BytesRef>... args) {
    assert args.length == 2 : "invalid number of arguments";

    final BytesRef source = args[0].value();
    if (source == null) {
        return null;
    }
    final BytesRef pattern = args[1].value();
    if (pattern == null) {
        return null;
    }

    if (!isPcrePattern(pattern)) {
        // Plain Lucene syntax: match on the raw UTF-8 bytes without decoding the source.
        RegExp luceneRegex = new RegExp(pattern.utf8ToString());
        ByteRunAutomaton matcher = new ByteRunAutomaton(luceneRegex.toAutomaton());
        return matcher.run(source.bytes, source.offset, source.length);
    }
    return source.utf8ToString().matches(pattern.utf8ToString());
}

From source file:org.apache.solr.analysis.MockTokenFilterFactory.java

License:Apache License

/**
 * Creates a new MockTokenFilterFactory from its configuration arguments.
 *
 * <p>Exactly one of {@code stopset} or {@code stopregex} must be supplied. A
 * {@code stopset} of {@code "english"} (case-insensitive) selects
 * {@link MockTokenFilter#ENGLISH_STOPSET}; any other accepted value selects
 * {@link MockTokenFilter#EMPTY_STOPSET}. A {@code stopregex} is compiled into a
 * {@link CharacterRunAutomaton}. {@code enablePositionIncrements} defaults to {@code true}.
 *
 * @param args the factory configuration
 * @throws IllegalArgumentException if both or neither of {@code stopset} and
 *         {@code stopregex} are given, or if {@code args} is not empty once the
 *         known parameters have been read
 */
public MockTokenFilterFactory(Map<String, String> args) {
    super(args);
    final String stopset = get(args, "stopset", Arrays.asList("english", "empty"), null, false);
    final String stopregex = get(args, "stopregex");
    final boolean hasStopset = stopset != null;
    final boolean hasStopregex = stopregex != null;

    if (hasStopset && hasStopregex) {
        throw new IllegalArgumentException("Parameters stopset and stopregex cannot both be specified.");
    }
    if (!hasStopset && !hasStopregex) {
        throw new IllegalArgumentException(
                "Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
    }

    if (hasStopset) {
        // Anything other than "english" must be "empty".
        filter = "english".equalsIgnoreCase(stopset)
                ? MockTokenFilter.ENGLISH_STOPSET
                : MockTokenFilter.EMPTY_STOPSET;
    } else {
        filter = new CharacterRunAutomaton(new RegExp(stopregex).toAutomaton());
    }

    enablePositionIncrements = getBoolean(args, "enablePositionIncrements", true);
    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}

From source file:org.apache.solr.core.MockTokenFilterFactory.java

License:Apache License

/**
 * Creates a new MockTokenFilterFactory from its configuration arguments.
 *
 * <p>Exactly one of {@code stopset} or {@code stopregex} must be supplied. A
 * {@code stopregex} is compiled into a {@link CharacterRunAutomaton}; a {@code stopset}
 * of {@code "english"} (case-insensitive) selects {@link MockTokenFilter#ENGLISH_STOPSET}
 * and any other accepted value selects {@link MockTokenFilter#EMPTY_STOPSET}.
 *
 * @param args the factory configuration
 * @throws IllegalArgumentException if both or neither of {@code stopset} and
 *         {@code stopregex} are given, or if {@code args} is not empty once the
 *         known parameters have been read
 */
public MockTokenFilterFactory(Map<String, String> args) {
    super(args);
    final String stopset = get(args, "stopset", Arrays.asList("english", "empty"), null, false);
    final String stopregex = get(args, "stopregex");

    if (stopset == null && stopregex == null) {
        throw new IllegalArgumentException(
                "Configuration Error: either the 'stopset' or the 'stopregex' parameter must be specified.");
    }
    if (stopset != null && stopregex != null) {
        throw new IllegalArgumentException("Parameters stopset and stopregex cannot both be specified.");
    }

    if (stopregex != null) {
        RegExp stopPattern = new RegExp(stopregex);
        filter = new CharacterRunAutomaton(stopPattern.toAutomaton());
    } else if ("english".equalsIgnoreCase(stopset)) {
        filter = MockTokenFilter.ENGLISH_STOPSET;
    } else {
        // The only other accepted stopset value is "empty".
        filter = MockTokenFilter.EMPTY_STOPSET;
    }

    if (!args.isEmpty()) {
        throw new IllegalArgumentException("Unknown parameters: " + args);
    }
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.java

License:Apache License

/**
 * Creates an instance from regular-expression strings by delegating to the
 * {@link RegExp}-based constructor.
 *
 * @param include pattern source for the include expression, or {@code null} for none
 * @param exclude pattern source for the exclude expression, or {@code null} for none
 */
public IncludeExclude(String include, String exclude) {
    this(
            include != null ? new RegExp(include) : null,
            exclude != null ? new RegExp(exclude) : null);
}

From source file:org.codelibs.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude.java

License:Apache License

/**
 * Read from a stream.
 *
 * <p>The first boolean on the stream selects the form: when it is {@code true} the
 * optional include/exclude strings are read and turned into {@link RegExp}s; otherwise
 * the optional include/exclude value sets and the partition settings are read.
 *
 * @param in the stream to read from
 * @throws IOException propagated from the stream reads
 */
public IncludeExclude(StreamInput in) throws IOException {
    if (in.readBoolean()) {
        // Regex form: value sets and partitioning are unused.
        includeValues = null;
        excludeValues = null;
        incZeroBasedPartition = 0;
        incNumPartitions = 0;
        // Include is read before exclude; a null string means "no expression".
        String includeString = in.readOptionalString();
        include = includeString == null ? null : new RegExp(includeString);
        String excludeString = in.readOptionalString();
        exclude = excludeString == null ? null : new RegExp(excludeString);
        return;
    }
    // Non-regex form: no regular expressions.
    include = null;
    exclude = null;
    // Each value set is preceded by a presence flag and a vint element count.
    if (in.readBoolean()) {
        int size = in.readVInt();
        includeValues = new TreeSet<>();
        for (int i = 0; i < size; i++) {
            includeValues.add(in.readBytesRef());
        }
    } else {
        includeValues = null;
    }
    if (in.readBoolean()) {
        int size = in.readVInt();
        excludeValues = new TreeSet<>();
        for (int i = 0; i < size; i++) {
            excludeValues.add(in.readBytesRef());
        }
    } else {
        excludeValues = null;
    }
    // Partition settings are only read from streams at or after V_5_2_0_UNRELEASED;
    // for older streams both default to 0.
    if (in.getVersion().onOrAfter(Version.V_5_2_0_UNRELEASED)) {
        incNumPartitions = in.readVInt();
        incZeroBasedPartition = in.readVInt();
    } else {
        incNumPartitions = 0;
        incZeroBasedPartition = 0;
    }
}

From source file:org.easynet.resource.queryparser.QueryParserTestBase.java

License:Apache License

/**
 * Checks how queries are parsed when the analyzer treats "the" and "foo" as stop words:
 * a query made only of stop words yields an empty BooleanQuery, a query with one
 * surviving term yields a TermQuery, and a mixed compound query yields a BooleanQuery
 * with two clauses.
 */
public void testStopwords() throws Exception {
    RegExp stopWordPattern = new RegExp("the|foo");
    CharacterRunAutomaton stopSet = new CharacterRunAutomaton(stopWordPattern.toAutomaton());
    MockAnalyzer analyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopSet);
    QueryParser qp = getParserConfig(analyzer);

    // Every term is a stop word.
    Query result = getQuery("field:the OR field:foo", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    int clauseCount = ((BooleanQuery) result).clauses().size();
    assertTrue(clauseCount + " does not equal: " + 0, clauseCount == 0);

    // One term survives.
    result = getQuery("field:woo OR field:the", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a TermQuery", result instanceof TermQuery);

    // Compound query mixing stop words and regular terms.
    result = getQuery("(fieldX:xxxxx OR fieldy:xxxxxxxx)^2 AND (fieldx:the OR fieldy:foo)", qp);
    assertNotNull("result is null and it shouldn't be", result);
    assertTrue("result is not a BooleanQuery", result instanceof BooleanQuery);
    if (VERBOSE) {
        System.out.println("Result: " + result);
    }
    clauseCount = ((BooleanQuery) result).clauses().size();
    assertTrue(clauseCount + " does not equal: " + 2, clauseCount == 2);
}

From source file:org.easynet.resource.queryparser.QueryParserTestBase.java

License:Apache License

/**
 * Checks that, with position increments enabled, the phrase {@code "1 stop 2"} parses to a
 * PhraseQuery holding "1" followed by "2" at position 2 when the analyzer drops "stop"
 * (matched case-insensitively by the stop automaton).
 */
public void testPhraseQueryPositionIncrements() throws Exception {
    CharacterRunAutomaton stopStopList = new CharacterRunAutomaton(
            new RegExp("[sS][tT][oO][pP]").toAutomaton());

    // Build the parser once; a second identical construction would only discard the first.
    QueryParser qp = getParserConfig(new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false, stopStopList));
    qp.setEnablePositionIncrements(true);

    PhraseQuery phraseQuery = new PhraseQuery();
    phraseQuery.add(new Term("field", "1"));
    // Position 2 leaves a gap at position 1 for the removed stop word.
    phraseQuery.add(new Term("field", "2"), 2);
    assertEquals(phraseQuery, getQuery("\"1 stop 2\"", qp));
}

From source file:org.elasticsearch.messy.tests.StringTermsTests.java

License:Apache License

/**
 * Runs a terms aggregation on the single-valued field with regex include/exclude filters
 * and checks which {@code val00N} buckets are returned.
 */
public void testSingleValueFieldWithRegexFiltering() throws Exception {
    // include without exclude
    // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    assertVal00Buckets(new IncludeExclude("val00.+", null), 0, 10);

    // include and exclude
    // we should be left with: val002, val003, val004, val005, val006, val007, val008, val009
    assertVal00Buckets(new IncludeExclude("val00.+", "(val000|val001)"), 2, 8);

    // exclude without include
    // we should be left with: val000, val001, val002, val003, val004, val005, val006, val007, val008, val009
    assertVal00Buckets(new IncludeExclude(null, new RegExp("val0[1-9]+.+")), 0, 10);
}

/**
 * Executes the terms aggregation with the given filter and asserts that exactly
 * {@code expectedBucketCount} buckets come back, one per key {@code val00<i>} for
 * {@code i} from {@code firstSuffix} to 9, each with a document count of 1.
 */
private void assertVal00Buckets(IncludeExclude filter, int firstSuffix, int expectedBucketCount) {
    SearchResponse response = client().prepareSearch("idx").setTypes("high_card_type")
            .addAggregation(terms("terms").executionHint(randomExecutionHint()).field(SINGLE_VALUED_FIELD_NAME)
                    .collectMode(randomFrom(SubAggCollectionMode.values()))
                    .includeExclude(filter))
            .execute().actionGet();

    assertSearchResponse(response);

    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(expectedBucketCount));

    for (int suffix = firstSuffix; suffix < 10; suffix++) {
        String expectedKey = "val00" + suffix;
        Terms.Bucket bucket = terms.getBucketByKey(expectedKey);
        assertThat(bucket, notNullValue());
        assertThat(key(bucket), equalTo(expectedKey));
        assertThat(bucket.getDocCount(), equalTo(1L));
    }
}