Example usage for org.apache.lucene.util.automaton Automata makeString

List of usage examples for org.apache.lucene.util.automaton Automata makeString

Introduction

On this page you can find example usages of org.apache.lucene.util.automaton.Automata.makeString.

Prototype

public static Automaton makeString(String s) 

Source Link

Document

Returns a new (deterministic) automaton that accepts the single given string.

Usage

From source file:net.yacy.search.query.QueryParams.java

License:Open Source License

public QueryParams(final QueryGoal queryGoal, final QueryModifier modifier, final int maxDistance,
        final String prefer, final ContentDomain contentdom, final String language, final int timezoneOffset,
        final Collection<Tagging.Metatag> metatags, final CacheStrategy snippetCacheStrategy,
        final int itemsPerPage, final int offset, final String urlMask, final String tld, final String inlink,
        final Searchdom domType, final Bitfield constraint, final boolean allofconstraint,
        final Set<String> siteexcludes, final int domainzone, final String host, final boolean specialRights,
        final Segment indexSegment, final RankingProfile ranking, final String userAgent,
        final boolean filterfailurls, final boolean filterscannerfail, final double lat, final double lon,
        final double radius, final String[] search_navigation) {
    this.queryGoal = queryGoal;
    this.modifier = modifier;
    this.ranking = ranking;
    this.maxDistance = maxDistance;
    this.contentdom = contentdom;
    this.timezoneOffset = timezoneOffset;
    this.itemsPerPage = Math.min((specialRights) ? 10000 : 1000, itemsPerPage);
    this.offset = Math.max(0,
            Math.min((specialRights) ? 10000 - this.itemsPerPage : 1000 - this.itemsPerPage, offset));
    try {// w w  w. ja  v a 2  s . co m
        this.urlMaskString = urlMask;
        // solr doesn't like slashes, backslashes or doublepoints; remove them // urlmask = ".*\\." + ft + "(\\?.*)?";
        int p;
        while ((p = this.urlMaskString.indexOf(':')) >= 0)
            this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 1);
        while ((p = this.urlMaskString.indexOf('/')) >= 0)
            this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 1);
        while ((p = this.urlMaskString.indexOf('\\')) >= 0)
            this.urlMaskString = this.urlMaskString.substring(0, p) + "." + this.urlMaskString.substring(p + 2);
        this.urlMaskAutomaton = Automata.makeString(this.urlMaskString);
        this.urlMaskPattern = Pattern.compile(this.urlMaskString);
    } catch (final Throwable ex) {
        throw new IllegalArgumentException("Not a valid regular expression: " + urlMask, ex);
    }
    this.urlMask_isCatchall = this.urlMaskString.equals(catchall_pattern.toString());
    if (this.urlMask_isCatchall) {
        String protocolfilter = modifier.protocol == null ? ".*" : modifier.protocol;
        String defaulthostprefix = modifier.protocol == null ? "www" : modifier.protocol;
        String hostfilter = modifier.sitehost == null && tld == null ? ".*"
                : modifier.sitehost == null ? ".*\\." + tld
                        : modifier.sitehost.startsWith(defaulthostprefix + ".")
                                ? "(" + defaulthostprefix + "\\.)?" + modifier.sitehost.substring(4)
                                : "(" + defaulthostprefix + "\\.)?" + modifier.sitehost;
        String filefilter = modifier.filetype == null ? ".*" : ".*" + modifier.filetype + ".*";
        String filter = protocolfilter + "..." + hostfilter + "." + filefilter;
        if (!filter.equals(".*....*..*")) {
            Pattern r = Pattern.compile("(\\.|(\\.\\*))\\.\\*");
            Matcher m;
            while ((m = r.matcher(filter)).find())
                filter = m.replaceAll(".*");
            this.urlMaskString = filter;
            this.urlMaskAutomaton = Automata.makeString(filter);
            this.urlMask_isCatchall = false;
            this.urlMaskPattern = Pattern.compile(filter);
        }
    }
    this.tld = tld;
    this.inlink = inlink;
    try {
        this.prefer = Pattern.compile(prefer);
    } catch (final PatternSyntaxException ex) {
        throw new IllegalArgumentException("Not a valid regular expression: " + prefer, ex);
    }
    this.prefer.toString().equals(matchnothing_pattern.toString());
    assert language != null;
    this.targetlang = language;
    this.metatags = metatags;
    this.domType = domType;
    this.zonecode = domainzone;
    this.constraint = constraint;
    this.allofconstraint = allofconstraint;
    this.siteexcludes = siteexcludes != null && siteexcludes.isEmpty() ? null : siteexcludes;
    this.snippetCacheStrategy = snippetCacheStrategy;
    this.clienthost = host;
    this.remotepeer = null;
    this.starttime = Long.valueOf(System.currentTimeMillis());
    this.maxtime = 10000;
    this.indexSegment = indexSegment;
    this.userAgent = userAgent;
    this.transmitcount = 0;
    this.filterfailurls = filterfailurls;
    this.filterscannerfail = filterscannerfail;
    // we normalize here the location and radius because that should cause a better caching
    // and as surplus it will increase privacy
    this.lat = Math.floor(lat * this.kmNormal) / this.kmNormal;
    this.lon = Math.floor(lon * this.kmNormal) / this.kmNormal;
    this.radius = Math.floor(radius * this.kmNormal + 1) / this.kmNormal;
    this.facetfields = new LinkedHashSet<String>();

    this.solrSchema = indexSegment.fulltext().getDefaultConfiguration();
    for (String navkey : search_navigation) {
        CollectionSchema f = defaultfacetfields.get(navkey);
        // handle special field, authors_sxt (add to facet w/o contains check, as authors_sxt is not enabled (is copyfield))
        // dto. for coordinate_p_0_coordinate is not enabled but used for location facet (because coordinate_p not valid for facet field)
        if (f != null && (solrSchema.contains(f) || f.name().equals("author_sxt")
                || f.name().equals("coordinate_p_0_coordinate")))
            this.facetfields.add(f.getSolrFieldName());
    }
    if (LibraryProvider.autotagging != null)
        for (Tagging v : LibraryProvider.autotagging.getVocabularies()) {
            if (v.isFacet()) {
                this.facetfields.add(CollectionSchema.VOCABULARY_PREFIX + v.getName()
                        + CollectionSchema.VOCABULARY_TERMS_SUFFIX);
            }
        }
    this.cachedQuery = null;
}

From source file:org.codelibs.elasticsearch.common.regex.Regex.java

License:Apache License

/**
 * Builds an {@link Automaton} for a simple wildcard pattern: every {@code '*'} matches any
 * string, every other run of characters must match literally.
 */
public static Automaton simpleMatchToAutomaton(String pattern) {
    final List<Automaton> pieces = new ArrayList<>();
    int segmentStart = 0;
    int star = pattern.indexOf('*');
    while (star != -1) {
        // literal text in front of the wildcard (possibly empty), then the wildcard itself
        pieces.add(Automata.makeString(pattern.substring(segmentStart, star)));
        pieces.add(Automata.makeAnyString());
        segmentStart = star + 1;
        star = pattern.indexOf('*', segmentStart);
    }
    // literal remainder after the last wildcard (the whole pattern if it has none)
    pieces.add(Automata.makeString(pattern.substring(segmentStart)));
    return Operations.concatenate(pieces);
}

From source file:org.easynet.resource.queryparser.QueryParserTestBase.java

License:Apache License

/**
 * Parses several boosted queries and checks the resulting query and its boost, both with an
 * analyzer whose only stop word is "on" and with the English stop set.
 */
public void testBoost() throws Exception {
    // analyzer that treats exactly the string "on" as a stop word
    CharacterRunAutomaton stopWords = new CharacterRunAutomaton(Automata.makeString("on"));
    Analyzer oneStopAnalyzer = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, stopWords);
    QueryParser qp = getParserConfig(oneStopAnalyzer);
    Query q = getQuery("on^1.0", qp);
    assertNotNull(q);
    q = getQuery("\"hello\"^2.0", qp);
    assertNotNull(q);
    // expected value first, so that a failure message reports the values the right way round
    assertEquals((float) 2.0, q.getBoost(), (float) 0.5);
    q = getQuery("hello^2.0", qp);
    assertNotNull(q);
    assertEquals((float) 2.0, q.getBoost(), (float) 0.5);
    q = getQuery("\"on\"^1.0", qp);
    assertNotNull(q);

    Analyzer a2 = new MockAnalyzer(random(), MockTokenizer.SIMPLE, true, MockTokenFilter.ENGLISH_STOPSET);
    QueryParser qp2 = getParserConfig(a2);
    q = getQuery("the^3", qp2);
    // "the" is a stop word so the result is an empty query:
    assertNotNull(q);
    assertEquals("", q.toString());
    assertEquals(1.0f, q.getBoost(), 0.01f);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * test filtering two string fields/*w ww.  ja  v a  2  s .c o  m*/
 */
public void testIndexed() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 fields
    Document doc = new Document();
    doc.add(new StringField("fieldA", "test", Field.Store.NO));
    doc.add(new StringField("fieldB", "test", Field.Store.NO));
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    Set<String> seenFields = new HashSet<>();
    for (FieldInfo info : segmentReader.getFieldInfos()) {
        seenFields.add(info.name);
    }
    assertEquals(Collections.singleton("fieldA"), seenFields);
    assertNotNull(segmentReader.terms("fieldA"));
    assertNull(segmentReader.terms("fieldB"));

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * test filtering two int points/* w  ww  . j ava  2 s . com*/
 */
public void testPoints() throws Exception {
    Directory dir = newDirectory();
    IndexWriterConfig iwc = new IndexWriterConfig(null);
    IndexWriter iw = new IndexWriter(dir, iwc);

    // add document with 2 points
    Document doc = new Document();
    doc.add(new IntPoint("fieldA", 1));
    doc.add(new IntPoint("fieldB", 2));
    iw.addDocument(doc);

    // open reader
    DirectoryReader ir = FieldSubsetReader.wrap(DirectoryReader.open(iw),
            new CharacterRunAutomaton(Automata.makeString("fieldA")));

    // see only one field
    LeafReader segmentReader = ir.leaves().get(0).reader();
    PointValues points = segmentReader.getPointValues("fieldA");
    assertNull(segmentReader.getPointValues("fieldB"));

    // size statistic
    assertEquals(1, points.size());

    // doccount statistic
    assertEquals(1, points.getDocCount());

    // min statistic
    assertNotNull(points.getMinPackedValue());

    // max statistic
    assertNotNull(points.getMaxPackedValue());

    // bytes per dimension
    assertEquals(Integer.BYTES, points.getBytesPerDimension());

    // number of dimensions
    assertEquals(1, points.getNumDimensions());

    // walk the trees: we should see stuff in fieldA
    AtomicBoolean sawDoc = new AtomicBoolean(false);
    points.intersect(new IntersectVisitor() {
        @Override
        public void visit(int docID) throws IOException {
            throw new IllegalStateException("should not get here");
        }

        @Override
        public void visit(int docID, byte[] packedValue) throws IOException {
            sawDoc.set(true);
        }

        @Override
        public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) {
            return Relation.CELL_CROSSES_QUERY;
        }
    });
    assertTrue(sawDoc.get());

    TestUtil.checkReader(ir);
    IOUtils.close(ir, iw, dir);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Stored string fields: of two stored fields only the one accepted by the automaton
 * ("fieldA") may be returned with the document.
 */
public void testStoredFieldsString() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));

    // index a single document carrying both fields
    Document source = new Document();
    source.add(new StoredField("fieldA", "testA"));
    source.add(new StoredField("fieldB", "testB"));
    writer.addDocument(source);

    // wrap the reader so that only "fieldA" passes the filter
    CharacterRunAutomaton onlyFieldA = new CharacterRunAutomaton(Automata.makeString("fieldA"));
    DirectoryReader reader = FieldSubsetReader.wrap(DirectoryReader.open(writer), onlyFieldA);

    // the loaded document exposes exactly the permitted field
    Document visible = reader.document(0);
    assertEquals(1, visible.getFields().size());
    assertEquals("testA", visible.get("fieldA"));

    TestUtil.checkReader(reader);
    IOUtils.close(reader, writer, directory);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Stored binary fields: of two stored fields only the one accepted by the automaton
 * ("fieldA") may be returned with the document.
 */
public void testStoredFieldsBinary() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));

    // index a single document carrying both fields
    Document source = new Document();
    source.add(new StoredField("fieldA", new BytesRef("testA")));
    source.add(new StoredField("fieldB", new BytesRef("testB")));
    writer.addDocument(source);

    // wrap the reader so that only "fieldA" passes the filter
    CharacterRunAutomaton onlyFieldA = new CharacterRunAutomaton(Automata.makeString("fieldA"));
    DirectoryReader reader = FieldSubsetReader.wrap(DirectoryReader.open(writer), onlyFieldA);

    // the loaded document exposes exactly the permitted field
    Document visible = reader.document(0);
    assertEquals(1, visible.getFields().size());
    assertEquals(new BytesRef("testA"), visible.getBinaryValue("fieldA"));

    TestUtil.checkReader(reader);
    IOUtils.close(reader, writer, directory);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Stored int fields: of two stored fields only the one accepted by the automaton
 * ("fieldA") may be returned with the document.
 */
public void testStoredFieldsInt() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));

    // index a single document carrying both fields
    Document source = new Document();
    source.add(new StoredField("fieldA", 1));
    source.add(new StoredField("fieldB", 2));
    writer.addDocument(source);

    // wrap the reader so that only "fieldA" passes the filter
    CharacterRunAutomaton onlyFieldA = new CharacterRunAutomaton(Automata.makeString("fieldA"));
    DirectoryReader reader = FieldSubsetReader.wrap(DirectoryReader.open(writer), onlyFieldA);

    // the loaded document exposes exactly the permitted field
    Document visible = reader.document(0);
    assertEquals(1, visible.getFields().size());
    assertEquals(1, visible.getField("fieldA").numericValue());

    TestUtil.checkReader(reader);
    IOUtils.close(reader, writer, directory);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Stored long fields: of two stored fields only the one accepted by the automaton
 * ("fieldA") may be returned with the document.
 */
public void testStoredFieldsLong() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));

    // index a single document carrying both fields
    Document source = new Document();
    source.add(new StoredField("fieldA", 1L));
    source.add(new StoredField("fieldB", 2L));
    writer.addDocument(source);

    // wrap the reader so that only "fieldA" passes the filter
    CharacterRunAutomaton onlyFieldA = new CharacterRunAutomaton(Automata.makeString("fieldA"));
    DirectoryReader reader = FieldSubsetReader.wrap(DirectoryReader.open(writer), onlyFieldA);

    // the loaded document exposes exactly the permitted field
    Document visible = reader.document(0);
    assertEquals(1, visible.getFields().size());
    assertEquals(1L, visible.getField("fieldA").numericValue());

    TestUtil.checkReader(reader);
    IOUtils.close(reader, writer, directory);
}

From source file:org.elasticsearch.xpack.core.security.authz.accesscontrol.FieldSubsetReaderTests.java

License:Open Source License

/**
 * Stored float fields: of two stored fields only the one accepted by the automaton
 * ("fieldA") may be returned with the document.
 */
public void testStoredFieldsFloat() throws Exception {
    Directory directory = newDirectory();
    IndexWriter writer = new IndexWriter(directory, new IndexWriterConfig(null));

    // index a single document carrying both fields
    Document source = new Document();
    source.add(new StoredField("fieldA", 1F));
    source.add(new StoredField("fieldB", 2F));
    writer.addDocument(source);

    // wrap the reader so that only "fieldA" passes the filter
    CharacterRunAutomaton onlyFieldA = new CharacterRunAutomaton(Automata.makeString("fieldA"));
    DirectoryReader reader = FieldSubsetReader.wrap(DirectoryReader.open(writer), onlyFieldA);

    // the loaded document exposes exactly the permitted field
    Document visible = reader.document(0);
    assertEquals(1, visible.getFields().size());
    assertEquals(1F, visible.getField("fieldA").numericValue());

    TestUtil.checkReader(reader);
    IOUtils.close(reader, writer, directory);
}