org.apache.lucene.util.automaton.RunAutomaton.java Source code

Java tutorial

Introduction

Here is the source code for org.apache.lucene.util.automaton.RunAutomaton.java

Source

/*
 * dk.brics.automaton
 * 
 * Copyright (c) 2001-2009 Anders Moeller
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 * 
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

package org.apache.lucene.util.automaton;

import java.util.Arrays;

import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.RamUsageEstimator;

/**
 * Finite-state automaton with fast run operation.  The initial state is always 0.
 * 
 * @lucene.experimental
 */
public abstract class RunAutomaton implements Accountable {
    private static final long BASE_RAM_BYTES = RamUsageEstimator.shallowSizeOfInstance(RunAutomaton.class);

    final Automaton automaton;
    final int alphabetSize;
    final int size;
    final boolean[] accept;
    final int[] transitions; // delta(state,c) = transitions[state*points.length +
    // getCharClass(c)]
    final int[] points; // char interval start points
    final int[] classmap; // map from char number to class

    /**
     * Constructs a new <code>RunAutomaton</code> from a deterministic
     * <code>Automaton</code>.
     * 
     * @param a an automaton
     */
    protected RunAutomaton(Automaton a, int alphabetSize) {
        this(a, alphabetSize, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
    }

    /**
     * Constructs a new <code>RunAutomaton</code> from a deterministic
     * <code>Automaton</code>.
     * 
     * @param a an automaton
     * @param maxDeterminizedStates maximum number of states that can be created
     *   while determinizing a
     */
    protected RunAutomaton(Automaton a, int alphabetSize, int maxDeterminizedStates) {
        this.alphabetSize = alphabetSize;
        a = Operations.determinize(a, maxDeterminizedStates);
        this.automaton = a;
        points = a.getStartPoints();
        size = Math.max(1, a.getNumStates());
        accept = new boolean[size];
        transitions = new int[size * points.length];
        Arrays.fill(transitions, -1);
        for (int n = 0; n < size; n++) {
            accept[n] = a.isAccept(n);
            for (int c = 0; c < points.length; c++) {
                int dest = a.step(n, points[c]);
                assert dest == -1 || dest < size;
                transitions[n * points.length + c] = dest;
            }
        }

        /*
         * Set alphabet table for optimal run performance.
         */
        classmap = new int[Math.min(256, alphabetSize)];
        int i = 0;
        for (int j = 0; j < classmap.length; j++) {
            if (i + 1 < points.length && j == points[i + 1]) {
                i++;
            }
            classmap[j] = i;
        }
    }

    /**
     * Returns a string representation of this automaton.
     */
    @Override
    public String toString() {
        StringBuilder b = new StringBuilder();
        b.append("initial state: 0\n");
        for (int i = 0; i < size; i++) {
            b.append("state ").append(i);
            if (accept[i])
                b.append(" [accept]:\n");
            else
                b.append(" [reject]:\n");
            for (int j = 0; j < points.length; j++) {
                int k = transitions[i * points.length + j];
                if (k != -1) {
                    int min = points[j];
                    int max;
                    if (j + 1 < points.length)
                        max = (points[j + 1] - 1);
                    else
                        max = alphabetSize;
                    b.append(" ");
                    Automaton.appendCharString(min, b);
                    if (min != max) {
                        b.append("-");
                        Automaton.appendCharString(max, b);
                    }
                    b.append(" -> ").append(k).append("\n");
                }
            }
        }
        return b.toString();
    }

    /**
     * Returns number of states in automaton.
     */
    public final int getSize() {
        return size;
    }

    /**
     * Returns acceptance status for given state.
     */
    public final boolean isAccept(int state) {
        return accept[state];
    }

    /**
     * Returns array of codepoint class interval start points. The array should
     * not be modified by the caller.
     */
    public final int[] getCharIntervals() {
        return points.clone();
    }

    /**
     * Gets character class of given codepoint
     */
    final int getCharClass(int c) {

        // binary search
        int a = 0;
        int b = points.length;
        while (b - a > 1) {
            int d = (a + b) >>> 1;
            if (points[d] > c)
                b = d;
            else if (points[d] < c)
                a = d;
            else
                return d;
        }
        return a;
    }

    /**
     * Returns the state obtained by reading the given char from the given state.
     * Returns -1 if not obtaining any such state. (If the original
     * <code>Automaton</code> had no dead states, -1 is returned here if and only
     * if a dead state is entered in an equivalent automaton with a total
     * transition function.)
     */
    public final int step(int state, int c) {
        assert c < alphabetSize;
        if (c >= classmap.length) {
            return transitions[state * points.length + getCharClass(c)];
        } else {
            return transitions[state * points.length + classmap[c]];
        }
    }

    @Override
    public int hashCode() {
        final int prime = 31;
        int result = 1;
        result = prime * result + alphabetSize;
        result = prime * result + points.length;
        result = prime * result + size;
        return result;
    }

    @Override
    public boolean equals(Object obj) {
        if (this == obj)
            return true;
        if (obj == null)
            return false;
        if (getClass() != obj.getClass())
            return false;
        RunAutomaton other = (RunAutomaton) obj;
        if (alphabetSize != other.alphabetSize)
            return false;
        if (size != other.size)
            return false;
        if (!Arrays.equals(points, other.points))
            return false;
        if (!Arrays.equals(accept, other.accept))
            return false;
        if (!Arrays.equals(transitions, other.transitions))
            return false;
        return true;
    }

    @Override
    public long ramBytesUsed() {
        return BASE_RAM_BYTES + RamUsageEstimator.sizeOfObject(accept) + RamUsageEstimator.sizeOfObject(automaton)
                + RamUsageEstimator.sizeOfObject(classmap) + RamUsageEstimator.sizeOfObject(points)
                + RamUsageEstimator.sizeOfObject(transitions);
    }
}