Contextual.Stemmer.Context.java Source code

Java tutorial

Introduction

Here is the source code for Contextual.Stemmer.Context.java

Source

/*
 * The MIT License (MIT)
 * Copyright (c) 2014 Andy Librian
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/**
 * Created by Sami on 17/11/14.
 */

package Contextual.Stemmer;

import Contextual.Stemmer.ConfixStripping.PrecedenceAdjustmentSpecification;
import Contextual.Stemmer.Dictionary.DictionaryInterface;
import Contextual.Stemmer.Visitor.EnumRemovalRules;
import Contextual.Stemmer.Visitor.VisitableInterface;
import Contextual.Stemmer.Visitor.VisitorInterface;
import Contextual.Stemmer.Visitor.VisitorProvider;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Lists;

import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

public class Context implements ContextInterface, VisitableInterface {

    protected boolean processIsStopped = false;
    protected String originalWord, currentWord;
    protected List<RemovalInterface> removals;
    protected DictionaryInterface dictionary;
    protected VisitorProvider visitorProvider;
    protected List<VisitorInterface> visitors, suffixVisitors, prefixVisitors;

    protected String result;

    public Context(String originalWord, DictionaryInterface dictionary, VisitorProvider visitorProvider) {
        this.originalWord = originalWord;
        this.currentWord = this.originalWord;
        this.dictionary = dictionary;
        this.visitorProvider = visitorProvider;

        this.initVisitors();
    }

    protected void initVisitors() {
        this.visitors.addAll(visitorProvider.getVisitors());
        this.suffixVisitors.addAll(this.visitorProvider.getSuffixVisitors());
        this.prefixVisitors.addAll(this.visitorProvider.getPrefixVisitors());
        //to be implemented after finishing up VisitorProvider
    }

    public String getOriginalWord() {
        return null;
    }

    public String getCurrentWord() {
        return this.currentWord;
    }

    public void setCurrentWord(String word) {
        this.currentWord = word;
    }

    public DictionaryInterface getDictionary() {
        return this.dictionary;
    }

    public void stopProcess() {
        this.processIsStopped = true;
    }

    public boolean processIsStopped() {
        return this.processIsStopped;
    }

    public void addRemoval(RemovalInterface removal) {
        this.removals.add(removal);
    }

    public RemovalInterface[] getRemovals() {
        return (RemovalInterface[]) this.removals.toArray();
    }

    public String getResult() {
        return result;
    }

    /**
     * Execute stemming process; the result can be retrieved with getResult()
     *
     * return void
     */
    public void execute() {
        // step 1 - 5
        this.startStemmingProcess();

        // step 6
        if (this.dictionary.contains(this.getCurrentWord())) {
            this.result = this.getCurrentWord();
        } else {
            this.result = this.originalWord;
        }
    }

    protected void startStemmingProcess() {
        // step 1
        if (this.dictionary.contains(this.getCurrentWord())) {
            return;
        }

        this.acceptVisitors((VisitorInterface[]) this.visitors.toArray());

        if (this.dictionary.contains(this.getCurrentWord())) {
            return;
        }

        PrecedenceAdjustmentSpecification csPrecedenceAdjustmentSpecification = new PrecedenceAdjustmentSpecification();

        /*
         * Confix Stripping
         * Try to remove prefix before suffix if the specification is met
         */
        if (csPrecedenceAdjustmentSpecification.isSatisfiedBy(this.getOriginalWord())) {

            // step 4, 5
            this.removePrefixes();
            if (this.dictionary.contains(this.getCurrentWord())) {
                return;
            }

            // step 2, 3
            this.removeSuffixes();
            if (this.dictionary.contains(this.getCurrentWord()))
                return;
            else {
                // if the trial is failed, restore the original word
                // and continue to normal rule precedence (suffix first, prefix afterwards)
                this.setCurrentWord(this.originalWord);
                this.removals = null;
            }
        }

        // step 2, 3
        this.removeSuffixes();
        if (this.dictionary.contains(this.getCurrentWord()))
            return;

        //step 4, 5
        this.removePrefixes();
        if (this.dictionary.contains(this.getCurrentWord()))
            return;

        // ECS loop pengembalian akhiran
        this.loopPengembalianAkhiran();
    }//=================================end of startStemmingProcess()=================================

    protected String acceptVisitors(VisitorInterface[] visitors) {

        for (VisitorInterface visitor : visitors) {
            this.accept(visitor);

            if (this.getDictionary().contains(this.getCurrentWord()))
                return this.getCurrentWord();

            if (this.processIsStopped) {
                return this.getCurrentWord();
            }
        }
        return null;
    }

    protected void removePrefixes() {
        for (int i = 0; i < 3; i++) {
            this.acceptPrefixVisitors((VisitorInterface[]) this.prefixVisitors.toArray());
            if (this.dictionary.contains(this.getCurrentWord()))
                return;
        }
    }

    protected String acceptPrefixVisitors(VisitorInterface[] visitors) {
        int removalCount = this.removals.size();

        for (VisitorInterface visitor : visitors) {
            this.accept(visitor);

            if (this.getDictionary().contains(this.getCurrentWord()))
                return this.getCurrentWord();

            if (this.processIsStopped)
                return this.getCurrentWord();

            if (this.removals.size() > removalCount)
                break;
        }
        return null;
    }

    /**
     * ECS Loop Pengembalian Akhiran
     */
    public void loopPengembalianAkhiran() {
        // restore prefix to form [DP+[DP+[DP]]] + Root word
        this.restorePrefix();

        //if there's a problem, try to check if the original list is also reversed
        final ImmutableList<RemovalInterface> _removals = ImmutableList.copyOf(removals);
        List<RemovalInterface> reversedRemovals = Lists.reverse(_removals);
        final String _currentWord = this.getCurrentWord();

        for (RemovalInterface removal : reversedRemovals) {
            if (!this.isSuffixRemoval(removal))
                continue;

            if (removal.getRemovedPart().equals("kan")) {
                this.setCurrentWord(removal.getResult().concat("k"));

                // step 4, 5
                this.removePrefixes();
                if (this.dictionary.contains((this.getCurrentWord())))
                    return;

                this.setCurrentWord(removal.getResult().concat("kan"));
            } else
                this.setCurrentWord(removal.getSubject());

            // step 4, 5
            this.removePrefixes();
            if (this.dictionary.contains(this.getCurrentWord()))
                return;
            this.removals = _removals;
            this.setCurrentWord(_currentWord);
        }
    }

    private boolean isSuffixRemoval(RemovalInterface removal) {
        String removalAffixType = removal.getAffixType();
        return removalAffixType.equals(EnumRemovalRules.REMOVE_DERIVATIONAL_SUFFIX.getAffixType())
                || removalAffixType.equals(EnumRemovalRules.REMOVE_INFLECTIONAL_PARTICLE.getAffixType())
                || removalAffixType.equals(EnumRemovalRules.REMOVE_INFLECTIONAL_POSSESSIVE_PRONOUN.getAffixType());
    }

    public void accept(VisitorInterface visitor) {
        visitor.visit(this);
    }

    public void removeSuffixes() {
        System.out.println("Removing suffixes");
    }

    /**
     * Restore prefix to proceed with ECS loop pengembalian akhiran
     *
     * return void
     */
    public void restorePrefix() {
        for (RemovalInterface removal : this.removals) {
            if (removal.getAffixType().equals("DP")) {
                // return the word before precoding (the subject of first prefix removal)
                this.setCurrentWord(removal.getSubject());
                break;
            }
        }
        for (RemovalInterface removal : removals) {
            if (removal.getAffixType().equals("DP")) {
                this.removals.remove(removal);
            }
        }
    }
}