NGramIndexer.java :  » Search » fuzzy-search-tools » ru » fuzzysearch » Java Open Source

Java Open Source » Search » fuzzy search tools 
fuzzy search tools » ru » fuzzysearch » NGramIndexer.java
package ru.fuzzysearch;

/**
 * Builds an {@link NGramIndex} for a dictionary of words.
 *
 * <p>Every run of {@code n} consecutive characters of a word is encoded as a single
 * integer (see {@link #getNGram}); that integer is used as a slot number in a table
 * whose entries list the dictionary positions of the words containing the n-gram.
 */
public class NGramIndexer implements Indexer {

  /** N-gram length used by the single-argument constructor. */
  private static final int DEFAULT_N = 3;

  private final Alphabet alphabet;
  private final int n;

  /**
   * Creates an indexer that uses n-grams of length {@code DEFAULT_N} (3).
   *
   * @param alphabet alphabet used to map characters to codes
   */
  public NGramIndexer(Alphabet alphabet) {
    this(alphabet, DEFAULT_N);
  }

  /**
   * Creates an indexer that uses n-grams of the given length.
   *
   * @param alphabet alphabet used to map characters to codes
   * @param n n-gram length
   * @throws NullPointerException if {@code alphabet} is null
   * @throws IllegalArgumentException if {@code n} is smaller than 1
   */
  public NGramIndexer(Alphabet alphabet, int n) {
    if (alphabet == null) {
      throw new NullPointerException("alphabet");
    }
    if (n < 1) {
      throw new IllegalArgumentException("n must be at least 1, got " + n);
    }
    this.alphabet = alphabet;
    this.n = n;
  }

  /**
   * Indexes the dictionary in two passes: the first counts the occurrences of every
   * n-gram so each bucket can be allocated at its exact size, the second stores the
   * word positions into the buckets.
   *
   * <p>Words shorter than {@code n} contribute no n-grams. A word that contains the
   * same n-gram several times is stored in that bucket once per occurrence. N-grams
   * that never occur keep a {@code null} bucket.
   *
   * @param dictionary words to index
   * @return an {@link NGramIndex} built from the dictionary, the alphabet, the bucket
   *     table, {@code n} and the length of the longest word
   * @throws ArithmeticException if the table size {@code alphabet.size()^n} does not
   *     fit in an {@code int}
   */
  public Index createIndex(String[] dictionary) {
    int mapLength = ngramTableSize();

    // Pass 1: occurrences per n-gram, plus the length of the longest word.
    int[] ngramCountMap = new int[mapLength];
    int maxLength = 0;
    for (String word : dictionary) {
      maxLength = Math.max(maxLength, word.length());

      int ngramsInWord = word.length() - n + 1;
      for (int k = 0; k < ngramsInWord; ++k) {
        ++ngramCountMap[getNGram(alphabet, word, k, n)];
      }
    }

    // Pass 2: allocate each bucket lazily and fill it from the back, reusing the
    // counter as the next free position. Within a bucket the word positions
    // therefore end up in descending order.
    int[][] ngramMap = new int[mapLength][];
    for (int i = 0; i < dictionary.length; ++i) {
      String word = dictionary[i];
      int ngramsInWord = word.length() - n + 1;
      for (int k = 0; k < ngramsInWord; ++k) {
        int ngram = getNGram(alphabet, word, k, n);
        if (ngramMap[ngram] == null) {
          ngramMap[ngram] = new int[ngramCountMap[ngram]];
        }
        ngramMap[ngram][--ngramCountMap[ngram]] = i;
      }
    }

    return new NGramIndex(dictionary, alphabet, ngramMap, n, maxLength);
  }

  /**
   * Encodes {@code n} characters of {@code string}, starting at {@code start}, as one
   * integer by treating the character codes returned by {@code alphabet.mapChar} as
   * digits of a number in base {@code alphabet.size()}, first character most
   * significant.
   *
   * @param alphabet alphabet supplying the base and the character codes
   * @param string text to read the characters from
   * @param start index of the first character of the n-gram
   * @param n number of characters to encode
   * @return the integer code of the n-gram
   */
  public static int getNGram(Alphabet alphabet, CharSequence string, int start, int n) {
    int base = alphabet.size();
    int end = start + n;
    int ngram = 0;
    for (int i = start; i < end; ++i) {
      ngram = ngram * base + alphabet.mapChar(string.charAt(i));
    }
    return ngram;
  }

  /**
   * Computes {@code alphabet.size()^n}, the number of slots in the n-gram table.
   * The product is accumulated in a {@code long} so that a value too large for an
   * {@code int} is reported instead of silently wrapping around.
   */
  private int ngramTableSize() {
    long alphabetSize = alphabet.size();
    long tableSize = 1;
    for (int i = 0; i < n; ++i) {
      tableSize *= alphabetSize;
      if (tableSize != (int) tableSize) {
        throw new ArithmeticException(
            "n-gram table size overflows int: alphabet size " + alphabetSize + ", n = " + n);
      }
    }
    return (int) tableSize;
  }
}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.