NutchAnalyzer.java :  » Web-Crawler » nutch » org » apache » nutch » analysis » Java Open Source

Java Open Source » Web Crawler » nutch 
nutch » org » apache » nutch » analysis » NutchAnalyzer.java
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.nutch.analysis;

// JDK imports
import java.io.Reader;

// Lucene imports
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;

// Hadoop imports
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;

// Nutch imports
import org.apache.nutch.plugin.Pluggable;


/**
 * Extension point for analysis.
 * All plugins found which implement this extension point are run
 * sequentially on the parse.
 *
 * <p>This base class leaves tokenization to its subclasses and only keeps
 * hold of the {@link Configuration} passed in through
 * {@link #setConf(Configuration)}.</p>
 *
 * @author Jérôme Charron
 */
public abstract class NutchAnalyzer extends Analyzer implements Configurable, Pluggable {

  /** Identifier of this extension point: the fully-qualified name of this class. */
  static final String X_POINT_ID = NutchAnalyzer.class.getName();

  /**
   * Configuration most recently handed to {@link #setConf(Configuration)};
   * {@code null} until that method has been called.
   */
  protected Configuration conf;

  /**
   * Builds a TokenStream that tokenizes all the text supplied by the given Reader.
   *
   * @param fieldName name of the field the text belongs to
   * @param reader    source of the text to tokenize
   * @return the token stream produced by the concrete analyzer
   */
  public abstract TokenStream tokenStream(String fieldName, Reader reader);

  // ---------------------------------------------------------------------
  // Configurable implementation
  // ---------------------------------------------------------------------

  /**
   * Stores the given configuration, replacing any previously stored one.
   *
   * @param conf the configuration to keep; stored as-is without validation
   */
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Returns the configuration last stored by {@link #setConf(Configuration)}.
   *
   * @return the stored configuration, or {@code null} if none has been set
   */
  public Configuration getConf() {
    return conf;
  }

}
java2s.com  | Contact Us | Privacy Policy
Copyright 2009 - 12 Demo Source and Support. All rights reserved.
All other trademarks are property of their respective owners.