nutch ant script : Big Project Ant Script « Ant « Java






nutch ant script


<?xml version="1.0"?>
<!--
 Licensed to the Apache Software Foundation (ASF) under one or more
 contributor license agreements.  See the NOTICE file distributed with
 this work for additional information regarding copyright ownership.
 The ASF licenses this file to You under the Apache License, Version 2.0
 (the "License"); you may not use this file except in compliance with
 the License.  You may obtain a copy of the License at

     http://www.apache.org/licenses/LICENSE-2.0

 Unless required by applicable law or agreed to in writing, software
 distributed under the License is distributed on an "AS IS" BASIS,
 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License.
-->
<project name="Nutch" default="job">

  <!-- Load all the default properties, and any the user wants    -->
  <!-- to contribute (without having to type -D or edit this file). -->
  <!-- Ant properties cannot be redefined once set, so a value from -->
  <!-- an earlier file wins: user home first, then the project's    -->
  <!-- build.properties, then default.properties.                   -->
  <property file="${user.home}/build.properties" />
  <property file="${basedir}/build.properties" />
  <property file="${basedir}/default.properties" />
  <property name="test.junit.output.format" value="plain"/>
 
  <!-- Compile classpath: the compiled core classes plus every jar directly under lib.dir -->
  <path id="classpath">
    <pathelement location="${build.classes}"/>
    <fileset dir="${lib.dir}" includes="*.jar"/>
  </path>

  <!-- Unit test classpath.  plugins.classpath.dir is set to the directory
       that contains build.plugins. -->
  <dirname file="${build.plugins}" property="plugins.classpath.dir"/>
  <path id="test.classpath">
    <!-- compiled tests, configuration, test sources, plugin parent directory -->
    <pathelement location="${test.build.classes}"/>
    <pathelement location="${conf.dir}"/>
    <pathelement location="${test.src.dir}"/>
    <pathelement location="${plugins.classpath.dir}"/>
    <!-- everything on the normal compile classpath -->
    <path refid="classpath"/>
    <!-- the job file produced by the "job" target -->
    <pathelement location="${build.dir}/${final.name}.job"/>
  </path>

  <!-- XML catalog referenced by the xslt tasks: maps the XHTML 1.0
       Transitional public id to a DTD file under xmlcatalog.dir -->
  <xmlcatalog id="docDTDs">
    <dtd location="${xmlcatalog.dir}/xhtml1-transitional.dtd"
         publicId="-//W3C//DTD XHTML 1.0 Transitional//EN"/>
  </xmlcatalog>

  <!-- ====================================================== -->
  <!-- Stuff needed by all targets                            -->
  <!-- ====================================================== -->
  <target name="init">
    <!-- Output directories for the main and test builds -->
    <mkdir dir="${build.dir}"/>
    <mkdir dir="${build.classes}"/>
    <mkdir dir="${test.build.dir}"/>
    <mkdir dir="${test.build.classes}"/>

    <!-- Back-date every *.template file under conf.dir, then copy each one
         to the same name without the .template suffix.
         NOTE(review): presumably the old timestamp keeps the copy from
         overwriting an existing, newer configuration file - confirm. -->
    <touch datetime="01/25/1971 2:00 pm">
      <fileset dir="${conf.dir}">
        <include name="**/*.template"/>
      </fileset>
    </touch>
    <copy verbose="true" todir="${conf.dir}">
      <fileset dir="${conf.dir}">
        <include name="**/*.template"/>
      </fileset>
      <mapper type="glob" from="*.template" to="*"/>
    </copy>

    <!-- Extract bin.tgz from the hadoop jar(s) and unpack it into bin -->
    <mkdir dir="${build.dir}/hadoop"/>
    <unjar dest="${build.dir}/hadoop">
      <fileset dir="${lib.dir}">
        <include name="hadoop*.jar"/>
      </fileset>
      <patternset>
        <include name="bin.tgz"/>
      </patternset>
    </unjar>
    <untar compression="gzip" src="${build.dir}/hadoop/bin.tgz" dest="bin"/>

    <!-- Fix broken library paths with spaces: pipe PlatformName through sed -->
    <replace file="bin/hadoop"
             token="PlatformName"
             value="PlatformName | sed -e 's/ /_/g'"/>
    <chmod dir="bin" includes="*.sh,hadoop" perm="ugo+rx"/>

    <!-- Extract the hadoop webapps from the hadoop jar(s) into the build directory -->
    <mkdir dir="${build.dir}/webapps"/>
    <unjar dest="${build.dir}">
      <fileset dir="${lib.dir}">
        <include name="hadoop*.jar"/>
      </fileset>
      <patternset>
        <include name="webapps/**"/>
      </patternset>
    </unjar>
  </target>

  <!-- ====================================================== -->
  <!-- Compile the Java files                                 -->
  <!-- ====================================================== -->
  <!-- Aggregate target: compiles the core classes, then the plugins -->
  <target name="compile" depends="compile-core, compile-plugins"/>

  <target name="compile-core" depends="init">
    <!-- Compile the org.apache.nutch sources from src.dir into build.classes;
         the compiler switches come from the javac.* properties. -->
    <javac srcdir="${src.dir}"
           destdir="${build.classes}"
           includes="org/apache/nutch/**/*.java"
           encoding="${build.encoding}"
           source="${javac.version}"
           target="${javac.version}"
           debug="${javac.debug}"
           optimize="${javac.optimize}"
           deprecation="${javac.deprecation}">
      <classpath refid="classpath"/>
    </javac>
  </target>

  <target name="compile-plugins">
    <!-- Delegate to the "deploy" target of the build file in src/plugin;
         this project's properties are not inherited by that build. -->
    <ant target="deploy" dir="src/plugin" inheritAll="false"/>
  </target>

  <target name="generate-src" depends="init">
    <!-- Run JavaCC (installed at javacc.home) on the NutchAnalysis grammar -->
    <javacc javacchome="${javacc.home}"
            target="${src.dir}/org/apache/nutch/analysis/NutchAnalysis.jj"/>

    <!-- Normalise line endings of all Java sources under src.dir to LF -->
    <fixcrlf srcdir="${src.dir}" includes="**/*.java" eol="lf"/>
  </target>

  <!-- Aggregate target: regenerate the parser sources, then compile everything -->
  <target name="dynamic" depends="generate-src, compile"/>

  <!-- ================================================================== -->
  <!-- Make nutch.jar                                                     -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="jar" depends="compile-core">
    <!-- Put both nutch configuration files next to the compiled classes
         so that they are packed into the jar. -->
    <copy todir="${build.classes}" file="${conf.dir}/nutch-default.xml"/>
    <copy todir="${build.classes}" file="${conf.dir}/nutch-site.xml"/>

    <!-- Jar up everything under build.classes -->
    <jar basedir="${build.classes}"
         jarfile="${build.dir}/${final.name}.jar">
      <manifest/>
    </jar>
  </target>

  <!-- ================================================================== -->
  <!-- Make job jar                                                       -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="job" depends="compile">
    <jar jarfile="${build.dir}/${final.name}.job">
      <!-- Compiled classes.  If the "jar" target has already run,
           build.classes also holds the nutch config files; leave them out
           here because the conf directory supplies them below. -->
      <zipfileset dir="${build.classes}">
        <exclude name="nutch-default.xml"/>
        <exclude name="nutch-site.xml"/>
      </zipfileset>
      <!-- Configuration, minus templates and hadoop files -->
      <zipfileset dir="${conf.dir}">
        <exclude name="*.template"/>
        <exclude name="hadoop*.*"/>
      </zipfileset>
      <!-- Library jars under lib/, except the hadoop jars -->
      <zipfileset prefix="lib" dir="${lib.dir}">
        <include name="**/*.jar"/>
        <exclude name="hadoop-*.jar"/>
      </zipfileset>
      <!-- Built plugins under plugins/ -->
      <zipfileset prefix="plugins" dir="${build.plugins}"/>
    </jar>
  </target>

  <!-- ================================================================== -->
  <!-- Make nutch.war                                                     -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="war" depends="jar,compile,generate-docs">

    <!-- Transform nutch-default.xml into nutch.xml, the servlet context file -->
    <xslt style="${basedir}/conf/context.xsl"
          in="${basedir}/conf/nutch-default.xml"
          out="${build.dir}/nutch.xml">
      <xmlcatalog refid="docDTDs"/>
      <outputproperty name="indent" value="yes"/>
    </xslt>

    <war webxml="${web.src.dir}/web.xml"
         destfile="${build.dir}/${final.name}.war">
      <!-- Web content: JSPs, include fragments and the generated docs -->
      <fileset dir="${web.src.dir}/jsp"/>
      <zipfileset dir="${docs.src}" includes="include/*.html"/>
      <zipfileset dir="${build.docs}" includes="*/include/*.html"/>
      <fileset dir="${docs.dir}"/>

      <!-- WEB-INF/lib: selected third-party jars and the nutch jar -->
      <lib dir="${lib.dir}">
        <include name="lucene*.jar"/>
        <include name="taglibs-*.jar"/>
        <include name="hadoop-*.jar"/>
        <include name="dom4j-*.jar"/>
        <include name="xerces-*.jar"/>
        <include name="tika-*.jar"/>
        <include name="apache-solr-*.jar"/>
        <include name="commons-httpclient-*.jar"/>
        <include name="commons-codec-*.jar"/>
        <include name="commons-collections-*.jar"/>
        <include name="commons-beanutils-*.jar"/>
        <include name="commons-cli-*.jar"/>
        <include name="commons-lang-*.jar"/>
        <include name="commons-logging-*.jar"/>
        <include name="log4j-*.jar"/>
      </lib>
      <lib dir="${build.dir}">
        <include name="${final.name}.jar"/>
      </lib>

      <!-- WEB-INF/classes: configuration, locale resources, logging setup, plugins -->
      <classes dir="${conf.dir}">
        <exclude name="**/*.template"/>
      </classes>
      <classes dir="${web.src.dir}/locale"/>
      <classes file="${web.src.dir}/log4j.properties"/>
      <zipfileset dir="${build.plugins}" prefix="WEB-INF/classes/plugins"/>

      <!-- WEB-INF: taglib descriptors found in lib.dir -->
      <webinf dir="${lib.dir}">
        <include name="taglibs-*.tld"/>
      </webinf>
    </war>
  </target>


  <!-- ================================================================== -->
  <!-- Compile test code                                                  -->
  <!-- ================================================================== -->
  <target name="compile-core-test" depends="compile-core">
    <!-- Compile the org.apache.nutch test sources from test.src.dir into
         test.build.classes, using the same javac.* switches as the core. -->
    <javac srcdir="${test.src.dir}"
           destdir="${test.build.classes}"
           includes="org/apache/nutch/**/*.java"
           encoding="${build.encoding}"
           source="${javac.version}"
           target="${javac.version}"
           debug="${javac.debug}"
           optimize="${javac.optimize}"
           deprecation="${javac.deprecation}">
      <classpath refid="test.classpath"/>
    </javac>
  </target>

  <!-- ================================================================== -->
  <!-- Run code checks (PMD)                                              -->
  <!-- ================================================================== -->
  <target name="pmd" depends="compile">
    <!-- Location of the HTML report -->
    <property name="pmd.report" location="${build.dir}/pmd-report.html"/>

    <!-- Load the PMD Ant task from jars kept under lib.dir -->
    <taskdef name="pmd" classname="net.sourceforge.pmd.ant.PMDTask">
      <classpath>
        <fileset dir="${lib.dir}">
          <include name="pmd-ext/*.jar"/>
          <include name="xerces*.jar"/>
        </fileset>
      </classpath>
    </taskdef>

    <!-- Rule violations do not fail the task itself
         (failOnRuleViolation is false); the failures property name
         is pmd.failures and is checked further down. -->
    <pmd encoding="${build.encoding}"
         shortFilenames="true"
         failonerror="true"
         failOnRuleViolation="false"
         failuresPropertyName="pmd.failures">
      <ruleset>unusedcode</ruleset>
      <!--ruleset>basic</ruleset-->
      <!--ruleset>optimizations</ruleset-->
      <formatter type="html" toFile="${pmd.report}"/>
      <!-- <formatter type="xml" toFile="${tempbuild}/$report_pmd.xml"/> -->
      <fileset dir="${basedir}/src">
        <include name="java/**/*.java"/>
        <include name="plugin/**/*.java"/>
        <!-- Exclude generated sources -->
        <exclude name="**/NutchAnalysis.java"/>
        <exclude name="**/NutchAnalysisTokenManager.java"/>
      </fileset>
    </pmd>

    <!-- Fail the build when pmd.failures is set and is not 0 -->
    <condition property="pmd.stop" value="true">
      <and>
        <isset property="pmd.failures"/>
        <not>
          <equals arg1="0" arg2="${pmd.failures}" trim="true"/>
        </not>
      </and>
    </condition>
    <fail if="pmd.stop">FAILURE: PMD shows ${pmd.failures} rule violations. See ${pmd.report} for details.</fail>
  </target>

  <!-- ================================================================== -->
  <!-- Run unit tests                                                     -->
  <!-- ================================================================== -->
  <!-- Aggregate target: runs the core unit tests, then the plugin tests -->
  <target name="test" depends="test-core, test-plugins"/>

  <target name="test-core" depends="job, compile-core-test">

    <!-- Start from an empty test data directory -->
    <delete dir="${test.build.data}"/>
    <mkdir dir="${test.build.data}"/>

    <!-- Copy the resources needed by the JUnit tests -->
    <copy todir="${test.build.data}">
      <fileset dir="src/testresources" includes="**/*"/>
    </copy>
    <copy todir="${test.build.classes}" file="${test.src.dir}/nutch-site.xml"/>
    <copy todir="${test.build.classes}" file="${test.src.dir}/log4j.properties"/>

    <!-- Run the tests in a forked JVM.  Errors and failures do not halt
         the run; they set tests.failed, which is checked afterwards. -->
    <junit fork="yes" dir="${basedir}" maxmemory="1000m"
           printsummary="yes" haltonfailure="no"
           errorProperty="tests.failed" failureProperty="tests.failed">
      <sysproperty key="test.build.data" value="${test.build.data}"/>
      <sysproperty key="test.src.dir" value="${test.src.dir}"/>
      <classpath refid="test.classpath"/>
      <formatter type="${test.junit.output.format}"/>

      <!-- Default: every Test*.java except the test.exclude pattern -->
      <batchtest unless="testcase" todir="${test.build.dir}">
        <fileset dir="${test.src.dir}"
                 includes="**/Test*.java"
                 excludes="**/${test.exclude}.java"/>
      </batchtest>

      <!-- When the testcase property is set: only that test -->
      <batchtest if="testcase" todir="${test.build.dir}">
        <fileset dir="${test.src.dir}" includes="**/${testcase}.java"/>
      </batchtest>
    </junit>

    <fail if="tests.failed">Tests failed!</fail>

  </target>

  <target name="test-plugins" depends="compile">
    <!-- Delegate to the "test" target of the build file in src/plugin;
         this project's properties are not inherited by that build. -->
    <ant target="test" dir="src/plugin" inheritAll="false"/>
  </target>

  <!-- Aggregate target: run all tests, then build the release tarball -->
  <target name="nightly" depends="test, tar"/>

  <!-- ================================================================== -->
  <!-- Documentation                                                      -->
  <!-- ================================================================== -->
  <target name="javadoc" depends="compile">
    <!-- Generate API documentation for the core and the plugins into build.javadoc -->
    <mkdir dir="${build.javadoc}"/>
    <javadoc
      overview="${src.dir}/overview.html"
      destdir="${build.javadoc}"
      author="true"
      version="true"
      use="true"
      windowtitle="${Name} ${version} API"
      doctitle="${Name} ${version} API"
      bottom="Copyright &amp;copy; ${year} The Apache Software Foundation"
      >
      <!-- Proxy options handed to the javadoc tool (javadoc.proxy.* properties) -->
      <arg value="${javadoc.proxy.host}"/>
      <arg value="${javadoc.proxy.port}"/>

      <!-- Source roots: the core, then each plugin (one entry per plugin) -->
      <packageset dir="${src.dir}"/>
      <packageset dir="${plugins.dir}/lib-http/src/java"/>
      <packageset dir="${plugins.dir}/lib-parsems/src/java"/>
      <packageset dir="${plugins.dir}/lib-regex-filter/src/java"/>
      <packageset dir="${plugins.dir}/microformats-reltag/src/java"/>
      <packageset dir="${plugins.dir}/ontology/src/java"/>
      <packageset dir="${plugins.dir}/protocol-file/src/java"/>
      <packageset dir="${plugins.dir}/protocol-ftp/src/java"/>
      <packageset dir="${plugins.dir}/protocol-http/src/java"/>
      <packageset dir="${plugins.dir}/protocol-httpclient/src/java"/>
      <packageset dir="${plugins.dir}/parse-ext/src/java"/>
      <packageset dir="${plugins.dir}/parse-html/src/java"/>
      <packageset dir="${plugins.dir}/parse-js/src/java"/>
      <packageset dir="${plugins.dir}/parse-text/src/java"/>
      <packageset dir="${plugins.dir}/parse-pdf/src/java"/>
<!--  <packageset dir="${plugins.dir}/parse-rtf/src/java"/> plugin excluded from build due to licensing issues-->
<!--  <packageset dir="${plugins.dir}/parse-mp3/src/java"/> plugin excluded from build due to licensing issues-->
      <packageset dir="${plugins.dir}/parse-msexcel/src/java"/>
      <packageset dir="${plugins.dir}/parse-mspowerpoint/src/java"/>
      <packageset dir="${plugins.dir}/parse-msword/src/java"/>
      <packageset dir="${plugins.dir}/parse-oo/src/java"/>
      <packageset dir="${plugins.dir}/parse-rss/src/java"/>
      <packageset dir="${plugins.dir}/parse-swf/src/java"/>
      <packageset dir="${plugins.dir}/parse-zip/src/java"/>
      <packageset dir="${plugins.dir}/index-basic/src/java"/>
      <packageset dir="${plugins.dir}/index-more/src/java"/>
      <packageset dir="${plugins.dir}/query-basic/src/java"/>
      <packageset dir="${plugins.dir}/query-more/src/java"/>
      <packageset dir="${plugins.dir}/query-site/src/java"/>
      <packageset dir="${plugins.dir}/query-url/src/java"/>
      <packageset dir="${plugins.dir}/scoring-opic/src/java"/>
      <packageset dir="${plugins.dir}/summary-basic/src/java"/>
      <packageset dir="${plugins.dir}/summary-lucene/src/java"/>
      <packageset dir="${plugins.dir}/urlfilter-automaton/src/java"/>
      <packageset dir="${plugins.dir}/urlfilter-regex/src/java"/>
      <packageset dir="${plugins.dir}/urlfilter-prefix/src/java"/>
      <packageset dir="${plugins.dir}/creativecommons/src/java"/>
      <packageset dir="${plugins.dir}/languageidentifier/src/java"/>
      <packageset dir="${plugins.dir}/clustering-carrot2/src/java"/>

      <!-- External API documentation to link against -->
      <link href="${javadoc.link.java}"/>
      <link href="${javadoc.link.lucene}"/>
      <link href="${javadoc.link.hadoop}"/>

      <!-- Compile classpath plus every jar found below the plugin sources -->
      <classpath refid="classpath"/>
      <classpath>
        <fileset dir="${plugins.dir}" >
          <include name="**/*.jar"/>
        </fileset>
      </classpath>

      <!-- Overview page groups; the package patterns are the plugins.* properties -->
      <group title="Core" packages="org.apache.nutch.*"/>
      <group title="Plugins API" packages="${plugins.api}"/>
      <group title="Protocol Plugins" packages="${plugins.protocol}"/>
      <group title="URL Filter Plugins" packages="${plugins.urlfilter}"/>
      <group title="Scoring Plugins" packages="${plugins.scoring}"/>
      <group title="Parse Plugins" packages="${plugins.parse}"/>
      <group title="Analysis Plugins" packages="${plugins.analysis}"/>
      <group title="Indexing Filter Plugins" packages="${plugins.index}"/>
      <group title="Query Filter Plugins" packages="${plugins.query}"/>
      <group title="Summary Plugins" packages="${plugins.summary}"/>
      <group title="Clustering Plugins" packages="${plugins.clustering}"/>
      <group title="Ontology Plugins" packages="${plugins.ontology}"/>
      <group title="Misc. Plugins" packages="${plugins.misc}"/>
    </javadoc>
    <!-- Copy the plugin.dtd file to the plugin doc-files dir -->
    <copy file="${plugins.dir}/plugin.dtd"
          todir="${build.javadoc}/org/apache/nutch/plugin/doc-files"/>
  </target>
  
  <target name="default-doc">
    <!-- Render conf/nutch-default.xml into docs.dir with the nutch-conf
         stylesheet.  Uses the xslt task name, as the rest of this file
         does, instead of its deprecated alias "style". -->
    <xslt basedir="${conf.dir}" destdir="${docs.dir}"
          includes="nutch-default.xml" style="conf/nutch-conf.xsl"/>
  </target>

  <!-- Generates the site pages for one locale; runs only when doc.locale is set -->
  <target name="generate-locale" if="doc.locale">
    <echo message="Generating docs for locale=${doc.locale}"/>

    <!-- Build the locale's header fragment -->
    <mkdir dir="${build.docs}/${doc.locale}/include"/>
    <xslt style="${docs.src}/style/nutch-header.xsl"
          in="${docs.src}/include/${doc.locale}/header.xml"
          out="${build.docs}/${doc.locale}/include/header.html">
      <xmlcatalog refid="docDTDs"/>
    </xslt>

    <!-- Remove generated pages that are older than the include or style sources -->
    <dependset>
      <srcfileset dir="${docs.src}/include/${doc.locale}" includes="*.xml"/>
      <srcfileset dir="${docs.src}/style" includes="*.xsl"/>
      <targetfileset dir="${docs.dir}/${doc.locale}" includes="*.html"/>
    </dependset>

    <!-- Place the page stylesheet in the locale's build directory -->
    <copy preservelastmodified="true"
          file="${docs.src}/style/nutch-page.xsl"
          todir="${build.docs}/${doc.locale}"/>

    <!-- Transform every page source of this locale into docs.dir -->
    <xslt style="${build.docs}/${doc.locale}/nutch-page.xsl"
          basedir="${docs.src}/pages/${doc.locale}"
          includes="*.xml"
          destdir="${docs.dir}/${doc.locale}">
      <xmlcatalog refid="docDTDs"/>
    </xslt>
  </target>


  <target name="generate-docs" depends="init">
    <!-- Remove generated HTML that is older than the shared include fragments -->
    <dependset>
      <srcfileset dir="${docs.src}/include" includes="*.html"/>
      <targetfileset dir="${docs.dir}" includes="**/*.html"/>
    </dependset>

    <!-- Stage the shared include files -->
    <mkdir dir="${build.docs}/include"/>
    <copy todir="${build.docs}/include">
      <fileset dir="${docs.src}/include"/>
    </copy>

    <!-- One generate-locale run per supported locale, in this order -->
    <antcall target="generate-locale"><param name="doc.locale" value="ca"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="de"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="en"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="es"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="fi"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="fr"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="hu"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="it"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="jp"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="ms"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="nl"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="pl"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="pt"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="sh"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="sr"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="sv"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="th"/></antcall>
    <antcall target="generate-locale"><param name="doc.locale" value="zh"/></antcall>

    <!-- Normalise line endings of the generated HTML to LF -->
    <fixcrlf srcdir="${docs.dir}" includes="**/*.html"
             encoding="utf-8" eol="lf"/>

  </target>

  <!-- ================================================================== -->
  <!-- D I S T R I B U T I O N                                            -->
  <!-- ================================================================== -->
  <!--                                                                    -->
  <!-- ================================================================== -->
  <target name="package" depends="jar, job, war, javadoc">
    <!-- Assemble the distribution tree under dist.dir -->
    <mkdir dir="${dist.dir}"/>
    <mkdir dir="${dist.dir}/lib"/>
    <mkdir dir="${dist.dir}/bin"/>
    <mkdir dir="${dist.dir}/docs"/>
    <mkdir dir="${dist.dir}/docs/api"/>
    <mkdir dir="${dist.dir}/plugins"/>

    <!-- Libraries: taken from lib.dir, the same location the classpath and
         the init/job/war targets read from, so that an overridden lib.dir
         is honoured here too (the default value still points at ./lib). -->
    <copy todir="${dist.dir}/lib" includeEmptyDirs="false">
      <fileset dir="${lib.dir}"/>
    </copy>

    <!-- Built plugins and unpacked webapps -->
    <copy todir="${dist.dir}/plugins">
      <fileset dir="${build.plugins}"/>
    </copy>

    <copy todir="${dist.dir}/webapps">
      <fileset dir="${build.webapps}"/>
    </copy>

    <!-- The three build artifacts -->
    <copy file="${build.dir}/${final.name}.jar" todir="${dist.dir}"/>
    <copy file="${build.dir}/${final.name}.job" todir="${dist.dir}"/>
    <copy file="${build.dir}/${final.name}.war" todir="${dist.dir}"/>

    <!-- Scripts and configuration (templates are left out) -->
    <copy todir="${dist.dir}/bin">
      <fileset dir="bin"/>
    </copy>

    <copy todir="${dist.dir}/conf">
      <fileset dir="${conf.dir}" excludes="**/*.template"/>
    </copy>

    <!-- Make every copied script executable -->
    <chmod perm="ugo+x" type="file">
        <fileset dir="${dist.dir}/bin"/>
    </chmod>

    <!-- Site documentation and generated API docs -->
    <copy todir="${dist.dir}/docs">
      <fileset dir="${docs.dir}"/>
    </copy>

    <copy todir="${dist.dir}/docs/api">
      <fileset dir="${build.javadoc}"/>
    </copy>

    <!-- Top-level text files and the KEYS file -->
    <copy todir="${dist.dir}">
      <fileset dir=".">
        <include name="*.txt" />
        <include name="KEYS" />
      </fileset>
    </copy>

    <!-- Full source tree plus the build files -->
    <copy todir="${dist.dir}/src" includeEmptyDirs="true">
      <fileset dir="src"/>
    </copy>

    <copy todir="${dist.dir}/" file="build.xml"/>
    <copy todir="${dist.dir}/" file="default.properties"/>

  </target>

  <!-- ================================================================== -->
  <!-- Make release tarball                                               -->
  <!-- ================================================================== -->
  <target name="tar" depends="package">
    <tar destfile="${build.dir}/${final.name}.tar.gz"
         compression="gzip"
         longfile="gnu">
      <!-- Everything in the distribution except bin/*: mode 664 -->
      <tarfileset mode="664" dir="${build.dir}">
        <include name="${final.name}/**"/>
        <exclude name="${final.name}/bin/*"/>
      </tarfileset>
      <!-- The files in bin/: mode 755 -->
      <tarfileset mode="755" dir="${build.dir}">
        <include name="${final.name}/bin/*"/>
      </tarfileset>
    </tar>
  </target>
  
  <!-- ================================================================== -->
  <!-- Clean.  Delete the build files, and their directories              -->
  <!-- ================================================================== -->
  <!-- Deletes build.dir.  Files that init writes outside build.dir
       (the unpacked bin scripts and the configuration files copied from
       templates) are not removed by this target. -->
  <target name="clean">
    <delete dir="${build.dir}"/>
  </target>

  <!-- ================================================================== -->
  <!-- RAT targets                                                        -->
  <!-- ================================================================== -->
  <target name="rat-sources-typedef">
    <!-- Load the RAT antlib from any rat*.jar in the project's top-level directory -->
    <typedef resource="org/apache/rat/anttasks/antlib.xml">
      <classpath>
        <fileset dir=".">
          <include name="rat*.jar"/>
        </fileset>
      </classpath>
    </typedef>
  </target>

  <target name="rat-sources"
          description="runs the tasks over src/java"
          depends="rat-sources-typedef">
    <!-- Run the RAT report over the core sources and every plugin's src tree -->
    <rat:report xmlns:rat="antlib:org.apache.rat.anttasks">
      <fileset dir="src" includes="java/**/*,plugin/**/src/**/*"/>
    </rat:report>
  </target>
  
</project>


File: default.properties

# Project identity.  final.name is the base name the build file uses for
# the jar, job, war and tar.gz artifacts and for the distribution directory.
Name=Nutch
name=nutch
version=1.0
final.name=${name}-${version}
year=2006

# Source, library, configuration and documentation locations.
# NOTE(review): basedir is normally predefined by Ant itself, in which case
# the assignment below has no effect - confirm.
basedir = ./
src.dir = ./src/java
lib.dir = ./lib
conf.dir = ./conf
plugins.dir = ./src/plugin
docs.dir = ./docs
docs.src = ${basedir}/src/web
xmlcatalog.dir = ${basedir}/src/xmlcatalog

# Build output locations (everything below build.dir is removed by "clean")
build.dir = ./build
build.classes = ${build.dir}/classes
build.webapps = ${build.dir}/webapps
build.plugins = ${build.dir}/plugins
build.docs = ${build.dir}/docs
build.javadoc = ${build.docs}/api
build.encoding = UTF-8

# Unit test sources and outputs
test.src.dir = ./src/test
test.build.dir = ${build.dir}/test
test.build.data =  ${test.build.dir}/data
test.build.classes = ${test.build.dir}/classes
test.build.javadoc = ${test.build.dir}/docs/api

# JavaCC installation used by the generate-src target
javacc.home=/usr/java/javacc

# Web application sources
web.src.dir = ./src/web
src.webapps = ./src/webapps

# Proxy Host and Port to use for building JavaDoc
javadoc.proxy.host=-J-DproxyHost=
javadoc.proxy.port=-J-DproxyPort=
javadoc.link.java=http://java.sun.com/j2se/1.4.2/docs/api/
javadoc.link.lucene=http://jakarta.apache.org/lucene/docs/api/
javadoc.link.hadoop=http://lucene.apache.org/hadoop/docs/api/
javadoc.packages=org.apache.nutch.*

# Directory in which the "package" target assembles the distribution
dist.dir=${build.dir}/${final.name}

# Compiler switches passed to every javac task in the build file
javac.debug=on
javac.optimize=on
javac.deprecation=off
javac.version= 1.5

#
# The plugins.* properties below are colon-separated package patterns.
# The javadoc target of the build file uses each one as the package list
# of a documentation group.
#

#
# Plugins API
#
plugins.api=\
   org.apache.nutch.protocol.http.api*:\
   org.apache.nutch.urlfilter.api*:\
   org.apache.nutch.parse.ms*

#
# Protocol Plugins
#
plugins.protocol=\
   org.apache.nutch.protocol.file*:\
   org.apache.nutch.protocol.ftp*:\
   org.apache.nutch.protocol.http*:\
   org.apache.nutch.protocol.httpclient*

#
# URL Filter Plugins
#
plugins.urlfilter=\
   org.apache.nutch.urlfilter.automaton*:\
   org.apache.nutch.urlfilter.prefix*:\
   org.apache.nutch.urlfilter.regex*

#
# Scoring Plugins
#
plugins.scoring=\
   org.apache.nutch.scoring.opic*

#
# Parse Plugins
#
plugins.parse=\
   org.apache.nutch.parse.ext*:\
   org.apache.nutch.parse.html*:\
   org.apache.nutch.parse.js:\
   org.apache.nutch.parse.mp3*:\
   org.apache.nutch.parse.msexcel*:\
   org.apache.nutch.parse.mspowerpoint*:\
   org.apache.nutch.parse.msword*:\
   org.apache.nutch.parse.oo*:\
   org.apache.nutch.parse.pdf*:\
   org.apache.nutch.parse.rtf*:\
   org.apache.nutch.parse.rss*:\
   org.apache.nutch.parse.swf*:\
   org.apache.nutch.parse.text:\
   org.apache.nutch.parse.zip

#
# Analysis Plugins
#
# NOTE(review): the trailing backslash below continues the value onto the
# next two lines, and '#' only starts a comment at the beginning of a
# logical line, so the "commented-out" entries appear to end up inside the
# property value - confirm, and move them off the continuation if that is
# not intended.
plugins.analysis=\
#  ${plugin.analysis-de}:\
#  ${plugin.analysis-fr}

#
# Indexing Filter Plugins
#
plugins.index=\
   org.apache.nutch.indexer.basic*:\
   org.apache.nutch.indexer.more*

#
# Query Filter Plugins
#
plugins.query=\
   org.apache.nutch.searcher.basic*:\
   org.apache.nutch.searcher.more*:\
   org.apache.nutch.searcher.site*:\
   org.apache.nutch.searcher.url*

#
# Ontology Plugins
#
plugins.ontology=\
   org.apache.nutch.ontology.jena*

#
# Online Clusterer Plugins
#
plugins.clustering=\
   org.apache.nutch.clustering.carrot2*

#
# Summary Plugins
#
plugins.summary=\
   org.apache.nutch.summary.basic*:\
   org.apache.nutch.summary.lucene*

#
# Misc. Plugins
#
# (gathers plugins that cannot be dispatched
# in any category, mainly because they contain
# many extension points)
#
plugins.misc=\
   org.apache.nutch.analysis.lang*:\
   org.apache.nutch.microformats.reltag*:\
   org.creativecommons.nutch*

 








Related examples in the same category

1.Ant script for xmlgraphics-commons
2.rhino ant build script
3.apache solr ant script
4.Tomcat ant build script
5.OFBiz ant build script
6.Apache Lenya Build System
7.Apache pivot ant build script
8.XmlSchema ant script
9.xml security
10.velocity tools ant script
11.weka build script
12.xml bean ant script
13.xml graphics common ant script
14.uPortal ant script
15.SmartGWT ant script
16.Build file to fetch maven2 tasks; extracted from (Ant's) fetch.xml
17.Build file to fetch optional libraries for Apache Ant
18.Ant build script
19.Build script for apache-cassandra-0.5.1-src
20.apache-log4j-site\build.xml
21.apache-roller-src-4.0.1
22.Build script from apache dbutils
23.Fop build script
24.Google guice ant script
25.GWT ant script
26.hadoop ant build script
27.jakarta jmeter ant script
28.jakarta oro ant script
29.jakarta regexp ant script
30.jedit build script
31.jibx ant build script
32.lucene ant build script