Download WebCollector-2.32.jar file

Introduction

You can download WebCollector-2.32.jar in this page.

License

GPL2.0

Type List

WebCollector-2.32.jar file has the following types.

.netbeans_automatic_build
META-INF/MANIFEST.MF
META-INF/maven/cn.edu.hfut.dmic.webcollector/WebCollector/pom.properties
META-INF/maven/cn.edu.hfut.dmic.webcollector/WebCollector/pom.xml
cn.edu.hfut.dmic.contentextractor.ContentExtractor.class
cn.edu.hfut.dmic.contentextractor.News.class
cn.edu.hfut.dmic.webcollector.crawldb.DBManager.class
cn.edu.hfut.dmic.webcollector.crawldb.Generator.class
cn.edu.hfut.dmic.webcollector.crawldb.Injector.class
cn.edu.hfut.dmic.webcollector.crawldb.SegmentWriter.class
cn.edu.hfut.dmic.webcollector.crawler.AutoParseCrawler.class
cn.edu.hfut.dmic.webcollector.crawler.Crawler.class
cn.edu.hfut.dmic.webcollector.example.DemoBingCrawler.class
cn.edu.hfut.dmic.webcollector.example.DemoDepthCrawler.class
cn.edu.hfut.dmic.webcollector.example.DemoMetaCrawler.class
cn.edu.hfut.dmic.webcollector.example.DemoPostCrawler.class
cn.edu.hfut.dmic.webcollector.example.DemoSelenium.class
cn.edu.hfut.dmic.webcollector.example.TutorialCrawler.class
cn.edu.hfut.dmic.webcollector.fetcher.Executor.class
cn.edu.hfut.dmic.webcollector.fetcher.Fetcher.class
cn.edu.hfut.dmic.webcollector.fetcher.Visitor.class
cn.edu.hfut.dmic.webcollector.model.CrawlDatum.class
cn.edu.hfut.dmic.webcollector.model.CrawlDatums.class
cn.edu.hfut.dmic.webcollector.model.Links.class
cn.edu.hfut.dmic.webcollector.model.Page.class
cn.edu.hfut.dmic.webcollector.net.HttpRequest.class
cn.edu.hfut.dmic.webcollector.net.HttpResponse.class
cn.edu.hfut.dmic.webcollector.net.Proxys.class
cn.edu.hfut.dmic.webcollector.net.Requester.class
cn.edu.hfut.dmic.webcollector.plugin.berkeley.BerkeleyCrawler.class
cn.edu.hfut.dmic.webcollector.plugin.berkeley.BerkeleyDBManager.class
cn.edu.hfut.dmic.webcollector.plugin.berkeley.BerkeleyDBReader.class
cn.edu.hfut.dmic.webcollector.plugin.berkeley.BerkeleyDBUtils.class
cn.edu.hfut.dmic.webcollector.plugin.berkeley.BerkeleyGenerator.class
cn.edu.hfut.dmic.webcollector.plugin.berkeley.BreadthCrawler.class
cn.edu.hfut.dmic.webcollector.plugin.ram.RamCrawler.class
cn.edu.hfut.dmic.webcollector.plugin.ram.RamDB.class
cn.edu.hfut.dmic.webcollector.plugin.ram.RamDBManager.class
cn.edu.hfut.dmic.webcollector.plugin.ram.RamGenerator.class
cn.edu.hfut.dmic.webcollector.util.CharsetDetector.class
cn.edu.hfut.dmic.webcollector.util.Config.class
cn.edu.hfut.dmic.webcollector.util.CrawlDatumFormater.class
cn.edu.hfut.dmic.webcollector.util.FileSystemOutput.class
cn.edu.hfut.dmic.webcollector.util.FileUtils.class
cn.edu.hfut.dmic.webcollector.util.JsoupUtils.class
cn.edu.hfut.dmic.webcollector.util.RegexRule.class
log4j.properties

Pom

WebCollector-2.32.pom file content.

<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
    <modelVersion>4.0.0</modelVersion>
    <name>WebCollector</name>

    <groupId>cn.edu.hfut.dmic.webcollector</groupId>
    <artifactId>WebCollector</artifactId>
    <version>2.32</version>
    <description>A java crawler for information collection</description>
    <url>https://github.com/CrawlScript/WebCollector</url>
    <packaging>jar</packaging>
    <properties>
        <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
        <maven.compiler.source>1.6</maven.compiler.source>
        <maven.compiler.target>1.6</maven.compiler.target>
    </properties>

    <licenses>
        <license>
            <name>GPL2.0</name>
            <url>http://www.gnu.org/licenses/gpl-2.0.html</url>
            <distribution>repo</distribution>
        </license>
    </licenses>

    <scm>
        <url>https://github.com/CrawlScript/WebCollector</url>
        <connection>scm:git:https://github.com/CrawlScript/WebCollector.git</connection>
    </scm>


    <distributionManagement>
        <snapshotRepository>
            <id>ossrh</id>
            <url>https://oss.sonatype.org/content/repositories/snapshots</url>
        </snapshotRepository>
    </distributionManagement>

    <reporting>
        <plugins>
            <plugin>
                <groupId>org.codehaus.mojo</groupId>
                <artifactId>cobertura-maven-plugin</artifactId>
                <version>2.7</version>
                <reportSets>
                    <reportSet>
                        <reports>
                            <report>cobertura</report>
                        </reports>
                    </reportSet>
                </reportSets>
                <configuration>
                    <formats>
                        <format>html</format>
                    </formats>
                </configuration>
            </plugin>
        </plugins>
    </reporting>

    <build>

        <plugins>
        
                <plugin>
                    <groupId>org.sonatype.plugins</groupId>
                    <artifactId>nexus-staging-maven-plugin</artifactId>
                    <version>1.6.3</version>
                    <extensions>true</extensions>
                    <configuration>
                        <serverId>ossrh</serverId>
                        <nexusUrl>https://oss.sonatype.org/</nexusUrl>
                        <autoReleaseAfterClose>true</autoReleaseAfterClose>
                    </configuration>
                </plugin>
           
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-surefire-plugin</artifactId>
                <version>2.7.2</version>
                <configuration>
                    <forkMode>once</forkMode>
                    <argLine>-Dfile.encoding=UTF-8</argLine>
                    <systemProperties>
                        <property>
                            <name>net.sourceforge.cobertura.datafile</name>
                            <value>target/cobertura/cobertura.ser</value>
                        </property>
                    </systemProperties>
                    <skipTests>false</skipTests>
                </configuration>
            </plugin>
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-dependency-plugin</artifactId>
                <version>2.8</version>
                <executions>
                    <execution>
                        <id>copy-dependencies</id>
                        <phase>package</phase>
                        <goals>
                            <goal>copy-dependencies</goal>
                        </goals>
                        <configuration>
                            <outputDirectory>${project.build.directory}/</outputDirectory>
                            <overWriteReleases>false</overWriteReleases>
                            <overWriteSnapshots>true</overWriteSnapshots>
                            <excludeTransitive>false</excludeTransitive>
                            <excludeScope>provided</excludeScope>
                        </configuration>
                    </execution>
                </executions>
            </plugin>
            <!--
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-jar-plugin</artifactId>
                <version>2.5</version>
                <configuration>
                    <archive>
                        <manifest>
                            <addClasspath>true</addClasspath>
                            <mainClass>cn.edu.hfut.dmic.webcollector.ui.BreadthCrawlerUI</mainClass>
                        </manifest>
                    </archive>
                </configuration>
            </plugin> 
            -->
            
          
      
            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-javadoc-plugin</artifactId>
                <version>2.6.1</version>
                <configuration>
                </configuration>
                <executions>
                    <execution>
                        <id>attach-javadoc</id>
                        <phase>verify</phase>
                        <goals>
                            <goal>jar</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>


            <plugin>
                <groupId>org.apache.maven.plugins</groupId>
                <artifactId>maven-source-plugin</artifactId>
                <version>2.1.1</version>
                <configuration>
                </configuration>
                <executions>
                    <execution>
                        <id>attach-sources</id>
                        <phase>verify</phase>
                        <goals>
                            <goal>jar</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>


          
                        <plugin>
                            <groupId>org.apache.maven.plugins</groupId>
                            <artifactId>maven-gpg-plugin</artifactId>
                            <version>1.5</version>
                            <executions>
                                <execution>
                                    <id>sign-artifacts</id>
                                    <phase>verify</phase>
                                    <goals>
                                        <goal>sign</goal>
                                    </goals>
                                </execution>
                            </executions>
                        </plugin>
                     
        

            <plugin>
                <artifactId>maven-antrun-plugin</artifactId>
                <executions>
                    <execution>
                        <phase>package</phase>
                        <configuration>
                            <tasks>
                                <copy todir="${project.build.directory}/bin">
                                    <fileset dir="${project.build.directory}">
                                        <include name="*.jar"/>
                                        <include name="*.sh"/>
                                        <include name="*.bat"/>
                                    </fileset>
                                </copy>

                                <zip basedir="${project.build.directory}/bin"
                                     destfile="../webcollector-${project.version}-bin.zip"/>
                            </tasks>
                        </configuration>
                        <goals>
                            <goal>run</goal>
                        </goals>
                    </execution>
                </executions>
            </plugin>

        </plugins>

    </build>

    <dependencies>
        <!-- https://mvnrepository.com/artifact/org.jsoup/jsoup -->
        <dependency>
            <groupId>org.jsoup</groupId>
            <artifactId>jsoup</artifactId>
            <version>1.9.2</version>
        </dependency>

        <dependency>
            <groupId>com.googlecode.juniversalchardet</groupId>
            <artifactId>juniversalchardet</artifactId>
            <version>1.0.3</version>
        </dependency>

        <dependency>
            <groupId>junit</groupId>
            <artifactId>junit</artifactId>
            <version>4.11</version>
        </dependency>

        <dependency>
            <groupId>org.json</groupId>
            <artifactId>json</artifactId>
            <version>20140107</version>
        </dependency>

        <dependency>
            <groupId>com.sleepycat</groupId>
            <artifactId>je</artifactId>
            <version>5.0.73</version>
        </dependency>

        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-api</artifactId>
            <version>1.7.21</version>
        </dependency>
        <dependency>
            <groupId>org.slf4j</groupId>
            <artifactId>slf4j-log4j12</artifactId>
            <version>1.7.21</version>
        </dependency>


   
        <!-- 
            ??????dependency??JS????????(selenium)?
            ????????????? <scope>provided</scope>????????????
            ?????dependency???????M???????????????maven???,????
            ??????http://maven.oschina.net/help.html
        -->

        <!-- https://mvnrepository.com/artifact/org.seleniumhq.selenium/selenium-java -->
        <dependency>
            <groupId>org.seleniumhq.selenium</groupId>
            <artifactId>selenium-java</artifactId>
            <version>2.44.0</version>
             <scope>provided</scope>
        </dependency>

        <!--
          ??????dependency??????????(mysql?spring jdbc)?
          ??????????????????????????
          ?????dependency???????M???????????????maven???,????
          ??????http://maven.oschina.net/help.html
      -->
        <!--
     <dependency>
         <groupId>mysql</groupId>
         <artifactId>mysql-connector-java</artifactId>
         <version>5.1.31</version>
     </dependency>
     <dependency>
         <groupId>org.springframework</groupId>
         <artifactId>spring-jdbc</artifactId>
         <version>4.0.5.RELEASE</version>
     </dependency>

        <dependency>
            <groupId>commons-dbcp</groupId>
            <artifactId>commons-dbcp</artifactId>
            <version>1.4</version>
        </dependency>
        -->


    </dependencies>

    <developers>
        <developer>
            <id>hujun</id>
            <name>Hu Jun</name>
            <email>hujunxianligong@gmail.com</email>
            <roles>
                <role>Lead Developer</role>
            </roles>
            <timezone>+8</timezone>
        </developer>
    </developers>


</project>

POM Entry

<dependency>
   <groupId>cn.edu.hfut.dmic.webcollector</groupId>
   <artifactId>WebCollector</artifactId>
   <version>2.32</version>
</dependency>

Download

If you think the following WebCollector-2.32.jar downloaded from Maven central repository is inappropriate, such as containing malicious code/tools or violating the copyright, please email , thanks.



Download WebCollector-2.32.jar file




PreviousNext

Related