Download crawler4j-4.3.jar file

Introduction

You can download crawler4j-4.3.jar on this page.

License

The Apache Software License, Version 2.0

Type List

The crawler4j-4.3.jar file contains the following entries.

META-INF/MANIFEST.MF
META-INF/maven/edu.uci.ics/crawler4j/pom.properties
META-INF/maven/edu.uci.ics/crawler4j/pom.xml
edu.uci.ics.crawler4j.crawler.Configurable.class
edu.uci.ics.crawler4j.crawler.CrawlConfig.class
edu.uci.ics.crawler4j.crawler.CrawlController.class
edu.uci.ics.crawler4j.crawler.Page.class
edu.uci.ics.crawler4j.crawler.WebCrawler.class
edu.uci.ics.crawler4j.crawler.authentication.AuthInfo.class
edu.uci.ics.crawler4j.crawler.authentication.BasicAuthInfo.class
edu.uci.ics.crawler4j.crawler.authentication.FormAuthInfo.class
edu.uci.ics.crawler4j.crawler.authentication.NtAuthInfo.class
edu.uci.ics.crawler4j.crawler.exceptions.ContentFetchException.class
edu.uci.ics.crawler4j.crawler.exceptions.PageBiggerThanMaxSizeException.class
edu.uci.ics.crawler4j.crawler.exceptions.ParseException.class
edu.uci.ics.crawler4j.fetcher.IdleConnectionMonitorThread.class
edu.uci.ics.crawler4j.fetcher.PageFetchResult.class
edu.uci.ics.crawler4j.fetcher.PageFetcher.class
edu.uci.ics.crawler4j.fetcher.SniPoolingHttpClientConnectionManager.class
edu.uci.ics.crawler4j.fetcher.SniSSLConnectionSocketFactory.class
edu.uci.ics.crawler4j.frontier.Counters.class
edu.uci.ics.crawler4j.frontier.DocIDServer.class
edu.uci.ics.crawler4j.frontier.Frontier.class
edu.uci.ics.crawler4j.frontier.InProcessPagesDB.class
edu.uci.ics.crawler4j.frontier.WebURLTupleBinding.class
edu.uci.ics.crawler4j.frontier.WorkQueues.class
edu.uci.ics.crawler4j.parser.AllTagMapper.class
edu.uci.ics.crawler4j.parser.BinaryParseData.class
edu.uci.ics.crawler4j.parser.ExtractedUrlAnchorPair.class
edu.uci.ics.crawler4j.parser.HtmlContentHandler.class
edu.uci.ics.crawler4j.parser.HtmlParseData.class
edu.uci.ics.crawler4j.parser.NotAllowedContentException.class
edu.uci.ics.crawler4j.parser.ParseData.class
edu.uci.ics.crawler4j.parser.Parser.class
edu.uci.ics.crawler4j.parser.TextParseData.class
edu.uci.ics.crawler4j.robotstxt.HostDirectives.class
edu.uci.ics.crawler4j.robotstxt.PathRule.class
edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig.class
edu.uci.ics.crawler4j.robotstxt.RobotstxtParser.class
edu.uci.ics.crawler4j.robotstxt.RobotstxtServer.class
edu.uci.ics.crawler4j.robotstxt.UserAgentDirectives.class
edu.uci.ics.crawler4j.url.TLDList.class
edu.uci.ics.crawler4j.url.URLCanonicalizer.class
edu.uci.ics.crawler4j.url.UrlResolver.class
edu.uci.ics.crawler4j.url.WebURL.class
edu.uci.ics.crawler4j.util.IO.class
edu.uci.ics.crawler4j.util.Net.class
edu.uci.ics.crawler4j.util.Util.class
tld-names.txt

Pom

Content of the crawler4j-4.3.pom file:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
	<modelVersion>4.0.0</modelVersion>
	<groupId>edu.uci.ics</groupId>
	<artifactId>crawler4j</artifactId>
	<packaging>jar</packaging>
	<name>crawler4j</name>
	<version>4.3</version>
	<description>Open Source Web Crawler for Java</description>
	<url>https://github.com/yasserg/crawler4j</url>

	<!-- Project license: Apache License 2.0, with a link to the license text. -->
	<licenses>
		<license>
			<name>The Apache Software License, Version 2.0</name>
			<url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
			<distribution>repo</distribution>
		</license>
	</licenses>
	<!-- Source control: the GitHub project page, the Git connection strings,
	     and the tag that corresponds to this version. -->
	<scm>
		<url>https://github.com/yasserg/crawler4j</url>
		<connection>scm:git:git@github.com:yasserg/crawler4j.git</connection>
		<developerConnection>scm:git:git@github.com:yasserg/crawler4j.git</developerConnection>
		<tag>crawler4j-4.3</tag>
	</scm>
	<!-- Deployment targets on oss.sonatype.org. Snapshots and releases both
	     use the server id "ossrh". -->
	<distributionManagement>
		<snapshotRepository>
			<id>ossrh</id>
			<url>https://oss.sonatype.org/content/repositories/snapshots</url>
		</snapshotRepository>
		<repository>
			<id>ossrh</id>
			<url>https://oss.sonatype.org/service/local/staging/deploy/maven2/</url>
		</repository>
	</distributionManagement>	

	<!-- Version numbers for the libraries declared in the dependencies section
	     of this POM, which refers to them as ${name.version}. -->
	<properties>
		<slf4j.version>1.7.22</slf4j.version>
		<logback.version>1.1.7</logback.version>
		<guava.version>21.0</guava.version>
		<apache.http.components.version>4.5.3</apache.http.components.version>
		<je.version>5.0.84</je.version>
		<apache.tika.version>1.14</apache.tika.version>
		<!--test dependency versions -->
		<junit.version>4.12</junit.version>
		<wiremock.version>2.4.1</wiremock.version>
		<spock.version>1.0-groovy-2.4</spock.version>
		<groovy.version>2.4.7</groovy.version>
	</properties>

	<!-- Parent POM: org.sonatype.oss:oss-parent, version 7. -->
	<parent>
		<groupId>org.sonatype.oss</groupId>
		<artifactId>oss-parent</artifactId>
		<version>7</version>
	</parent>

	<profiles>
		<!-- Profile "release": attaches source and javadoc jars, signs the
		     artifacts, and configures Nexus staging and the release plugin. -->
		<profile>
			<id>release</id>

			<build>
				<plugins>
					<!-- generate a source jar -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-source-plugin</artifactId>
						<version>2.4</version>
						<executions>
							<execution>
								<id>attach-sources</id>
								<goals>
									<goal>jar</goal>
								</goals>
							</execution>
						</executions>
					</plugin>
					<!-- generate a javadoc jar -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-javadoc-plugin</artifactId>
						<version>2.10.1</version>
						<configuration>
							<!-- passes the -Xdoclint:none option to the javadoc tool -->
							<additionalparam>-Xdoclint:none</additionalparam>
						</configuration>
						<executions>
							<execution>
								<id>attach-javadocs</id>
								<goals>
									<goal>jar</goal>
								</goals>
							</execution>
						</executions>
					</plugin>
					<!-- sign the artifacts with GPG during the verify phase -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-gpg-plugin</artifactId>
						<version>1.5</version>
						<executions>
							<execution>
								<id>sign-artifacts</id>
								<phase>verify</phase>
								<goals>
									<goal>sign</goal>
								</goals>
							</execution>
						</executions>
					</plugin>
					<!-- Nexus staging against oss.sonatype.org with server id "ossrh";
					     autoReleaseAfterClose is switched on -->
					<plugin>
						<groupId>org.sonatype.plugins</groupId>
						<artifactId>nexus-staging-maven-plugin</artifactId>
						<version>1.6.7</version>
						<extensions>true</extensions>
						<configuration>
							<serverId>ossrh</serverId>
							<nexusUrl>https://oss.sonatype.org/</nexusUrl>
							<autoReleaseAfterClose>true</autoReleaseAfterClose>
						</configuration>
					</plugin>
					<!-- release plugin: goal "deploy", release profile "release",
					     the built-in release profile is switched off -->
					<plugin>
						<groupId>org.apache.maven.plugins</groupId>
						<artifactId>maven-release-plugin</artifactId>
						<version>2.5.3</version>
						<configuration>
							<autoVersionSubmodules>true</autoVersionSubmodules>
							<useReleaseProfile>false</useReleaseProfile>
							<releaseProfiles>release</releaseProfiles>
							<goals>deploy</goals>
						</configuration>
					</plugin>
				</plugins>
			</build>
		</profile>

		<!-- Profile "fatjar": in the package phase, runs the assembly plugin's
		     "single" goal with the jar-with-dependencies descriptor. -->
		<profile>
			<id>fatjar</id>
			<build>
				<plugins>
					<plugin>
						<artifactId>maven-assembly-plugin</artifactId>
						<version>2.5.3</version>
						<configuration>
							<descriptorRefs>
								<descriptorRef>jar-with-dependencies</descriptorRef>
							</descriptorRefs>
						</configuration>
						<executions>
							<execution>
								<id>make-fat-jar</id>
								<phase>package</phase>
								<goals>
									<goal>single</goal>
								</goals>
								<configuration>
									<!-- base name of the assembled jar -->
									<finalName>crawler4j-${project.version}</finalName>
								</configuration>
							</execution>
						</executions>
					</plugin>
				</plugins>
			</build>
		</profile>
	</profiles>

	<!-- Default build: Java 1.8 compilation, jar packaging, Checkstyle,
	     JaCoCo coverage, and Groovy support for main and test sources. -->
	<build>
		<plugins>
			<!-- Java compiler: source and target level 1.8 -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-compiler-plugin</artifactId>
				<version>3.2</version>
				<configuration>
					<source>1.8</source>
					<target>1.8</target>
				</configuration>
			</plugin>
			<!-- Jar packaging: files matching **/*.properties are excluded -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-jar-plugin</artifactId>
				<version>2.5</version>
				<configuration>
					<excludes>
						<exclude>**/*.properties</exclude>
					</excludes>
				</configuration>
			</plugin>
			<!-- Checkstyle: the "check" goal runs in the compile phase against
			     checkstyle.xml, test sources included; the plugin is given
			     checkstyle 7.1 as its own dependency -->
			<plugin>
				<groupId>org.apache.maven.plugins</groupId>
				<artifactId>maven-checkstyle-plugin</artifactId>
				<version>2.17</version>
				<executions>
					<execution>
						<id>compile</id>
						<phase>compile</phase>
						<configuration>
							<configLocation>checkstyle.xml</configLocation>
							<encoding>UTF-8</encoding>
							<consoleOutput>true</consoleOutput>
							<failsOnError>true</failsOnError>
							<includeTestSourceDirectory>true</includeTestSourceDirectory>
						</configuration>
						<goals>
							<goal>check</goal>
						</goals>
					</execution>
				</executions>
				<dependencies>
					<dependency>
						<groupId>com.puppycrawl.tools</groupId>
						<artifactId>checkstyle</artifactId>
						<version>7.1</version>
					</dependency>
				</dependencies>
			</plugin>
			<!-- JaCoCo: "prepare-agent" (no explicit phase) plus a "report" in
			     the test phase that leaves out **/exceptions/** -->
			<plugin>
				<groupId>org.jacoco</groupId>
				<artifactId>jacoco-maven-plugin</artifactId>
				<version>0.7.9</version>
				<executions>
					<execution>
						<id>pre-unit-test</id>
						<goals>
							<goal>prepare-agent</goal>
						</goals>
					</execution>
					<execution>
						<id>post-unit-test</id>
						<phase>test</phase>
						<goals>
							<goal>report</goal>
						</goals>
						<configuration>
							<excludes>
								<exclude>**/exceptions/**</exclude>
							</excludes>
						</configuration>
					</execution>
				</executions>
			</plugin>
			<!-- Groovy via GMavenPlus: adds source directories, generates and
			     removes stubs, and compiles main and test code -->
			<plugin>
				<groupId>org.codehaus.gmavenplus</groupId>
				<artifactId>gmavenplus-plugin</artifactId>
				<version>1.5</version>
				<executions>
					<execution>
						<goals>
							<goal>addSources</goal>
							<goal>addTestSources</goal>
							<goal>generateStubs</goal>
							<goal>compile</goal>
							<goal>testGenerateStubs</goal>
							<goal>testCompile</goal>
							<goal>removeStubs</goal>
							<goal>removeTestStubs</goal>
						</goals>
					</execution>
				</executions>
			</plugin>
			<!-- Build helper: registers src/main/groovy and src/test/groovy
			     as additional source and test-source directories -->
			<plugin>
				<groupId>org.codehaus.mojo</groupId>
				<artifactId>build-helper-maven-plugin</artifactId>
				<version>1.9.1</version>
				<executions>
					<execution>
						<id>add-source</id>
						<phase>generate-sources</phase>
						<goals>
							<goal>add-source</goal>
						</goals>
						<configuration>
							<sources>
								<source>src/main/groovy</source>
							</sources>
						</configuration>
					</execution>
					<execution>
						<id>add-test-source</id>
						<phase>generate-test-sources</phase>
						<goals>
							<goal>add-test-source</goal>
						</goals>
						<configuration>
							<sources>
								<source>src/test/groovy</source>
							</sources>
						</configuration>
					</execution>
				</executions>
			</plugin>
		</plugins>
	</build>

	<dependencies>

		<!-- Compile-time and runtime dependencies -->

		<!-- Logging framework -->
		<dependency>
			<groupId>org.slf4j</groupId>
			<artifactId>slf4j-api</artifactId>
			<version>${slf4j.version}</version>
		</dependency>
		<!-- Implementation of slf4j (runtime scope) -->
		<dependency>
			<groupId>ch.qos.logback</groupId>
			<artifactId>logback-classic</artifactId>
			<version>${logback.version}</version>
			<scope>runtime</scope>
		</dependency>
		<!-- Google's core Java libraries -->
		<dependency>
			<groupId>com.google.guava</groupId>
			<artifactId>guava</artifactId>
			<version>${guava.version}</version>
		</dependency>
		<!-- Apache HttpComponents HTTP client -->
		<dependency>
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpclient</artifactId>
			<version>${apache.http.components.version}</version>
			<scope>compile</scope>
		</dependency>

		<!-- com.sleepycat:je -->
		<dependency>
			<groupId>com.sleepycat</groupId>
			<artifactId>je</artifactId>
			<version>${je.version}</version>
		</dependency>

		<!-- Apache Tika parsers -->
		<dependency>
			<groupId>org.apache.tika</groupId>
			<artifactId>tika-parsers</artifactId>
			<version>${apache.tika.version}</version>
		</dependency>

		<!-- Test dependencies -->
		<dependency>
			<groupId>junit</groupId>
			<artifactId>junit</artifactId>
			<version>${junit.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>com.github.tomakehurst</groupId>
			<artifactId>wiremock</artifactId>
			<version>${wiremock.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.codehaus.groovy</groupId>
			<artifactId>groovy-all</artifactId>
			<version>${groovy.version}</version>
			<scope>test</scope>
		</dependency>
		<dependency>
			<groupId>org.spockframework</groupId>
			<artifactId>spock-core</artifactId>
			<version>${spock.version}</version>
			<scope>test</scope>
		</dependency>
	</dependencies>

	<!-- Additional artifact repository hosted on download.oracle.com.
	     NOTE(review): presumably declared for the com.sleepycat:je dependency; confirm.
	     NOTE(review): the URL is plain http. Maven 3.8.1 and later block
	     external http repositories by default, so verify resolution (or an
	     https equivalent) before reusing this POM. -->
	<repositories>
		<repository>
			<id>oracleReleases</id>
			<name>Oracle Released Java Packages</name>
			<url>http://download.oracle.com/maven</url>
			<layout>default</layout>
		</repository>
	</repositories>

</project>

POM Entry

<!-- Dependency declaration for crawler4j 4.3; place it inside the
     dependencies element of your own pom.xml. -->
<dependency>
   <groupId>edu.uci.ics</groupId>
   <artifactId>crawler4j</artifactId>
   <version>4.3</version>
</dependency>

Download

If you think the following crawler4j-4.3.jar, downloaded from the Maven Central repository, is inappropriate (for example, because it contains malicious code or tools, or violates copyright), please email us. Thanks.



Download crawler4j-4.3.jar file




PreviousNext

Related