Download crawler4j-4.1.jar file

Introduction

You can download crawler4j-4.1.jar on this page.

License

The Apache Software License, Version 2.0

Type List

The crawler4j-4.1.jar file contains the following classes and resource files.

META-INF/MANIFEST.MF
META-INF/maven/edu.uci.ics/crawler4j/pom.properties
META-INF/maven/edu.uci.ics/crawler4j/pom.xml
edu.uci.ics.crawler4j.crawler.Configurable.class
edu.uci.ics.crawler4j.crawler.CrawlConfig.class
edu.uci.ics.crawler4j.crawler.CrawlController.class
edu.uci.ics.crawler4j.crawler.Page.class
edu.uci.ics.crawler4j.crawler.WebCrawler.class
edu.uci.ics.crawler4j.crawler.authentication.AuthInfo.class
edu.uci.ics.crawler4j.crawler.authentication.BasicAuthInfo.class
edu.uci.ics.crawler4j.crawler.authentication.FormAuthInfo.class
edu.uci.ics.crawler4j.crawler.exceptions.ContentFetchException.class
edu.uci.ics.crawler4j.crawler.exceptions.PageBiggerThanMaxSizeException.class
edu.uci.ics.crawler4j.crawler.exceptions.ParseException.class
edu.uci.ics.crawler4j.crawler.exceptions.RedirectException.class
edu.uci.ics.crawler4j.fetcher.IdleConnectionMonitorThread.class
edu.uci.ics.crawler4j.fetcher.PageFetchResult.class
edu.uci.ics.crawler4j.fetcher.PageFetcher.class
edu.uci.ics.crawler4j.frontier.Counters.class
edu.uci.ics.crawler4j.frontier.DocIDServer.class
edu.uci.ics.crawler4j.frontier.Frontier.class
edu.uci.ics.crawler4j.frontier.InProcessPagesDB.class
edu.uci.ics.crawler4j.frontier.WebURLTupleBinding.class
edu.uci.ics.crawler4j.frontier.WorkQueues.class
edu.uci.ics.crawler4j.parser.BinaryParseData.class
edu.uci.ics.crawler4j.parser.ExtractedUrlAnchorPair.class
edu.uci.ics.crawler4j.parser.HtmlContentHandler.class
edu.uci.ics.crawler4j.parser.HtmlParseData.class
edu.uci.ics.crawler4j.parser.NotAllowedContentException.class
edu.uci.ics.crawler4j.parser.ParseData.class
edu.uci.ics.crawler4j.parser.Parser.class
edu.uci.ics.crawler4j.parser.TextParseData.class
edu.uci.ics.crawler4j.robotstxt.HostDirectives.class
edu.uci.ics.crawler4j.robotstxt.RobotstxtConfig.class
edu.uci.ics.crawler4j.robotstxt.RobotstxtParser.class
edu.uci.ics.crawler4j.robotstxt.RobotstxtServer.class
edu.uci.ics.crawler4j.robotstxt.RuleSet.class
edu.uci.ics.crawler4j.url.TLDList.class
edu.uci.ics.crawler4j.url.URLCanonicalizer.class
edu.uci.ics.crawler4j.url.UrlResolver.class
edu.uci.ics.crawler4j.url.WebURL.class
edu.uci.ics.crawler4j.util.IO.class
edu.uci.ics.crawler4j.util.Net.class
edu.uci.ics.crawler4j.util.Util.class
logback.xml
tld-names.txt

Pom

The content of the crawler4j-4.1.pom file is shown below.

<project xmlns="http://maven.apache.org/POM/4.0.0"
         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <!-- Project coordinates and descriptive metadata -->
  <modelVersion>4.0.0</modelVersion>
  <groupId>edu.uci.ics</groupId>
  <artifactId>crawler4j</artifactId>
  <packaging>jar</packaging>
  <name>crawler4j</name>
  <version>4.1</version>
  <description>Open Source Web Crawler for Java</description>
  <url>https://github.com/yasserg/crawler4j</url>
  <licenses>
    <license>
      <name>The Apache Software License, Version 2.0</name>
      <url>http://www.apache.org/licenses/LICENSE-2.0.txt</url>
      <distribution>repo</distribution>
    </license>
  </licenses>
  <scm>
    <url>https://github.com/yasserg/crawler4j</url>
    <connection>scm:git:git@github.com:yasserg/crawler4j.git</connection>
    <developerConnection>scm:git:git@github.com:yasserg/crawler4j.git</developerConnection>
  </scm>

  <!-- Settings are inherited from the Sonatype OSS parent POM -->
  <parent>
    <groupId>org.sonatype.oss</groupId>
    <artifactId>oss-parent</artifactId>
    <version>7</version>
  </parent>

  <build>
    <plugins>
      <!-- Compile with Java 1.7 as both source and target level -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-compiler-plugin</artifactId>
        <version>2.3.2</version>
        <configuration>
          <source>1.7</source>
          <target>1.7</target>
        </configuration>
      </plugin>
      <!-- Jar packaging, configured to exclude files matching **/*.properties -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-jar-plugin</artifactId>
        <version>2.3</version>
        <configuration>
          <excludes>
            <exclude>**/*.properties</exclude>
          </excludes>
        </configuration>
      </plugin>
      <!-- Attach a sources jar to the build -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-source-plugin</artifactId>
        <version>2.2.1</version>
        <executions>
          <execution>
            <id>attach-sources</id>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
      <!-- Attach a javadoc jar to the build -->
      <plugin>
        <groupId>org.apache.maven.plugins</groupId>
        <artifactId>maven-javadoc-plugin</artifactId>
        <version>2.9</version>
        <executions>
          <execution>
            <id>attach-javadocs</id>
            <goals>
              <goal>jar</goal>
            </goals>
          </execution>
        </executions>
      </plugin>
    </plugins>
  </build>

  <dependencies>

    <!-- Compile-time dependencies -->
    <dependency>
      <groupId>org.slf4j</groupId>
      <artifactId>slf4j-api</artifactId>
      <version>1.7.7</version>
      <scope>compile</scope>
    </dependency>

    <dependency>
      <groupId>uk.org.lidalia</groupId>
      <artifactId>lidalia-slf4j-ext</artifactId>
      <version>1.0.0</version>
    </dependency>

    <!-- Note: logback-classic is declared with test scope even though it sits in this group -->
    <dependency>
      <groupId>ch.qos.logback</groupId>
      <artifactId>logback-classic</artifactId>
      <version>1.1.2</version>
      <scope>test</scope>
    </dependency>

    <dependency>
      <groupId>org.apache.httpcomponents</groupId>
      <artifactId>httpclient</artifactId>
      <version>4.3.5</version>
      <scope>compile</scope>
    </dependency>

    <dependency>
      <groupId>com.sleepycat</groupId>
      <artifactId>je</artifactId>
      <version>5.0.73</version>
    </dependency>

    <dependency>
      <groupId>org.apache.tika</groupId>
      <artifactId>tika-parsers</artifactId>
      <version>1.5</version>
    </dependency>

    <!-- Test-only dependencies -->
    <dependency>
      <groupId>junit</groupId>
      <artifactId>junit</artifactId>
      <version>4.11</version>
      <scope>test</scope>
    </dependency>
  </dependencies>

  <!-- Additional repository declared by this POM -->
  <repositories>
    <repository>
      <id>oracleReleases</id>
      <name>Oracle Released Java Packages</name>
      <url>http://download.oracle.com/maven</url>
      <layout>default</layout>
    </repository>
  </repositories>

</project>

POM Entry

<!-- Add this entry inside the <dependencies> element of your pom.xml -->
<dependency>
  <groupId>edu.uci.ics</groupId>
  <artifactId>crawler4j</artifactId>
  <version>4.1</version>
</dependency>

Download

If you think the following crawler4j-4.1.jar, downloaded from the Maven Central repository, is inappropriate (for example, because it contains malicious code or tools, or violates copyright), please email us. Thanks.



Download crawler4j-4.1.jar file




Previous | Next

Related