Download BookMarkCrawler_1 Free Java Code

Description

A version of the web crawler from the book "Algorithms of the Intelligent Web", adapted to extract specific sections from pages.

Source Files

The download file BookMarkCrawler_1-master.zip has the following entries.


.gitignore
README
body_filter.txt
build.xml
link_filter.txt
manifest.mf
nbproject/build-impl.xml
nbproject/genfiles.properties
nbproject/project.properties
nbproject/project.xml
src/bookmarkcrawler/BasicWebCrawler.java
src/bookmarkcrawler/BookmarkCrawler.java
src/bookmarkcrawler/CrawlData.java
src/bookmarkcrawler/CrawlDataProcessor.java
src/bookmarkcrawler/DocumentFilter.java
src/bookmarkcrawler/FetchAndProcessCrawler.java
src/bookmarkcrawler/URLFilter.java
src/bookmarkcrawler/URLNormalizer.java
src/bookmarkcrawler/db/FetchedDocsDB.java
src/bookmarkcrawler/db/KnownUrlDB.java
src/bookmarkcrawler/db/PageLinkDB.java
src/bookmarkcrawler/db/ProcessedDocsDB.java
src/bookmarkcrawler/model/FetchedDocument.java
src/bookmarkcrawler/model/KnownUrlEntry.java
src/bookmarkcrawler/model/Outlink.java
src/bookmarkcrawler/model/ProcessedDocument.java
src/bookmarkcrawler/parser/common/DocumentParser.java
src/bookmarkcrawler/parser/common/DocumentParserException.java
src/bookmarkcrawler/parser/common/DocumentParserFactory.java
src/bookmarkcrawler/parser/html/CompositeFilter.java
src/bookmarkcrawler/parser/html/ElementNodeFilter.java
src/bookmarkcrawler/parser/html/HTMLDocumentParser.java
src/bookmarkcrawler/parser/html/HTMLDocumentParserException.java
src/bookmarkcrawler/parser/html/HTMLPageSectionFilters.java
src/bookmarkcrawler/parser/html/HTMLWriter.java
src/bookmarkcrawler/parser/html/LinkNodeFilter.java
src/bookmarkcrawler/parser/html/MultiFilter.java
src/bookmarkcrawler/parser/html/NodeSpec.java
src/bookmarkcrawler/parser/html/SectionFilter.java
src/bookmarkcrawler/parser/html/SectionFilterSpec.java
src/bookmarkcrawler/parser/msword/MSWordDocumentParser.java
src/bookmarkcrawler/parser/msword/MSWordDocumentParserException.java
src/bookmarkcrawler/transport/common/Transport.java
src/bookmarkcrawler/transport/common/TransportException.java
src/bookmarkcrawler/transport/file/FileTransport.java
src/bookmarkcrawler/transport/file/FileTransportException.java
src/bookmarkcrawler/transport/http/HTTPTransport.java
src/bookmarkcrawler/transport/http/HTTPTransportException.java
src/bookmarkcrawler/transport/http/HTTPUtils.java
src/bookmarkcrawler/utils/CSVFile.java
src/bookmarkcrawler/utils/DocumentIdUtils.java
src/bookmarkcrawler/utils/FileUtils.java
src/bookmarkcrawler/utils/UrlGroup.java
src/bookmarkcrawler/utils/UrlUtils.java
src/bookmarkcrawler/utils/ValueToIndexMapping.java

Download

Click the following link to download BookMarkCrawler_1-master.zip.

BookMarkCrawler_1-master.zip




















Home »
  Java Free Code »
    Book »




Book
Book Algorithm
Book Design Patterns
Book Example
Book Hadoop
Book JEE
Book Maven
Book Spring