Download leech Free Java Code

Description

Crawling capabilities for Apache Tika. Crawl content out of, e.g., file systems, HTTP(S) sources (web crawling), IMAP(S) servers, or your own arbitrary data sources. Leech offers additional Tika parsers that provide these crawling capabilities.

Source Files

The download file leech-master.zip has the following entries.


.gitignore
README.md
codeSnippets.md
extending.md
gpl_v3.txt
how2build.txt
how2start.md
mailinglist.md
people.md
pom.xml
src/main/assembly/distributable.xml
src/main/java/de/dfki/km/leech/Leech.java
src/main/java/de/dfki/km/leech/config/CrawlerContext.java
src/main/java/de/dfki/km/leech/config/DirectoryCrawlerContext.java
src/main/java/de/dfki/km/leech/config/HtmlCrawlerContext.java
src/main/java/de/dfki/km/leech/config/ImapCrawlerContext.java
src/main/java/de/dfki/km/leech/config/LeechDefaultConfig.java
src/main/java/de/dfki/km/leech/detect/DatasourceMediaTypes.java
src/main/java/de/dfki/km/leech/detect/DirectoryDatasourceDetector.java
src/main/java/de/dfki/km/leech/detect/ImapDatasourceDetector.java
src/main/java/de/dfki/km/leech/detect/LeechDefaultDetector.java
src/main/java/de/dfki/km/leech/io/FileURLStreamProvider.java
src/main/java/de/dfki/km/leech/io/HttpURLStreamProvider.java
src/main/java/de/dfki/km/leech/io/ImapURLStreamProvider.java
src/main/java/de/dfki/km/leech/io/ShiftInitInputStream.java
src/main/java/de/dfki/km/leech/io/URLStreamProvider.java
src/main/java/de/dfki/km/leech/lucene/Buzzwords.java
src/main/java/de/dfki/km/leech/lucene/DateParser.java
src/main/java/de/dfki/km/leech/lucene/DateUtils.java
src/main/java/de/dfki/km/leech/lucene/FieldConfig.java
src/main/java/de/dfki/km/leech/lucene/FieldFactory.java
src/main/java/de/dfki/km/leech/lucene/LeechSimpleAnalyzer.java
src/main/java/de/dfki/km/leech/lucene/LetterOrDigitLowerCaseTokenizer.java
src/main/java/de/dfki/km/leech/lucene/LuceneAnalyzerFactory.java
src/main/java/de/dfki/km/leech/lucene/PageCountEstimator.java
src/main/java/de/dfki/km/leech/lucene/Term2FrequencyEntry.java
src/main/java/de/dfki/km/leech/lucene/ToLuceneContentHandler.java
src/main/java/de/dfki/km/leech/lucene/TopFrequentTermsTermVectorMapper.java
src/main/java/de/dfki/km/leech/metadata/LeechMetadata.java
src/main/java/de/dfki/km/leech/parser/CrawlerParser.java
src/main/java/de/dfki/km/leech/parser/DirectoryCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/HtmlCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/ImapCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/SambaCrawlerParser.java
src/main/java/de/dfki/km/leech/parser/filter/RegExpPattern.java
src/main/java/de/dfki/km/leech/parser/filter/SubstringPattern.java
src/main/java/de/dfki/km/leech/parser/filter/URLFilter.java
src/main/java/de/dfki/km/leech/parser/filter/URLFilterPattern.java
src/main/java/de/dfki/km/leech/parser/filter/URLFilteringParser.java
src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingHistory.java
src/main/java/de/dfki/km/leech/parser/incremental/IncrementalCrawlingParser.java
src/main/java/de/dfki/km/leech/parser/rss/FeedParser2.java
src/main/java/de/dfki/km/leech/parser/wikipedia/WikipediaDumpParser.java
src/main/java/de/dfki/km/leech/sax/CrawlReportContentHandler.java
src/main/java/de/dfki/km/leech/sax/DataSinkContentHandler.java
src/main/java/de/dfki/km/leech/sax/DataSinkContentHandlerAdapter.java
src/main/java/de/dfki/km/leech/sax/PrintlnContentHandler.java
src/main/java/de/dfki/km/leech/util/ExceptionUtils.java
src/main/java/de/dfki/km/leech/util/FileUtils.java
src/main/java/de/dfki/km/leech/util/IndexPostprocessor.java
src/main/java/de/dfki/km/leech/util/InquisitionMapEntry.java
src/main/java/de/dfki/km/leech/util/LeechException.java
src/main/java/de/dfki/km/leech/util/Levenshtein.java
src/main/java/de/dfki/km/leech/util/LuceneIndexCreator.java
src/main/java/de/dfki/km/leech/util/MultiValueBalancedTreeMap.java
src/main/java/de/dfki/km/leech/util/MultiValueHashMap.java
src/main/java/de/dfki/km/leech/util/MultiValueMap.java
src/main/java/de/dfki/km/leech/util/MultiValueTreeMap.java
src/main/java/de/dfki/km/leech/util/OSUtils.java
src/main/java/de/dfki/km/leech/util/StopWatch.java
src/main/java/de/dfki/km/leech/util/StringUtils.java
src/main/java/de/dfki/km/leech/util/TikaUtils.java
src/main/java/de/dfki/km/leech/util/UrlUtil.java
src/main/java/de/dfki/km/leech/util/ValueHolder.java
src/main/java/de/dfki/km/leech/util/certificates/CertificateIgnoringSocketFactory.java
src/main/java/de/dfki/km/leech/util/certificates/CertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/Decision.java
src/main/java/de/dfki/km/leech/util/certificates/PersistentCertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/RootCertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/SessionCertificateStore.java
src/main/java/de/dfki/km/leech/util/certificates/StandardTrustManager.java
src/main/java/de/dfki/km/leech/util/certificates/TrustDecider.java
src/main/resources/META-INF/services/de.dfki.km.leech.io.URLStreamProvider
src/main/resources/META-INF/services/org.apache.tika.parser.Parser
src/main/resources/org/apache/tika/mime/custom-mimetypes.xml
src/test/.gitignore
supporters.md

Download

Click the following link to download leech-master.zip.

leech-master.zip




















Home »
  Java Free Code »
    Development »




Algebra
Algorithm
Ant
Bytecode
Compiler
DataMining
DSL
Eclipse
Example
File
Git
Intellij
JavaBean
JAX
jBPM
JDK8
JME
JMX
JodaTime
JRuby
JSE
JSON
JVM
Lambda
libgdx
Library
log4j
Markdown
Math
Maven
Netbeans
NIO
NLP
OSGi
Plugin
Scala
SDK
slf4j
Template
Thread
Tutorial
Unicode
Utility
ZooKeeper