public interface Nutch
Modifier and Type | Field and Description |
---|---|
static String |
ALL_BATCH_ID_STR |
static org.apache.avro.util.Utf8 |
ALL_CRAWL_ID |
static String |
ARG_BATCH
Batch id to select.
|
static String |
ARG_CLASS
Class to run as a NutchTool.
|
static String |
ARG_CRAWL
Crawl id to use.
|
static String |
ARG_CURTIME
The notion of current time.
|
static String |
ARG_DEPTH
Depth (number of cycles) of a crawl.
|
static String |
ARG_FILTER
Apply URLFilters.
|
static String |
ARG_FORCE
Force processing even if there are locks or inconsistencies.
|
static String |
ARG_NORMALIZE
Apply URLNormalizers.
|
static String |
ARG_NUMTASKS
Number of fetcher tasks.
|
static String |
ARG_RESUME
Resume a previously aborted operation.
|
static String |
ARG_SEEDDIR
A path to a directory containing a list of seed URLs.
|
static String |
ARG_SEEDLIST
Whitespace-separated list of seed URLs.
|
static String |
ARG_SOLR
Solr URL.
|
static String |
ARG_SORT
Sort statistics.
|
static String |
ARG_THREADS
Number of fetcher threads (per map task).
|
static String |
ARG_TOPN
Generate topN scoring URLs.
|
static String |
BATCH_NAME_KEY |
static String |
CACHING_FORBIDDEN_ALL
Don't show either original forbidden content or summaries.
|
static String |
CACHING_FORBIDDEN_CONTENT
Don't show original forbidden content, but show summaries.
|
static String |
CACHING_FORBIDDEN_KEY
Sites may request that search engines don't provide access to cached
documents.
|
static org.apache.avro.util.Utf8 |
CACHING_FORBIDDEN_KEY_UTF8 |
static String |
CACHING_FORBIDDEN_NONE
Show both original forbidden content and summaries (default).
|
static String |
CHAR_ENCODING_FOR_CONVERSION |
static String |
CRAWL_ID_KEY |
static String |
FETCH_STATUS_KEY |
static String |
FETCH_TIME_KEY |
static String |
GENERATE_TIME_KEY |
static String |
ORIGINAL_CHAR_ENCODING |
static String |
PROTO_STATUS_KEY |
static String |
REPR_URL_KEY |
static String |
SCORE_KEY |
static String |
SIGNATURE_KEY |
static String |
STAT_COUNTERS
Counters.
|
static String |
STAT_JOBS
Jobs.
|
static String |
STAT_MESSAGE
Status / result message.
|
static String |
STAT_PHASE
Phase of processing.
|
static String |
STAT_PROGRESS
Progress (float).
|
static Text |
WRITABLE_GENERATE_TIME_KEY |
static Text |
WRITABLE_PROTO_STATUS_KEY |
static Text |
WRITABLE_REPR_URL_KEY |
static final String ORIGINAL_CHAR_ENCODING
static final String CHAR_ENCODING_FOR_CONVERSION
static final String SIGNATURE_KEY
static final String BATCH_NAME_KEY
static final String SCORE_KEY
static final String GENERATE_TIME_KEY
static final Text WRITABLE_GENERATE_TIME_KEY
static final String PROTO_STATUS_KEY
static final Text WRITABLE_PROTO_STATUS_KEY
static final String FETCH_TIME_KEY
static final String FETCH_STATUS_KEY
static final String CACHING_FORBIDDEN_KEY
static final org.apache.avro.util.Utf8 CACHING_FORBIDDEN_KEY_UTF8
static final String CACHING_FORBIDDEN_NONE
static final String CACHING_FORBIDDEN_ALL
static final String CACHING_FORBIDDEN_CONTENT
static final String REPR_URL_KEY
static final Text WRITABLE_REPR_URL_KEY
static final String ALL_BATCH_ID_STR
static final org.apache.avro.util.Utf8 ALL_CRAWL_ID
static final String CRAWL_ID_KEY
static final String ARG_BATCH
static final String ARG_CRAWL
static final String ARG_RESUME
static final String ARG_FORCE
static final String ARG_SORT
static final String ARG_SOLR
static final String ARG_THREADS
static final String ARG_NUMTASKS
static final String ARG_TOPN
static final String ARG_CURTIME
static final String ARG_FILTER
static final String ARG_NORMALIZE
static final String ARG_SEEDLIST
static final String ARG_SEEDDIR
static final String ARG_CLASS
static final String ARG_DEPTH
static final String STAT_MESSAGE
static final String STAT_PHASE
static final String STAT_PROGRESS
static final String STAT_JOBS
static final String STAT_COUNTERS
Copyright © 2015 The Apache Software Foundation