public class GeneratorJob extends NutchTool implements Tool
Modifier and Type | Class and Description |
---|---|
static class |
GeneratorJob.SelectorEntry |
static class |
GeneratorJob.SelectorEntryComparator |
Modifier and Type | Field and Description |
---|---|
static String |
BATCH_ID |
static String |
GENERATE_COUNT |
static String |
GENERATE_UPDATE_CRAWLDB |
static String |
GENERATOR_COUNT_MODE |
static String |
GENERATOR_COUNT_VALUE_DOMAIN |
static String |
GENERATOR_COUNT_VALUE_HOST |
static String |
GENERATOR_COUNT_VALUE_IP |
static String |
GENERATOR_CUR_TIME |
static String |
GENERATOR_DELAY |
static String |
GENERATOR_FILTER |
static String |
GENERATOR_MAX_COUNT |
static String |
GENERATOR_MIN_SCORE |
static String |
GENERATOR_NORMALISE |
static String |
GENERATOR_RANDOM_SEED |
static String |
GENERATOR_TOP_N |
static org.slf4j.Logger |
LOG |
Fields inherited from class NutchTool: currentJob, currentJobNum, numJobs, results, status
Constructor and Description |
---|
GeneratorJob() |
GeneratorJob(Configuration conf) |
Modifier and Type | Method and Description |
---|---|
String |
generate(long topN, long curTime, boolean filter, boolean norm)
Mark URLs ready for fetching.
|
Collection<WebPage.Field> |
getFields(Job job) |
static void |
main(String[] args) |
static String |
randomBatchId()
Generates a random batch ID.
|
Map<String,Object> |
run(Map<String,Object> args)
Runs the tool, using a map of arguments.
|
int |
run(String[] args) |
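Methods inherited from class NutchTool: getProgress, getStatus, killJob, stopJob
Methods inherited from class org.apache.hadoop.conf.Configured: getConf, setConf
Methods inherited from class java.lang.Object: clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait
Methods inherited from interface org.apache.hadoop.conf.Configurable: getConf, setConf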
public static final String GENERATE_UPDATE_CRAWLDB
public static final String GENERATOR_MIN_SCORE
public static final String GENERATOR_FILTER
public static final String GENERATOR_NORMALISE
public static final String GENERATOR_MAX_COUNT
public static final String GENERATOR_COUNT_MODE
public static final String GENERATOR_COUNT_VALUE_DOMAIN
public static final String GENERATOR_COUNT_VALUE_HOST
public static final String GENERATOR_COUNT_VALUE_IP
public static final String GENERATOR_TOP_N
public static final String GENERATOR_CUR_TIME
public static final String GENERATOR_DELAY
public static final String GENERATOR_RANDOM_SEED
public static final String BATCH_ID
public static final String GENERATE_COUNT
public static final org.slf4j.Logger LOG
public GeneratorJob()
public GeneratorJob(Configuration conf)
public Collection<WebPage.Field> getFields(Job job)
public static String randomBatchId()
public Map<String,Object> run(Map<String,Object> args) throws Exception
Runs the tool, using a map of arguments. Specified by: run in class NutchTool
public String generate(long topN, long curTime, boolean filter, boolean norm) throws Exception
Copyright © 2015 The Apache Software Foundation