|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |
java.lang.Object
scone.robot.RobotTask
public class RobotTask
RobotTask classes are used to define tasks for the robot. Use the constructor to get a task and set the basic properties.
Field Summary | |
---|---|
static int |
ALL
Follow all links |
static int |
EXTERNAL
Follow only external links |
static int |
INTERNAL
Follow only internal links |
static int |
SUBDIRECTORIES
Follow only links that point to files in the same subdirectory |
Constructor Summary | |
---|---|
RobotTask(SimpleUri startURI,
int depth,
int restriction,
RobotUser robotUser)
constructor |
Method Summary | |
---|---|
void |
addLinkClassifier(LinkClassifier linkClassifier)
Adds a LinkClassifier to this task. |
void |
addLinkFilter(LinkFilter linkFilter)
Filters decide whether to follow a link or not. |
void |
addPageClassifier(PageClassifier pageClassifier)
Adds a PageClassifier to this task. |
void |
addPageFilter(PageFilter pageFilter)
Filters decide whether to stop the crawling at the current document or to continue with the links. |
void |
addResultNode(RobotHtmlNode robotHtmlNode)
Adds an element to the result set |
long |
getArrivalTime()
Get the arrival time of this task at the robot |
int |
getCacheHits()
Get the number of cache hits (queuedUris = downloadedUris + cacheHits) |
boolean |
getCheckDatabase()
See, if the robot checks the database |
int |
getCheckedUris()
Get the number of checked URIs. |
int |
getDepth()
Get the crawling depth |
boolean |
getDoContenSeenTest()
Checks if the robot does a content-seen-test. |
int |
getDownloadedUris()
Get the number of downloaded URIs |
long |
getEndTime()
Get the end time for this task |
long |
getExpiry()
Get the expiry time |
int |
getFilteredUris()
Get the number of filtered URIs. |
boolean |
getHeadOnly()
Checks, if the robot is in headOnly mode |
int |
getId()
Get the task's unique id |
java.util.Enumeration |
getLinkClassifier()
Get an Enumeration of all LinkClassifiers |
java.util.Enumeration |
getLinkFilter()
Get an Enumeration of all LinkFilter |
long |
getMaxDownloadTime()
Gets the maximum download time |
int |
getMaxDownloadUris()
Get the maximum number of documents the robot will download |
int |
getMaxPageSize()
Gets the download size limit |
int |
getNumberOfOpenUris()
Get the number of open URIs for this task. |
int |
getNumberOfResultNodes()
Get the number of result nodes |
boolean |
getObeyRobotExclusion()
Checks, if the robot is in obeyRobotExclusion mode |
QueueEntry |
getOpenUri(SimpleUri uri)
Checks if there's an element in this task's list of open URIs which is equal to uri |
java.util.Enumeration |
getOpenUris()
Get the URIs of this task the robot is currently working on. |
java.util.Enumeration |
getPageClassifier()
Get an Enumeration of all PageClassifiers |
java.util.Enumeration |
getPageFilter()
Get an Enumeration of all PageFilter |
int |
getQueuedUris()
Get the number of queued URIs |
boolean |
getRequireSourceCode()
Checks if the robot saves the source code of every document |
RobotHtmlNode |
getResultNode(SimpleUri uri)
Get the result node with the URI equal to the parameter uri |
java.util.Enumeration |
getResultNodes()
Get all result nodes |
long |
getStartTime()
Get the start time of this task |
SimpleUri |
getStartURI()
Get the start URI of this task |
long |
getUpdateDate()
Gets the update date |
boolean |
isOpenUri(SimpleUri uri)
Checks if there's an element in this task's list of open URIs which is equal to uri |
boolean |
isResultUri(SimpleUri uri)
Checks if this URI is in the result |
void |
removeLinkClassifier(LinkClassifier linkClassifier)
Removes a Classifier |
void |
removeLinkFilter(LinkFilter linkFilter)
Removes a Filter |
void |
removePageClassifier(PageClassifier pageClassifier)
Removes a Classifier |
void |
removePageFilter(PageFilter pageFilter)
Removes a Filter |
void |
setCheckDatabase(boolean checkDatabase)
Should the robot check the database before trying to download a document from the web. |
void |
setDoContentSeenTest()
Enables the content-seen-test. |
void |
setExpiry(long time)
When should this task expire. |
void |
setHeadOnly(boolean headOnly)
If this flag is set, HEAD instead of GET is used to contact the server |
void |
setMaxDownloadTime(long time)
The robot will only download a document for the specified time |
void |
setMaxDownloadUris(int max)
At most max documents are downloaded from the web. |
void |
setMaxPageSize(int size)
Only the specified amount of bytes are downloaded from each document |
void |
setObeyRobotExclusion(boolean obeyRobotExclusion)
Should the robot obey the robotExclusion. |
void |
setRequireSourceCode(boolean requireSourceCode)
If this is set to true, the robot saves the source code of every document. |
void |
setUpdateDate(long date)
Pages that were accessed (by the robot or the user) before date are downloaded again. |
boolean |
wasStopped()
Get the value of the stop flag |
Methods inherited from class java.lang.Object |
---|
clone, equals, finalize, getClass, hashCode, notify, notifyAll, toString, wait, wait, wait |
Field Detail |
---|
public static final int INTERNAL
public static final int SUBDIRECTORIES
public static final int EXTERNAL
public static final int ALL
Constructor Detail |
---|
public RobotTask(SimpleUri startURI, int depth, int restriction, RobotUser robotUser)
startURI
- start the crawl at this URI
depth
- follow the links to this depth
restriction
- use the constants defined in this class to restrict the crawling process
robotUser
- the robotUser will be called for every found document and at the end of the crawling
Method Detail |
---|
public int getId()
public SimpleUri getStartURI()
public void setHeadOnly(boolean headOnly)
headOnly
- if true, only the head of startUri will be loaded
public boolean getHeadOnly()
public int getDepth()
public void setObeyRobotExclusion(boolean obeyRobotExclusion)
obeyRobotExclusion
- if true, the robot will obey the robot exclusion protocol
public boolean getObeyRobotExclusion()
public void setExpiry(long time)
time
- time period in milliseconds, from the arrival of the task at the robot until the task's expiry.
public long getExpiry()
public void setMaxDownloadUris(int max)
max
- download at most max documents
public int getMaxDownloadUris()
public void setCheckDatabase(boolean checkDatabase)
checkDatabase
- if true, the robot always tries to find linked documents in the database.
public boolean getCheckDatabase()
public void setUpdateDate(long date)
date
- date in milliseconds after January 1, 1970 00:00:00 GMT
public long getUpdateDate()
public void setMaxPageSize(int size)
size
- download only size bytes
public int getMaxPageSize()
public void setMaxDownloadTime(long time)
time
- maximum download time for each document
public long getMaxDownloadTime()
public void addPageClassifier(PageClassifier pageClassifier)
pageClassifier
- add this PageClassifier
public void removePageClassifier(PageClassifier pageClassifier)
pageClassifier
- remove this PageClassifier
public void addLinkClassifier(LinkClassifier linkClassifier)
linkClassifier
- add this LinkClassifier
public void removeLinkClassifier(LinkClassifier linkClassifier)
linkClassifier
- remove this LinkClassifier
public void addPageFilter(PageFilter pageFilter)
pageFilter
- add this PageFilter
public void removePageFilter(PageFilter pageFilter)
pageFilter
- remove this PageFilter
public void addLinkFilter(LinkFilter linkFilter)
linkFilter
- add this LinkFilter
public void removeLinkFilter(LinkFilter linkFilter)
linkFilter
- remove this LinkFilter
public java.util.Enumeration getPageClassifier()
public java.util.Enumeration getLinkClassifier()
public java.util.Enumeration getPageFilter()
public java.util.Enumeration getLinkFilter()
public void setDoContentSeenTest()
public boolean getDoContenSeenTest()
public void setRequireSourceCode(boolean requireSourceCode)
requireSourceCode
- if true, the robot saves the source code of every document
public boolean getRequireSourceCode()
public long getArrivalTime()
public long getStartTime()
public long getEndTime()
public int getCheckedUris()
public int getQueuedUris()
public int getFilteredUris()
public int getDownloadedUris()
public int getCacheHits()
public boolean isOpenUri(SimpleUri uri)
uri
- look for this uri
public QueueEntry getOpenUri(SimpleUri uri)
uri
- look for this uri
public int getNumberOfOpenUris()
public java.util.Enumeration getOpenUris()
public void addResultNode(RobotHtmlNode robotHtmlNode)
robotHtmlNode
- add this node
public boolean isResultUri(SimpleUri uri)
uri
- check this URI
public int getNumberOfResultNodes()
public java.util.Enumeration getResultNodes()
public RobotHtmlNode getResultNode(SimpleUri uri)
uri
- get the result node for this URI
public boolean wasStopped()
|
||||||||||
PREV CLASS NEXT CLASS | FRAMES NO FRAMES | |||||||||
SUMMARY: NESTED | FIELD | CONSTR | METHOD | DETAIL: FIELD | CONSTR | METHOD |