/*
 * Decompiled with CFR 0.152.
 */
package scone.robot;

import HTTPClient.HTTPResponse;
import HTTPClient.ModuleException;
import HTTPClient.NVPair;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.Enumeration;
import java.util.Locale;
import java.util.TimeZone;
import java.util.TimerTask;
import java.util.Vector;
import scone.netobjects.HtmlNode;
import scone.netobjects.HtmlNodeCache;
import scone.netobjects.Inclusion;
import scone.netobjects.InclusionCache;
import scone.netobjects.Link;
import scone.netobjects.NetNode;
import scone.netobjects.NetNodeCache;
import scone.netobjects.Server;
import scone.netobjects.ServerCache;
import scone.netobjects.SimpleUri;
import scone.robot.LimitedInputStream;
import scone.robot.LinkClassifier;
import scone.robot.LinkFilter;
import scone.robot.PageClassifier;
import scone.robot.PageFilter;
import scone.robot.PageLoaderInterruptTimer;
import scone.robot.QueueEntry;
import scone.robot.Robot;
import scone.robot.RobotHtmlNode;
import scone.robot.RobotHttpConnection;
import scone.robot.RobotLink;
import scone.util.DocumentParser;
import scone.util.tokenstream.DummyTokenOutputStream;
import scone.util.tokenstream.TokenInputStreamTokenizerImpl;

/**
 * Processes a single {@link QueueEntry} of a robot task.
 *
 * <p>{@link #run()} optionally schedules a {@link PageLoaderInterruptTimer}
 * and then scans the entry: the page is downloaded and parsed if required,
 * page classifiers and filters are applied, and the outgoing links (and, for
 * frame pages, the included objects) are classified, filtered and queued
 * with a depth reduced by one.
 *
 * <p>{@link #interrupt()} is presumably invoked from the timer task scheduled
 * in {@link #run()} (the timer class is not visible here); the fields it
 * shares with the loading thread are therefore declared {@code volatile}.
 */
class PageLoaderThread
implements Runnable {
    /** Date layout used for the {@code If-Modified-Since} request header. */
    private static final String HTTP_DATE_PATTERN = "EEE, dd MMM yyyy HH:mm:ss z";
    /** Requirement flags always passed to {@link DocumentParser}; their meaning is defined there. */
    private static final int BASE_PARSER_REQUIREMENTS = 1049305;
    /** Extra requirement flags added when the task requires the page source code. */
    private static final int SOURCE_CODE_PARSER_REQUIREMENTS = 0x100101;

    /** The queue entry (URI, remaining depth, owning task) handled by this instance. */
    private final QueueEntry qe;
    /** The robot that owns the connection pool, the timer and the queue. */
    private final Robot robot;
    /** Set once {@link #run()} has completed, normally or exceptionally. */
    private volatile boolean finished;
    /** Set by {@link #interrupt()} when the download was aborted. */
    private volatile boolean interrupted;
    /** True when the page body was cut off by the task's maximum page size. */
    private boolean sizeLimitReached;
    /** Response body stream of the running download; closed by {@link #interrupt()}. */
    private volatile InputStream inputstream = null;

    /**
     * Creates a loader for one queue entry.
     *
     * @param robot the robot providing connections, timer and queue
     * @param qe    the entry to process
     */
    PageLoaderThread(Robot robot, QueueEntry qe) {
        this.robot = robot;
        this.qe = qe;
        this.finished = false;
        this.interrupted = false;
        this.sizeLimitReached = false;
    }

    /**
     * Formats a timestamp as an HTTP date for the {@code If-Modified-Since} header.
     *
     * @param date milliseconds since the epoch
     * @return the date in the form {@code EEE, dd MMM yyyy HH:mm:ss GMT}
     */
    private String makeDateString(long date) {
        // A new formatter per call: SimpleDateFormat is not thread-safe.
        SimpleDateFormat formatter = new SimpleDateFormat(HTTP_DATE_PATTERN, Locale.US);
        // HTTP dates must be expressed in GMT, not in the JVM's local zone.
        formatter.setTimeZone(TimeZone.getTimeZone("GMT"));
        return formatter.format(new Date(date));
    }

    /**
     * Requests the page of the current queue entry, records the response meta
     * data on the node and its server, and parses the body if one was received.
     *
     * <p>A HEAD request is sent when the task is head-only, a GET otherwise.
     * Responses with a status of 300 or above, or without a
     * {@code text/html} content type, are logged and not parsed. An
     * {@link IOException} sets the node's access status to {@code "408"}.
     * The connection is handed back to the pool and the task's download
     * counter is incremented in every non-exceptional case.
     *
     * @param netNode             the node to update with the response data
     * @param sendIfModifiedSince whether to send an {@code If-Modified-Since} header
     * @param lastAccess          timestamp used for that header
     */
    private void loadPage(NetNode netNode, boolean sendIfModifiedSince, long lastAccess) {
        RobotHttpConnection con = null;
        LimitedInputStream limitedInputStream = null;
        BufferedReader bufferedreader = null;
        boolean error = false;
        try {
            try {
                String file = netNode.getSUri().getFile();
                String path = netNode.getSUri().getPath();
                String query = netNode.getSUri().getQuery();
                if (query != null && query.length() > 0) {
                    file = file + "?" + query;
                }
                long starttime = System.currentTimeMillis();
                con = this.robot.getHttpConnectionPool().getHttpConnection(this.qe.getURI().toURL());
                con.setTimeout(this.robot.getTimeout());
                con.setAllowUserInteraction(false);

                // Size the header array exactly so it never carries a null element.
                NVPair userAgent = new NVPair("User-Agent", this.robot.getRobotName());
                NVPair[] headers;
                if (sendIfModifiedSince) {
                    headers = new NVPair[]{userAgent, new NVPair("If-Modified-Since", this.makeDateString(lastAccess))};
                } else {
                    headers = new NVPair[]{userAgent};
                }
                con.setDefaultHeaders(headers);

                HTTPResponse rsp = this.qe.getRobotTask().getHeadOnly() ? con.Head(path + file) : con.Get(path + file);
                long responsetime = System.currentTimeMillis() - starttime;

                Server s = ServerCache.get(netNode.getHost());
                if (!con.reUsed()) {
                    // Only a fresh connection yields a delay worth recording for the server.
                    s.setDelay(String.valueOf(responsetime));
                }

                int statusCode = rsp.getStatusCode();
                netNode.setAccessStatus(Integer.toString(statusCode));
                netNode.setLastRobotAccess(starttime);

                String contentType = rsp.getHeader("Content-Type");
                if (contentType != null) {
                    netNode.setMimeType(contentType);
                }
                String contentLength = rsp.getHeader("Content-Length");
                if (contentLength != null) {
                    netNode.setSize(contentLength);
                }
                String lastModified = rsp.getHeader("Last-Modified");
                if (lastModified != null) {
                    netNode.setLastModifiedString(lastModified);
                }

                // Only 5xx responses are recorded as a server-side problem.
                if (statusCode >= 500) {
                    s.setAccessStatus(String.valueOf(statusCode));
                } else {
                    s.setAccessStatus("200");
                }

                if (statusCode >= 300) {
                    error = true;
                    System.err.println("Robot - PageLoaderThread -> Received Error: " + rsp.getReasonLine() + " " + netNode.getUri());
                } else if (contentType == null || contentType.indexOf("text/html") == -1) {
                    // A missing Content-Type is handled like a non-HTML one instead of failing.
                    error = true;
                    System.err.println("Robot - PageLoaderThread -> Received Error: Wrong mime-type " + netNode.getUri());
                } else if (!this.qe.getRobotTask().getHeadOnly()) {
                    InputStream body = rsp.getInputStream();
                    this.inputstream = body;
                    if (this.qe.getRobotTask().getMaxPageSize() > 0) {
                        limitedInputStream = new LimitedInputStream(body, this.qe.getRobotTask().getMaxPageSize());
                        bufferedreader = new BufferedReader(new InputStreamReader(limitedInputStream));
                    } else {
                        bufferedreader = new BufferedReader(new InputStreamReader(body));
                    }
                }
            }
            catch (IOException ioe) {
                System.out.println("Robot - PageLoaderThread -> Problems loading the uri " + this.qe.getURI());
                System.err.println(ioe.toString());
                netNode.setAccessStatus("408");
            }
            catch (ModuleException me) {
                System.err.println("Robot - PageLoaderThread -> Error handling request: " + me.getMessage());
            }

            if (this.inputstream != null && !error) {
                this.parseDocument(netNode, bufferedreader);
                this.sizeLimitReached = limitedInputStream != null && limitedInputStream.getLimitReached();
            }
        }
        finally {
            // Hand the connection back even if parsing failed unexpectedly.
            // con is passed through unchanged (it is null when acquiring it failed).
            this.robot.getHttpConnectionPool().returnHttpConnection(con);
        }
        this.qe.getRobotTask().incDownloadedUris();
    }

    /**
     * Runs the document parser over the response body and closes the token streams.
     *
     * @param netNode the node registered as {@code "baseNode"} in the tokenizer meta info
     * @param reader  reader over the response body
     */
    private void parseDocument(NetNode netNode, BufferedReader reader) {
        int parserRequirements = BASE_PARSER_REQUIREMENTS;
        if (this.qe.getRobotTask().getRequireSourceCode()) {
            parserRequirements |= SOURCE_CODE_PARSER_REQUIREMENTS;
        }
        DocumentParser documentParser = new DocumentParser(parserRequirements, false);
        TokenInputStreamTokenizerImpl tokenInputStream = new TokenInputStreamTokenizerImpl(reader);
        tokenInputStream.getMetaInfo().put("baseNode", netNode);
        DummyTokenOutputStream tokenOutputStream = new DummyTokenOutputStream();
        documentParser.parse(tokenInputStream, tokenOutputStream);
        try {
            tokenInputStream.close();
            tokenOutputStream.close();
        }
        catch (IOException ioe) {
            System.out.println(ioe.toString());
        }
    }

    /**
     * Applies every page classifier of the task to the given page.
     *
     * @param robotHtmlNode the page to classify
     */
    private void runPageClassifier(RobotHtmlNode robotHtmlNode) {
        Enumeration classifiers = this.qe.getRobotTask().getPageClassifier();
        if (classifiers == null) {
            return;
        }
        while (classifiers.hasMoreElements()) {
            PageClassifier classifier = (PageClassifier)classifiers.nextElement();
            classifier.classify(robotHtmlNode, this.qe);
        }
    }

    /**
     * Applies every page filter of the task to the given page.
     *
     * @param robotHtmlNode the page to filter
     * @return true only if all filters accepted the page (or there are none)
     */
    private boolean runPageFilter(RobotHtmlNode robotHtmlNode) {
        boolean accepted = true;
        Enumeration filters = this.qe.getRobotTask().getPageFilter();
        if (filters != null) {
            while (filters.hasMoreElements()) {
                PageFilter filter = (PageFilter)filters.nextElement();
                // Non-short-circuit on purpose: every filter is invoked.
                accepted &= filter.filter(robotHtmlNode, this.qe);
            }
        }
        return accepted;
    }

    /**
     * Applies every link classifier of the task to the given link.
     *
     * @param robotLink     the link to classify
     * @param robotHtmlNode the page the link was found on
     */
    private void runLinkClassifier(RobotLink robotLink, RobotHtmlNode robotHtmlNode) {
        Enumeration classifiers = this.qe.getRobotTask().getLinkClassifier();
        if (classifiers == null) {
            return;
        }
        while (classifiers.hasMoreElements()) {
            LinkClassifier classifier = (LinkClassifier)classifiers.nextElement();
            classifier.classify(robotLink, robotHtmlNode, this.qe);
        }
    }

    /**
     * Applies every link filter of the task to the given link.
     *
     * @param robotLink     the link to filter
     * @param robotHtmlNode the page the link was found on
     * @return true only if all filters accepted the link (or there are none)
     */
    private boolean runLinkFilter(RobotLink robotLink, RobotHtmlNode robotHtmlNode) {
        boolean accepted = true;
        Enumeration filters = this.qe.getRobotTask().getLinkFilter();
        if (filters != null) {
            while (filters.hasMoreElements()) {
                LinkFilter filter = (LinkFilter)filters.nextElement();
                // Non-short-circuit on purpose: every filter is invoked.
                accepted &= filter.filter(robotLink, robotHtmlNode, this.qe);
            }
        }
        return accepted;
    }

    /**
     * Classifies a link and queues its target if the default filter and all
     * link filters accept it; otherwise counts it as filtered.
     *
     * @param link          target URI of the link
     * @param robotLink     robot view of the link
     * @param robotHtmlNode the page the link was found on
     */
    private void handleLink(SimpleUri link, RobotLink robotLink, RobotHtmlNode robotHtmlNode) {
        this.runLinkClassifier(robotLink, robotHtmlNode);
        if (this.qe.getRobotTask().getDefaultFilter().filter(link) && this.runLinkFilter(robotLink, robotHtmlNode)) {
            this.queueUnlessCovered(link);
        } else {
            this.qe.getRobotTask().incFilteredUris();
        }
    }

    /**
     * Queues the target of an inclusion if the default filter accepts it;
     * otherwise counts it as filtered.
     *
     * @param link      URI of the included object
     * @param inclusion the inclusion the URI was taken from (currently unused)
     */
    private void handleInclusion(SimpleUri link, Inclusion inclusion) {
        if (this.qe.getRobotTask().getDefaultFilter().filter(link)) {
            this.queueUnlessCovered(link);
        } else {
            this.qe.getRobotTask().incFilteredUris();
        }
    }

    /**
     * Schedules a URI one level below the current entry unless it is already
     * pending, open, or downloaded with at least that depth. An existing
     * pending or open entry with a smaller depth is raised to the new depth.
     *
     * @param link the URI to schedule
     */
    private void queueUnlessCovered(SimpleUri link) {
        QueueEntry candidate = new QueueEntry(link, this.qe.getDepth() - 1, this.qe.getRobotTask());

        QueueEntry pending = this.robot.getPendingURL(link, this.qe.getRobotTask());
        if (pending != null) {
            if (pending.getDepth() < candidate.getDepth()) {
                pending.setDepth(candidate.getDepth());
            }
            return;
        }

        QueueEntry open = this.qe.getRobotTask().getOpenUri(link);
        if (open != null) {
            if (open.getDepth() < candidate.getDepth()) {
                open.setDepth(candidate.getDepth());
            }
            return;
        }

        RobotHtmlNode downloaded = this.qe.getRobotTask().getResultNode(link);
        if (downloaded == null || downloaded.getScannedDepth() < candidate.getDepth()) {
            this.robot.queue(candidate);
            this.qe.getRobotTask().incQueuedUris();
        }
    }

    /**
     * Decides whether the page has to be fetched regardless of the task's
     * update date, based on the task settings and the cached data.
     *
     * @param netNode  the node of the current URI
     * @param htmlNode the cached HTML node, or null if none is available
     * @return true if a download is required
     */
    private boolean needsDownload(NetNode netNode, HtmlNode htmlNode) {
        if (!this.qe.getRobotTask().getCheckDatabase()) {
            return true;
        }
        if (this.qe.getRobotTask().getHeadOnly()) {
            return netNode.getAccessStatus().equals("-1");
        }
        if (htmlNode == null || htmlNode.getNumberOfLinks().equals("-1") || !htmlNode.isCompletelyDownloaded()) {
            return true;
        }
        if (this.qe.getRobotTask().getRequireSourceCode() && htmlNode.getSourceCode().equals("")) {
            return true;
        }
        return this.qe.getRobotTask().getDoContenSeenTest() && htmlNode.getFingerprint().equals("");
    }

    /**
     * Processes the current queue entry: downloads the page when needed,
     * classifies it, follows its links and inclusions while depth remains,
     * and reports the result to the task and the robot. Entries with a
     * negative depth are ignored.
     */
    private void scan() {
        if (this.qe.getDepth() < 0) {
            return;
        }
        NetNode netNode = NetNodeCache.get(this.qe.getURI());
        HtmlNode htmlNode = null;
        if (!this.qe.getRobotTask().getHeadOnly()) {
            htmlNode = HtmlNodeCache.check(netNode);
        }

        long lastAccess = Math.max(netNode.getLastAccess(), netNode.getLastRobotAccess());
        boolean updateDownloadCondition = this.qe.getRobotTask().getUpdateDate() > 0L && lastAccess > 0L && lastAccess < this.qe.getRobotTask().getUpdateDate();
        boolean generalDownloadCondition = this.needsDownload(netNode, htmlNode);
        boolean downloadCondition = generalDownloadCondition || updateDownloadCondition;

        if (downloadCondition) {
            boolean allowed;
            if (!this.qe.getRobotTask().getObeyRobotExclusion()) {
                allowed = true;
            } else if (htmlNode != null) {
                allowed = this.robot.noRobotsTest(htmlNode.getSUri());
            } else {
                // No cached HTML node (head-only task or unseen page): test the node's own URI.
                allowed = this.robot.noRobotsTest(netNode.getSUri());
            }
            if (allowed) {
                // A conditional request is only sent when the update date is the sole reason to fetch.
                this.loadPage(netNode, !generalDownloadCondition && updateDownloadCondition, lastAccess);
            }
            if (this.qe.getRobotTask().getMaxDownloadUris() > 0 && this.qe.getRobotTask().getDownloadedUris() >= this.qe.getRobotTask().getMaxDownloadUris()) {
                this.robot.stopRobotTask(this.qe.getRobotTask());
            }
        } else {
            this.qe.getRobotTask().incCacheHits();
        }

        // Look the HTML node up again; a download may have made one available.
        htmlNode = HtmlNodeCache.check(netNode);
        RobotHtmlNode robotHtmlNode;
        if (htmlNode == null) {
            robotHtmlNode = new RobotHtmlNode(netNode, this.qe.getDepth());
        } else {
            robotHtmlNode = new RobotHtmlNode(htmlNode, this.qe.getDepth());
            if (downloadCondition) {
                htmlNode.setCompletelyDownloaded(!this.interrupted && !this.sizeLimitReached);
            }
        }

        this.runPageClassifier(robotHtmlNode);
        if (htmlNode != null && this.qe.getDepth() >= 1 && this.runPageFilter(robotHtmlNode) && !this.qe.getRobotTask().wasStopped()) {
            this.followLinks(htmlNode, robotHtmlNode);
            if (htmlNode.isFrames()) {
                this.followInclusions(htmlNode);
            }
        }
        this.qe.getRobotTask().moveOpenUriToResult(this.qe, robotHtmlNode);
        this.robot.pageFinished(this.qe, robotHtmlNode);
    }

    /**
     * Handles every outgoing link of the page until the task is stopped.
     *
     * @param htmlNode      the page whose links are followed
     * @param robotHtmlNode robot view of the same page
     */
    private void followLinks(HtmlNode htmlNode, RobotHtmlNode robotHtmlNode) {
        Enumeration links = htmlNode.getOutgoingLinks().elements();
        while (links.hasMoreElements() && !this.qe.getRobotTask().wasStopped()) {
            SimpleUri link;
            RobotLink robotLink;
            try {
                Link l = (Link)links.nextElement();
                robotLink = new RobotLink(l);
                link = l.getToNode().getSUri();
            }
            catch (Exception e) {
                // Best effort: a link that cannot be resolved is skipped, not fatal.
                System.out.println("Robot -> PageLoaderThread: Invalid link, e.g. JavaScript");
                continue;
            }
            this.handleLink(link, robotLink, robotHtmlNode);
            this.qe.getRobotTask().incCheckedUris();
        }
    }

    /**
     * Handles every object included by a frame page until the task is stopped.
     *
     * @param htmlNode the frame page whose inclusions are followed
     */
    private void followInclusions(HtmlNode htmlNode) {
        InclusionCache.clean();
        Vector included = InclusionCache.getIncludedObjects(htmlNode.getNode());
        // Continue only while the task is NOT stopped, like the link loop.
        for (int i = 0; i < included.size() && !this.qe.getRobotTask().wasStopped(); ++i) {
            Inclusion inclusion;
            SimpleUri link;
            try {
                inclusion = (Inclusion)included.elementAt(i);
                link = inclusion.getChildNode().getSUri();
            }
            catch (Exception e) {
                // Best effort: an inclusion that cannot be resolved is skipped, not fatal.
                System.out.println("Robot - PageLoaderThread -> Invalid link, e.g. JavaScript");
                continue;
            }
            this.handleInclusion(link, inclusion);
            this.qe.getRobotTask().incCheckedUris();
        }
    }

    /**
     * Aborts a download that is still in progress by marking this loader as
     * interrupted and closing the response stream, if one is open. Does
     * nothing once {@link #run()} has finished.
     */
    void interrupt() {
        if (this.finished) {
            return;
        }
        System.out.println("Robot - PageLoaderThread -> interrupted " + this.qe.getURI().toDocString());
        this.interrupted = true;
        InputStream body = this.inputstream;
        if (body == null) {
            // Nothing is being read yet; the flag alone records the interruption.
            return;
        }
        try {
            body.close();
        }
        catch (Exception ignored) {
            // Best effort. Kept broad so a failing close() cannot propagate
            // into the caller (presumably a shared timer thread).
        }
    }

    /**
     * Schedules the interrupt timer (unless the task's maximum download time
     * is -1) and scans the queue entry. The loader is marked as finished even
     * if scanning fails, so a late timer does not act on it.
     */
    public void run() {
        PageLoaderInterruptTimer pit = new PageLoaderInterruptTimer(this);
        long maxDownloadTime = this.qe.getRobotTask().getMaxDownloadTime();
        if (maxDownloadTime != -1L) {
            this.robot.getTimer().schedule((TimerTask)pit, maxDownloadTime);
        }
        try {
            this.scan();
        }
        finally {
            this.finished = true;
        }
    }
}

