package lookoflink;


import java.util.Enumeration;
import java.util.Observable;
import java.util.Observer;

import scone.netobjects.AccessCache;
import scone.netobjects.AccessEvent;
import scone.netobjects.HtmlNode;
import scone.netobjects.HtmlNodeCache;
import scone.netobjects.LinkCache;
import scone.netobjects.LinkVector;
import scone.robot.LinkClassifier;
import scone.robot.LinkFilter;
import scone.robot.QueueEntry;
import scone.robot.Robot;
import scone.robot.RobotHtmlNode;
import scone.robot.RobotLink;
import scone.robot.RobotTask;
import scone.robot.RobotUser;


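/**
 * Scone plugin that observes page accesses and, on the first access event for
 * an HTML page, starts a robot crawl from that page. When the crawl has
 * finished, the downloaded pages and their outgoing links are printed to
 * standard output.
 *
 * @author Frank Wollenweber, Harald Weinreich
 */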
public class RobotPlugin extends Plugin implements Observer, RobotUser {

    /** Attribute key the classifier sets and the filter reads. */
    private static final String SEARCH_ENGINE_ATTRIBUTE = "SearchEngineURL";

    /** Attribute value marking a link whose target matched an excluded host fragment. */
    private static final String SEARCH_ENGINE_MARKER = "Yes";

    /** URI substrings identifying link targets that get marked by the classifier. */
    private static final String[] EXCLUDED_HOST_FRAGMENTS = {
        "google.",
        "alltheweb.",
        "fastsearch.",
        "scirus.",
        "megasoccer.",
        "lycos.",
        "hotbot.",
        "yahoo.",
        "dmoz.",
        "doubleclick."
    };

    /** Robot used to run the crawl tasks; assigned in {@link #init()}. */
    private Robot robot;

    /** Crawl depth passed to every robot task created by this plugin. */
    private int depth = 1;

    public RobotPlugin() {}

    /**
     * Declares the features this plugin requires.
     *
     * @return the bitwise OR of {@code PARSEDOCUMENT}, {@code ACCESSTRACKING}
     *         and {@code CONSIDERLINKS} (constants not defined in this file)
     */
    public int getRequirements() {
        return PARSEDOCUMENT | ACCESSTRACKING | CONSIDERLINKS;
    }

    /**
     * Scone calls this method on all plugins. Fetches the robot instance and
     * registers this plugin as an observer of the access cache.
     */
    public void init() {
        robot = scone.robot.Robot.instance();
        AccessCache.putObserver(this);
    }

    /**
     * Observer callback. For the first access event of a page (action 0 and
     * stay time 0) a crawl task is started from the accessed HTML page.
     *
     * @param o   the notifying observable; only {@link AccessCache} is handled
     * @param arg the notification argument; ignored unless it is an
     *            {@link AccessEvent}
     */
    public void update(Observable o, Object arg) {
        // Ignore notifications that are not access events instead of failing
        // with a ClassCastException / NullPointerException in the callback.
        if (!(o instanceof AccessCache) || !(arg instanceof AccessEvent)) {
            return;
        }
        AccessEvent e = (AccessEvent) arg;

        boolean firstAccessEvent = e.getAccess().getAction() == 0
                && e.getAccess().getStayTime() == 0;
        if (!firstAccessEvent) {
            return;
        }

        System.out.println("\nRobotPlugin\n" + e.getUser().getUserName() + " accessed " + e.getNode().toString());
        HtmlNode hNode = HtmlNodeCache.check(e.getNode());

        if (hNode == null) {
            System.out.println("Error: Access from non HTML-Page!");
            return;
        }

        try {
            // The task is only needed here, so it is a local rather than shared state.
            RobotTask task = new RobotTask(hNode.getSUri(), depth, RobotTask.ALL, this);
            task.addLinkClassifier(new LookOfLinkClassifier());
            task.addLinkFilter(new LookOfLinkFilter());
            robot.scan(task);
        } catch (Exception ex) {
            // Report the cause as well; the failure is not necessarily a bad URL.
            System.out.println("Invalid URL: " + ex);
        }
        System.out.println("Title: " + hNode.getTitle());
    }

    /**
     * Called when a new page is parsed. Intentionally does nothing.
     *
     * @param robotHtmlNode the parsed page
     * @param robotTask     the task the page belongs to
     */
    public void robotNewPage(RobotHtmlNode robotHtmlNode, RobotTask robotTask) {}

    /**
     * Called when a task has finished. Prints the task summary, then every
     * result node together with the links selected for it from the link cache.
     *
     * @param robotTask the finished task
     */
    public void robotTaskFinished(RobotTask robotTask) {
        System.out.println("Auftrag: " + robotTask.getStartURI().toString() + " Tiefe: " + robotTask.getDepth() + " abgeschlossen");
        System.out.println("Results: ");
        Enumeration downloadedNodes = robotTask.getResultNodes();

        while (downloadedNodes.hasMoreElements()) {
            RobotHtmlNode robotHtmlNode = (RobotHtmlNode) downloadedNodes.nextElement();

            System.out.println(robotHtmlNode.getSimpleUri().toString());
            // Only links to other uri and links with fragment...
            LinkVector lv = (LinkVector) LinkCache.getLinks("where fromNodeId='" + robotHtmlNode.getNodeId() + "' and type&1=0 and type&2<>0");

            for (int i = 0; i < lv.size(); i++) {
                System.out.println(" Link: " + lv.linkAt(i).getLinkText() + " " + lv.linkAt(i).getAlt());
                System.out.println(" ->" + lv.linkAt(i).getToNode().getUri() + "#" + lv.linkAt(i).getFragment());
            }
        }
    }

    /**
     * Classifies links that shall not be followed: a link whose target URI
     * contains one of {@link #EXCLUDED_HOST_FRAGMENTS} gets the
     * {@link #SEARCH_ENGINE_ATTRIBUTE} attribute set.
     */
    private static final class LookOfLinkClassifier implements LinkClassifier {
        public void classify(RobotLink robotLink, RobotHtmlNode robotHtmlNode, QueueEntry qe) {
            String urlText = robotLink.getLink().getToNode().getUri();

            if (urlText == null) {
                // Nothing to match against; leave the link unmarked.
                return;
            }
            for (int i = 0; i < EXCLUDED_HOST_FRAGMENTS.length; i++) {
                if (urlText.indexOf(EXCLUDED_HOST_FRAGMENTS[i]) != -1) {
                    robotLink.setAttribute(SEARCH_ENGINE_ATTRIBUTE, SEARCH_ENGINE_MARKER);
                    return;
                }
            }
        }
    }


    /**
     * Filter that rejects the links marked by {@link LookOfLinkClassifier}.
     */
    private static final class LookOfLinkFilter implements LinkFilter {

        /**
         * Search engine links should not be followed.
         *
         * @return {@code false} for links carrying the search-engine marker,
         *         {@code true} for all other links
         */
        public boolean filter(RobotLink robotLink, RobotHtmlNode robotHtmlNode, QueueEntry qe) {
            // equals() instead of ==: reference comparison only works while the
            // attribute store hands back the very same interned literal.
            return !SEARCH_ENGINE_MARKER.equals(robotLink.getAttribute(SEARCH_ENGINE_ATTRIBUTE));
        }
    }

}

