/*
 * Scone - The Web Enhancement Framework
 * Copyright (C) 2009 Harald Weinreich, Volkert Buchmann, Frank Wollenweber, Torsten Ha
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
 package scone.robot;


import java.util.Locale;

import scone.netobjects.SimpleUri;


/**
 * This filter is applied to every URI the robot finds. It is one criterion to follow a link.
 *
 * @author Frank Wollenweber
 */


class DefaultFilter {

    /** Restriction of the robotTask (RobotTask.ALL/RobotTask.EXTERNAL/RobotTask.INTERNAL/RobotTask.SUBDIRECTORIES). */
    private final int restriction;

    /** The startUri of the robotTask; reference point for the host and path restrictions. */
    private final SimpleUri startUri;

    /** If the robot only downloads the head of a document, no file-type checks are done. */
    private boolean headOnly;

    /**
     * Constructor. The filter starts with headOnly switched off.
     *
     * @param restriction Restriction of the robotTask (RobotTask.ALL/RobotTask.EXTERNAL/RobotTask.INTERNAL/RobotTask.SUBDIRECTORIES)
     * @param startUri The startUri of the robotTask
     **/
    DefaultFilter(int restriction, SimpleUri startUri) {
        this.restriction = restriction;
        this.startUri = startUri;
        this.headOnly = false;
    }

    /**
     * RobotTasks that download only the head of a document require different filter-rules:
     * in headOnly mode the file-extension check is skipped.
     *
     * @param b true, if headOnly
     **/
    void setHeadOnly(boolean b) {
        headOnly = b;
    }

    /**
     * Applies the filter-rules to uri. A uri is accepted only if
     * <ul>
     *   <li>it contains no <code>mailto:</code> or <code>javascript:</code> pseudo-link,</li>
     *   <li>its file has an extension the robot accepts (skipped in headOnly mode),</li>
     *   <li>its protocol is http, and</li>
     *   <li>it satisfies the restriction of the robotTask relative to the startUri.</li>
     * </ul>
     *
     * @param uri The SimpleUri to filter
     * @return true, if the uri is ok
     **/
    boolean filter(SimpleUri uri) {
        // Locale.ROOT keeps the lower-casing independent of the default locale
        // (e.g. in a Turkish locale "MAILTO" would otherwise become "maılto").
        String urlText = uri.getUri().toLowerCase(Locale.ROOT);

        // Match the scheme including its colon, anywhere in the text: this still catches
        // pseudo-links that ended up embedded in an http uri, but no longer rejects
        // ordinary pages whose name merely contains the word (".../javascript-tutorial.html").
        if (urlText.contains("mailto:") || urlText.contains("javascript:")) {
            return false;
        }

        if (!headOnly) {
            String file = uri.getFile().toLowerCase(Locale.ROOT);
            if (!Robot.instance().isValidFileExtension(file)) {
                return false;
            }
        }

        if (!uri.getProtocol().equalsIgnoreCase("http")) {
            return false;
        }

        if (restriction == RobotTask.INTERNAL) { // follow only internal links
            return sameHost(uri, startUri);
        }
        if (restriction == RobotTask.SUBDIRECTORIES) { // same host and below the start path
            return sameHost(uri, startUri)
                    && uri.getPath().startsWith(startUri.getPath());
        }
        if (restriction == RobotTask.EXTERNAL) { // scan start page and all external links
            return uri.equals(startUri) || !sameHost(uri, startUri);
        }
        return true; // no restriction applies
    }

    /**
     * Compares the hosts of two uris. Host names are case-insensitive, so the
     * comparison ignores case (consistent with the protocol check in filter).
     */
    private static boolean sameHost(SimpleUri a, SimpleUri b) {
        return a.getHost().equalsIgnoreCase(b.getHost());
    }
}
