/*
 * Scone - The Web Enhancement Framework
 * Copyright (C) 2009 Harald Weinreich, Volkert Buchmann, Frank Wollenweber, Torsten Ha
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package  scone.util;


import java.util.Date;
import java.util.StringTokenizer;

import scone.netobjects.HtmlNode;
import scone.netobjects.HtmlNodeCache;
import scone.netobjects.Inclusion;
import scone.netobjects.InclusionCache;
import scone.netobjects.Keyword;
import scone.netobjects.KeywordCache;
import scone.netobjects.Link;
import scone.netobjects.LinkCache;
import scone.netobjects.LinkToken;
import scone.netobjects.NetNode;
import scone.netobjects.NetNodeCache;
import scone.netobjects.Node2KeyCache;
import scone.netobjects.Server;
import scone.netobjects.ServerCache;
import scone.netobjects.SimpleUri;
import scone.util.tokenstream.HtmlTagToken;
import scone.util.tokenstream.HtmlTextToken;
import scone.util.tokenstream.Token;
import scone.util.tokenstream.TokenInputStream;
import scone.util.tokenstream.TokenOutputStream;
import adc.parser.HtmlStreamTokenizer;


/**
 * Transforms tokens into database objects.
 * <br>
 * HtmlTokens which represent links are transformed into <code>LinkToken</code> objects.
 * The following keys and values of the meta data are used:<br>
 * <table>
 *  <tr><td>"baseNode"</td><td>the NetNode (read from the meta data)</td></tr>
 *  <tr><td>"htmlDocument"</td><td>the HtmlNode (added to the meta data if parsing is required)</td></tr>
 * </table>
 *
 * @author Harald Weinreich
 * @author Volkert Buchmann
 */
public class DocumentParser {
    public static final String COPYRIGHT = "(C) Harald Weinreich & Volkert Buchmann";
    // Requirement flags, mirrored from scone.Plugin. They are tested against the
    // "requirements" bit array that is passed to the constructor.
    public static final int CONSIDERLINKS = scone.Plugin.CONSIDERLINKS;
    public static final int CONSIDERINCLUSIONS = scone.Plugin.CONSIDERINCLUSIONS;
    public static final int PARSEDOCUMENT = scone.Plugin.PARSEDOCUMENT;
    public static final int CONSIDERKEYWORDS = scone.Plugin.CONSIDERKEYWORDS;
    public static final int SAVEBODYTEXT = scone.Plugin.SAVEBODYTEXT;
    public static final int SAVESOURCECODE = scone.Plugin.SAVESOURCECODE;
    public static final int CALCFINGERPRINT = scone.Plugin.CALCFINGERPRINT;
    public static final int POSTDATA = scone.Plugin.POSTDATA;
    // store the requirements locally
    // NOTE(review): these flags are static but assigned by the constructor, so each new
    // DocumentParser overwrites the values seen by all other instances.
    static boolean reqLinks;
    static boolean reqInclusions;
    static boolean reqParse;
    static boolean reqKeywords;
    static boolean reqBodyText;
    static boolean reqSourceCode;
    static boolean reqFingerPrint;
    static boolean reqPostData;
    // Maximum amount of data per object to be stored in DB.
    // The limits are applied as a number of characters (StringBuffer.setLength) in parse().
    public static final int MAX_BODYTEXT = 1000000;                // Save max 1MB body text.
    public static final int MAX_SOURCECODE = 1000000;              // Save max 1MB of html source code.
    // Very big numbers can cause problems!
    // You may have to set 'set-variable = max_allowed_packet=16M' in
    // [mysqld]-Section of the "mysql.ini"-File
    // Page attributes of the document currently being parsed. They are reset at the
    // start of parse() and copied to the HtmlNode at its end; hasForms is set in parse(),
    // hasPlugins and hasAnimation are set in findInclusion().
    private boolean hasFrames = false;
    private boolean hasForms = false;
    // private boolean hasCommercial=false;
    private boolean hasJavascript = false;
    private boolean hasPlugins = false;
    private boolean hasAnimation = false;
    private boolean hasSound = false;

    /**
     * Creates a parser for the given requirements and prints the decoded
     * requirement flags to standard output.
     *
     * @param requirements is a bit array. See scone.Plugin for more information.
     */
    public DocumentParser(int requirements) {
        // Delegate to the full constructor with the requirement listing switched on.
        this(requirements, true);
    }

    /**
     * Creates a parser and decodes the requirement bit array into the
     * <code>req*</code> flags of this class.
     *
     * @param requirements is a bit array. See scone.Plugin for more information.
     * @param showRequirements shall the decoded requirements be printed to standard output?
     */
    public DocumentParser(int requirements, boolean showRequirements) {
        // A requirement counts as set only if all bits of its constant are present.
        reqLinks       = (CONSIDERLINKS      & requirements) == CONSIDERLINKS;
        reqInclusions  = (CONSIDERINCLUSIONS & requirements) == CONSIDERINCLUSIONS;
        reqParse       = (PARSEDOCUMENT      & requirements) == PARSEDOCUMENT;
        reqKeywords    = (CONSIDERKEYWORDS   & requirements) == CONSIDERKEYWORDS;
        reqBodyText    = (SAVEBODYTEXT       & requirements) == SAVEBODYTEXT;
        reqSourceCode  = (SAVESOURCECODE     & requirements) == SAVESOURCECODE;
        reqFingerPrint = (CALCFINGERPRINT    & requirements) == CALCFINGERPRINT;
        reqPostData    = (POSTDATA           & requirements) == POSTDATA;

        if (!showRequirements) {
            return;
        }

        // Output of requirements, one line per flag.
        System.out.println("-> Links      : " + reqLinks);
        System.out.println("-> Inclusions : " + reqInclusions);
        System.out.println("-> Parse      : " + reqParse);
        System.out.println("-> Keywords   : " + reqKeywords);
        System.out.println("-> BodyText   : " + reqBodyText);
        System.out.println("-> SourceCode : " + reqSourceCode);
        System.out.println("-> FingerPrint: " + reqFingerPrint);
        System.out.println("-> Post Data  : " + reqPostData);
    }

    /**
     * Parses one document: reads all tokens from <code>in</code>, collects data for the
     * NetNode and HtmlNode objects (number of links, number of images, language etc.)
     * and forwards every token to <code>out</code>.
     * <br>
     * The NetNode of the document is read from the meta info key "baseNode" of
     * <code>in</code>. If parsing is required, the HtmlNode is put into the meta info
     * under the key "htmlDocument". If links are required, opening A and AREA tags with
     * an href are written to <code>out</code> as <code>LinkToken</code>s.
     * <br>
     * Exceptions are logged with <code>ErrorLog</code> and are not propagated. A token
     * whose analysis failed is still forwarded to <code>out</code>.
     *
     * @param in  the token stream of the document to analyse
     * @param out the stream the (possibly transformed) tokens are written to
     */
    public void parse(TokenInputStream in, TokenOutputStream out) {
        try {
            NetNode baseNode = null;
            HtmlNode document = null;
            Link link = null;
            // helper variables: text collectors
            StringBuffer bodytext     = new StringBuffer();
            StringBuffer sourcecode   = new StringBuffer();
            StringBuffer title        = new StringBuffer();
            StringBuffer author       = new StringBuffer();
            StringBuffer keywords     = new StringBuffer();
            StringBuffer description  = new StringBuffer();
            StringBuffer abstractText = new StringBuffer();
            StringBuffer linkText     = new StringBuffer();
            StringBuffer linkAltText  = new StringBuffer();
            StringBuffer emailText    = new StringBuffer();
            // helper variables: parser state
            boolean readTitle = false;          // Text tokens currently belong to the title
            boolean isLink    = false;          // Inside an A link (only set if links are required)
            boolean isEmail   = false;          // Inside a mailto: link
            boolean foundAbstractBody = false;  // A headline that starts the abstract was found
            int abstractWordCount = 0;
            int linkCount = 0;                  // Count Links
            int extLinkCount = 0;               // Count external Links
            int images = 0;                     // Count images
            long space = 0L;                    // Sum of width*height of all images, -1: unknown
            int words = 0;                      // Count words (one per body text token)
            int paragraphs = 0;                 // Count paragraphs on page
            int emailCount = 0;                 // Count E-Mail-Addresses on page: Only one mail -> probably author.

            // Reset Attributes, also used in findInclusion.
            hasFrames = false;
            hasForms = false;
            // hasCommercial=false;
            hasJavascript = false;
            hasPlugins = false;
            hasAnimation = false;
            hasSound = false;
            LanguageAnalyzer ca = new LanguageAnalyzer();

            baseNode = (NetNode) in.getMetaInfo().get("baseNode");
            // create meta-data
            if (reqParse) {
                document = HtmlNodeCache.get(baseNode);
                in.getMetaInfo().put("htmlDocument", document);
            }

            if (reqPostData) {  // Do something with this...?!
                System.out.println(in.getMetaInfo().get("postData"));
            }

            Token t = null;

            while ((t = in.read()) != null) {
                boolean written = false;        // true once a LinkToken replaced t in the output

                try { // "while-try"
                    if (reqSourceCode || reqFingerPrint) {
                        sourcecode.append(t.toString());    // Save sourcecode of page.
                    }

                    if (t instanceof HtmlTagToken) {
                        HtmlTagToken tag = (HtmlTagToken) t;

                        // find inclusions if required
                        if (reqInclusions) {
                            findInclusion(tag, baseNode, reqInclusions);
                        }

                        final int type = tag.getTagType();
                        final boolean endTag = tag.isEndTag();
                        // H1 to H3 may replace a missing title and close with a line break.
                        final boolean titleHeading = type == tag.T_H1
                                || type == tag.T_H2
                                || type == tag.T_H3;

                        // handle forms -------------
                        // Only visible INPUT fields are of interest; hidden fields do not
                        // make the page a form page.
                        if (type == tag.T_INPUT) {
                            String inputType = tag.getParam("type");

                            if (inputType != null
                                    && !inputType.equalsIgnoreCase("hidden")) {
                                hasForms = true;
                            }
                        }

                        // handle links: Link start -------------
                        if ((type == tag.T_A || type == tag.T_AREA)
                                && tag.hasParam("href")) {
                            String href = tag.getParam("href");

                            // we found a link, so count it! Only requirement needed is
                            // parseDocument, as this is important data for the robot.
                            if (href.indexOf("#") != 0) {   // Count only non-local non-fragment-links
                                linkCount++;
                            }
                            // Remember mailto: links, their text may name the author.
                            if (reqParse && type == tag.T_A
                                    && href.indexOf("mailto:") >= 0) {
                                emailCount++;
                                isEmail = true;
                            }
                            // the rest is only necessary if links are required
                            if (reqLinks) {
                                try { // "req-links-try"
                                    // Create objects...
                                    SimpleUri uri = new SimpleUri(baseNode.getSUri(), href);
                                    NetNode to = NetNodeCache.get(uri);

                                    link = LinkCache.get(baseNode, to, uri.getFragment(), tag);
                                    LinkToken lt = new LinkToken(tag, link);

                                    // objects to gather LinkText and Image alt parameter...
                                    linkText = new StringBuffer();
                                    linkAltText = new StringBuffer();
                                    // Is it an external link? Only considered, if reqLinks!
                                    if (!to.toHostString().equals(baseNode.toHostString())) {
                                        extLinkCount++;                 // external link!
                                    }
                                    if (type == tag.T_AREA) {
                                        // sometimes there is an alt text in Imagemaps "AREA"-Tags (used by Netscape)
                                        if (lt.hasParam("alt")
                                                && lt.getParam("alt").length() > 0) {
                                            linkAltText = new StringBuffer(lt.getParam("alt"));
                                        }
                                        link.setAlt(HtmlStreamTokenizer.unescape(linkAltText.toString()));
                                        link.store();           // Save Link to Database now, as there are usually no closing tags.
                                    } else {
                                        // An A-Link started, so set flag -> Text is read and closing tag is considered.
                                        isLink = true;
                                    }
                                    out.write(lt);
                                    written = true;
                                } catch (Exception exc_rlt) { // "req-links-try"
                                    ErrorLog.log(this, "handleRequest", "error in reqlinks", exc_rlt);
                                }
                            }       // if(reqLinks)
                        }           // if(openingLinkTag)

                        // handle links: link end -------------
                        // AREA-Tags have no closing tag, so only A is considered here.
                        if (isLink && type == tag.T_A && endTag && reqLinks) {
                            // Find useful abstract: remove text in preceding navigation links
                            // (only few words in the abstract so far and a quite short link).
                            if (reqParse && !foundAbstractBody
                                    && abstractWordCount <= 6
                                    && linkText.length() < 25) {
                                abstractWordCount = 0;              // Delete abstract so far and...
                                abstractText.setLength(0);          // try again.
                            }
                            // set image ALT text...
                            if (linkAltText != null && linkAltText.length() != 0
                                    && link.getAlt().length() < linkAltText.length()) {
                                link.setAlt(HtmlStreamTokenizer.unescape(linkAltText.toString()));
                                linkAltText = null;
                            }
                            // set link text...
                            if (linkText != null && linkText.length() != 0
                                    && link.getLinkText().length() < linkText.length()) {
                                link.setLinkText(HtmlStreamTokenizer.unescape(linkText.toString()));
                                linkText = null;
                            }
                            link.store();           // Save Link to Database...
                            // The link is over. Do not delete link object, as it is still needed in cache!
                            isLink = false;
                        }           // handle link end

                        if (isEmail && type == tag.T_A && endTag) {
                            isEmail = false;        // The email link is over...
                        }

                        if (reqParse) {
                            // handle Paragraphs -------------
                            if (type == tag.T_P && !endTag) {
                                paragraphs++;       // count paragraphs
                            }

                            // handle meta-tags -------------
                            if (type == tag.T_META
                                    && tag.hasParam("name")
                                    && tag.hasParam("content")) {
                                String name = tag.getParam("name").toLowerCase();
                                String content = tag.getParam("content");

                                if (name.equals("author")) {
                                    author.append(HtmlStreamTokenizer.unescape(content));
                                } else if (name.equals("keywords")) {
                                    keywords.append(HtmlStreamTokenizer.unescape(content));
                                } else if (name.equals("description") && content.length() > 1) {
                                    // Several descriptions are joined, one per line.
                                    if (description.length() > 1) {
                                        description.append("\n");
                                    }
                                    description.append(HtmlStreamTokenizer.unescape(content).replace('\n', ' ').replace('\r', ' '));
                                }
                            }       // handle metatags

                            // handle title -------------
                            if (type == tag.T_TITLE && !endTag) {
                                readTitle = true;
                            }
                            if (readTitle && type == tag.T_TITLE && endTag) {
                                readTitle = false;
                                // Set title as early as possible for access events!!
                                document.setTitle(HtmlStreamTokenizer.unescape(title.toString()));
                            }
                            // handle title, 2nd try: <TITLE>-Tag was not found or empty:
                            // Create title from <H1> to <H3>
                            if (title.length() == 0 && titleHeading && !endTag) {
                                readTitle = true;
                            }
                            if (readTitle && titleHeading && endTag) {
                                readTitle = false;
                                document.setTitle(title.toString());
                            }       // handle title

                            // Create useful Abstract: Remove preceding navigation link texts.
                            // Only while no headline was found and less than 30 words were
                            // read: otherwise a navigation text is quite unlikely that long.
                            if (!foundAbstractBody && words < 30 && !endTag) {
                                boolean headline = titleHeading || type == tag.T_H4;

                                if (!headline && type == tag.T_FONT) {
                                    // A big font is treated like a headline.
                                    String size = tag.getParam("size");

                                    headline = size != null
                                            && (size.equals("+2")
                                                    || size.equals("+3")
                                                    || size.equals("+4")
                                                    || size.equals("6")
                                                    || size.equals("7")
                                                    || size.equals("8"));
                                }
                                if (headline) {
                                    abstractText.setLength(0);
                                    abstractWordCount = 0;
                                    foundAbstractBody = true;           // Now the useful text of the body has quite likely started...
                                }
                            }       // create useful body text
                            // Abstract removed as quite useless if paragraph is much too short.
                            if ((type == tag.T_P
                                            || type == tag.T_TD
                                            || type == tag.T_OPTION)
                                    && !foundAbstractBody
                                    && abstractWordCount <= 3) {
                                abstractText.setLength(0);
                                abstractWordCount = 0;
                            }

                            // handle IMaGes -------------
                            if (type == tag.T_IMG && !endTag) {
                                images++;           // count images
                                // Calculate space. -1 means: space can not be calculated,
                                // because at least one image had no usable size.
                                if (space != -1) {
                                    if (tag.hasParam("width")
                                            && tag.hasParam("height")) {
                                        try {
                                            int width = Integer.parseInt(tag.getParam("width"));
                                            int height = Integer.parseInt(tag.getParam("height"));

                                            // Multiply as long: the int product may overflow.
                                            space += (long) width * height;
                                        } catch (NumberFormatException nfe) {
                                            space = -1;
                                        }
                                    } else {
                                        space = -1;
                                    }
                                }                   // calculate space
                                // Inside a link the ALT parameter is used as alt text of the link.
                                if (isLink && tag.hasParam("alt")) {
                                    linkAltText = new StringBuffer(tag.getParam("alt"));
                                }
                            }       // handle images

                            // Add CRs...
                            if (type == tag.T_P
                                    || type == tag.T_BR
                                    || type == tag.T_LI
                                    || type == tag.T_DD
                                    || (titleHeading && endTag)
                                    || type == tag.T_TD) {
                                if (reqBodyText) {              // Save body text required?
                                    bodytext.append('\n');      // Insert CRs...
                                }
                                // Abstract also needs CRs for better readability...
                                if (abstractWordCount > 0 && abstractText.length() > 5
                                        && abstractText.charAt(abstractText.length() - 1) != '\n') {
                                    abstractText.append('\n');  // Insert CR
                                }
                            }
                        }           // if(reqParse)
                    }               // instanceof HtmlTagToken

                    // handle text
                    if (t instanceof HtmlTextToken) {
                        HtmlTextToken tt = (HtmlTextToken) t;

                        if (reqParse || reqBodyText) {              // Parsing of the document required?
                            final String text = tt.getUnescapedText();

                            // are we in the <title> or in <H1>-<H3>?
                            if (readTitle) {
                                title.append(text);
                                title.append(' ');
                            } else {                // ordinary body text...
                                words++;            // count the words!
                                if (reqBodyText) {
                                    bodytext.append(text);
                                    bodytext.append(' ');
                                }
                                // Limit length of abstract: 25 words and max 255 chars.
                                if (abstractWordCount < 25
                                        && abstractText.length() + text.length() < 255) {
                                    // HTML-Pages are filled with stupid control characters... :-(
                                    // Very short tokens are only taken if they start with a
                                    // printable ASCII character; empty tokens are skipped.
                                    if (text.length() > 2
                                            || (text.length() > 0
                                                    && text.charAt(0) > 32
                                                    && text.charAt(0) < 128)) {
                                        abstractText.append(text);
                                        abstractText.append(' ');       // Separate the words by a blank.
                                        abstractWordCount++;
                                    }
                                } else if (abstractWordCount == 25
                                        && abstractText.length() < 252
                                        && abstractText.charAt(abstractText.length() - 1) != '.') {
                                    abstractWordCount++;
                                    abstractText.append("...");         // Add ... at the end...
                                }
                            }
                            ca.countWord(tt.getText());             // find out language!
                        }           // reqparse
                        // Save link text
                        if (isLink) {               // Reading Link text?
                            linkText.append(tt.getUnescapedText());
                            linkText.append(' ');
                        }
                        // Save link text of the first mailto: link for author information...
                        if (isEmail && emailCount == 1) {
                            emailText.append(tt.getUnescapedText());
                            emailText.append(' ');
                        }
                    }               // instanceof HtmlTextToken
                } catch (Exception exc_wt) { // "while-try"
                    ErrorLog.log(this, "handleRequest", "error in while loop", exc_wt);
                }

                // Forward the token even if its analysis failed, so that no part of the
                // document gets lost. LinkTokens were written before.
                if (!written) {
                    try {
                        out.write(t);
                    } catch (Exception exc_out) {
                        ErrorLog.log(this, "handleRequest", "error forwarding token", exc_out);
                    }
                }
            }// while

            if (reqParse) {     // Was document parsed? Save data.
                // The title was already set above.
                if (author.length() > 0) {
                    document.setAuthor(author.toString());
                } else {
                    // No author meta information was given: use the text of the first
                    // mailto: link, unless it looks like a comment/feedback/contact link.
                    String emailAuthor = emailText.toString().trim();

                    if (emailAuthor.length() > 7 && emailAuthor.length() < 40
                            && emailAuthor.indexOf("omment") < 0
                            && emailAuthor.indexOf("eedback") < 0
                            && emailAuthor.indexOf("ontact") < 0
                            && emailCount <= 2) {
                        document.setAuthor(emailAuthor);
                    }
                }
                // Save at most MAX_BODYTEXT characters of the body text.
                if (reqBodyText) {
                    if (bodytext.length() > MAX_BODYTEXT) {
                        bodytext.setLength(MAX_BODYTEXT);
                    }
                    document.setBodyText(bodytext.toString());
                }
                // Create and store fingerprint from the complete (not yet truncated) source.
                if (reqFingerPrint) {
                    document.setFingerprint(SimpleUri.getHexHashCode(sourcecode.toString()));
                }
                // Save at most MAX_SOURCECODE characters of the source code.
                if (sourcecode.length() > MAX_SOURCECODE) {
                    sourcecode.setLength(MAX_SOURCECODE);
                }
                if (reqSourceCode) {
                    document.setSourceCode(sourcecode.toString());
                }
                if (description.length() > 255) {
                    description.setLength(255);
                }
                document.setDescription(description.toString());
                if (abstractText.length() > 255) {
                    abstractText.setLength(255);
                }
                document.setAbstract(abstractText.toString());
                document.setLanguage(ca.getLanguage());
                document.setNumberOfLinks(String.valueOf(linkCount));
                document.setNumberOfExternalLinks(String.valueOf(extLinkCount));
                document.setNumberOfImages(String.valueOf(images));
                document.setSpaceOfImages(String.valueOf(space));
                document.setNumberOfWords(String.valueOf(words));
                document.setNumberOfParagraphs(String.valueOf(paragraphs));
                document.setFrames(hasFrames);
                document.setForms(hasForms);                    // Page with Forms?
                // document.setCommercial(hasCommercial);  // Ads?
                document.setJavaScript(hasJavascript);          // JavaScript, JScript etc.?
                document.setPlugins(hasPlugins);                // Any Plugins?
                document.setAnimation(hasAnimation);            // Any animation
                document.setSound(hasSound);                    // Any background sound?
                if (reqKeywords) {
                    // Every word of the keywords meta tag becomes a keyword of this node.
                    StringTokenizer st = new StringTokenizer(keywords.toString(),
                            " \t\n\r\f,;.:");

                    while (st.hasMoreTokens()) {
                        String k = st.nextToken();

                        if (k.length() > 0) {
                            Keyword kw = KeywordCache.get(k);

                            Node2KeyCache.associate(baseNode, kw);
                        }
                    }
                }               // if(reqKeywords)
                // After finishing this node: store to DB
                document.store();
            }                   // reqParse
            baseNode.store();
            // This was the homepage of the Server: use its title and description for the server.
            if (baseNode.getPath().equals("/") && baseNode.getFile().equals("")) {
                Server s = ServerCache.get(baseNode.getHost());

                s.setTitle(title.toString());
                s.setDescription(description.toString());
                s.access();
            }
        } catch (Exception exc) {
            ErrorLog.log(this, "handleRequest", "Fatal Error!", exc);
        }
    }

    // builds inclusion objects
    /**
     * Examines one HTML tag and registers the resources it pulls into the page.
     * <br>
     * Besides creating inclusions, the fields <code>hasPlugins</code>,
     * <code>hasAnimation</code>, <code>hasSound</code>, <code>hasFrames</code> and
     * <code>hasJavascript</code> are raised for the matching tag types; setting
     * them does not depend on <code>reqInclusions</code>.
     * Any exception raised on the way is reported to the ErrorLog and not rethrown.
     *
     * @param tag           the tag to inspect
     * @param baseNode      the node of the document that contains the tag
     * @param reqInclusions whether inclusion objects shall be created
     */
    private void findInclusion(HtmlTagToken tag, NetNode baseNode, boolean reqInclusions) {
        try {
            switch (tag.getTagType()) {
            case HtmlTagToken.T_IMG:
                // images: the regular source plus the two alternative source attributes
                if (reqInclusions) {
                    inclusion(baseNode, tag, "src", "alt");
                    inclusion(baseNode, tag, "lowsrc");
                    inclusion(baseNode, tag, "dynsrc");
                }
                break;

            case HtmlTagToken.T_EMBED:
                if (reqInclusions) {
                    inclusion(baseNode, tag, "src");
                }
                hasPlugins = true;
                if (tag.hasParam("src")) {
                    // Guess the kind of embedded media from the file extension.
                    String source = tag.getParam("src");
                    boolean animated = source.indexOf(".swf") > 0       // Flash
                            || source.indexOf(".mov") > 0               // Movie?
                            || source.indexOf(".avi") > 0;
                    if (animated) {
                        hasAnimation = true;
                    } else {
                        boolean audible = source.indexOf(".mid") > 0    // Midi / Wave?
                                || source.indexOf(".au") > 0
                                || source.indexOf(".wav") > 0;
                        if (audible) {
                            hasSound = true;
                        }
                    }
                }
                break;

            case HtmlTagToken.T_BGSOUND:
                if (reqInclusions) {
                    inclusion(baseNode, tag, "src");
                }
                hasSound = true;
                break;

            case HtmlTagToken.T_APPLET:
                // applets that declare a codebase are not recorded
                if (!tag.hasParam("codebase") && reqInclusions) {
                    inclusion(baseNode, tag, "src", "name");
                    inclusion(baseNode, tag, "archive");
                }
                break;

            case HtmlTagToken.T_OBJECT:
                if (reqInclusions) {
                    inclusion(baseNode, tag, "data", "type");
                }
                hasPlugins = true;
                break;

            case HtmlTagToken.T_FRAME:
                if (reqInclusions) {
                    inclusion(baseNode, tag, "src", "name");
                }
                hasFrames = true;
                break;

            case HtmlTagToken.T_SCRIPT:
                // original note: script tags are no more created.
                if (reqInclusions) {
                    inclusion(baseNode, tag, "src", "language");
                }
                hasJavascript = true;
                break;

            case HtmlTagToken.T_LINK:
                // browsers use <link> for inclusions, too (style sheets, font definitions)
                if (reqInclusions && tag.hasParam("rel")) {
                    String rel = tag.getParam("rel");

                    if (rel.equalsIgnoreCase("stylesheet")) {
                        inclusion(baseNode, tag, "href");
                    } else if (rel.equalsIgnoreCase("fontdef")) {
                        inclusion(baseNode, tag, "src");
                    }
                }
                break;

            default:
                // all other tags do not include anything
                break;
            }
        } catch (Exception exc) {
            ErrorLog.log(this, "findInclusion", "error in finding an inclusion", exc);
        }
    }

    // creates an inclusion if the parameter in tag exists, save additional info!
    /**
     * Creates an inclusion from <code>baseNode</code> to the resource named by
     * an attribute of <code>tag</code>. Nothing happens if the tag does not
     * carry that attribute.
     * <br>
     * The upper-cased tag string is stored as the tag type of the inclusion.
     * If <code>infoParameter</code> is given and present on the tag, its value
     * is stored as additional info.
     *
     * @param baseNode      the node of the document that contains the tag
     * @param tag           the tag referencing the included resource
     * @param parameter     name of the attribute holding the resource's URI
     * @param infoParameter name of an attribute with additional information,
     *                      or <code>null</code> if there is none
     */
    private void inclusion(NetNode baseNode, HtmlTagToken tag, String parameter,
            String infoParameter) {
        if (!tag.hasParam(parameter)) {
            return;
        }

        NetNode node = NetNodeCache.get(baseNode, tag.getParam(parameter));
        Inclusion inc = InclusionCache.get(baseNode, node);

        // Store tag Type: img, applet etc.
        // Upper-case with a fixed locale: the default-locale variant would turn
        // "img" into "\u0130MG" (dotted capital I) on a Turkish system.
        inc.setTag(tag.getTagString().toUpperCase(java.util.Locale.ROOT));
        if (infoParameter != null && tag.hasParam(infoParameter)) {
            inc.setInfo(tag.getParam(infoParameter));
        }
    }

    // creates an inclusion if the parameter in tag exists
    /**
     * Convenience variant for inclusions without additional info: delegates to
     * the four-argument <code>inclusion</code> with a <code>null</code> info
     * parameter.
     *
     * @param baseNode  the node of the document that contains the tag
     * @param tag       the tag referencing the included resource
     * @param parameter name of the attribute holding the resource's URI
     */
    private void inclusion(NetNode baseNode, HtmlTagToken tag, String parameter) {
        final String noInfoParameter = null;

        inclusion(baseNode, tag, parameter, noInfoParameter);
    }
}

