/*
 * Decompiled with CFR 0.152.
 */
package scone.util;

import adc.parser.HtmlStreamTokenizer;
import java.util.Date;
import java.util.StringTokenizer;
import scone.netobjects.HtmlNode;
import scone.netobjects.HtmlNodeCache;
import scone.netobjects.Inclusion;
import scone.netobjects.InclusionCache;
import scone.netobjects.Keyword;
import scone.netobjects.KeywordCache;
import scone.netobjects.Link;
import scone.netobjects.LinkCache;
import scone.netobjects.LinkToken;
import scone.netobjects.NetNode;
import scone.netobjects.NetNodeCache;
import scone.netobjects.Node2KeyCache;
import scone.netobjects.Server;
import scone.netobjects.ServerCache;
import scone.netobjects.SimpleUri;
import scone.util.ErrorLog;
import scone.util.LanguageAnalyzer;
import scone.util.tokenstream.HtmlTagToken;
import scone.util.tokenstream.HtmlTextToken;
import scone.util.tokenstream.Token;
import scone.util.tokenstream.TokenInputStream;
import scone.util.tokenstream.TokenOutputStream;

public class DocumentParser {
    /** Copyright notice of the original authors. */
    public static final String COPYRIGHT = "(C) Harald Weinreich & Volkert Buchmann";

    /*
     * Requirement bit masks accepted by the constructors. A requirement counts as
     * requested only when ALL bits of its mask are set in the value passed to the
     * constructor. Every mask contains bit 0x100000; CONSIDERKEYWORDS, SAVEBODYTEXT,
     * SAVESOURCECODE and CALCFINGERPRINT additionally contain every bit of
     * PARSEDOCUMENT, so requesting one of them also switches on reqParse.
     */
    /** Mask tested to set {@code reqLinks}. */
    public static final int CONSIDERLINKS = 0x100008;
    /** Mask tested to set {@code reqInclusions}. */
    public static final int CONSIDERINCLUSIONS = 0x100010;
    /** Mask tested to set {@code reqParse}. */
    public static final int PARSEDOCUMENT = 0x100001;
    /** Mask tested to set {@code reqKeywords}; includes the PARSEDOCUMENT bits. */
    public static final int CONSIDERKEYWORDS = 0x100041;
    /** Mask tested to set {@code reqBodyText}; includes the PARSEDOCUMENT bits. */
    public static final int SAVEBODYTEXT = 0x100081;
    /** Mask tested to set {@code reqSourceCode}; includes the PARSEDOCUMENT bits. */
    public static final int SAVESOURCECODE = 0x100101;
    /** Mask tested to set {@code reqFingerPrint}; includes the PARSEDOCUMENT bits. */
    public static final int CALCFINGERPRINT = 0x100201;
    /** Mask tested to set {@code reqPostData}. */
    public static final int POSTDATA = 0x100020;

    /*
     * Requirement switches derived from the constructor argument.
     * NOTE: these fields are static although they are assigned in the constructor,
     * so creating another DocumentParser overwrites the switches seen by every
     * existing instance.
     */
    static boolean reqLinks;
    static boolean reqInclusions;
    static boolean reqParse;
    static boolean reqKeywords;
    static boolean reqBodyText;
    static boolean reqSourceCode;
    static boolean reqFingerPrint;
    static boolean reqPostData;

    /** Same value as the length to which parse() truncates the collected body text. */
    public static final int MAX_BODYTEXT = 1000000;
    /** Same value as the length to which parse() truncates the collected source code. */
    public static final int MAX_SOURCECODE = 1000000;

    /*
     * Per-document feature flags. parse() resets them at its start, findInclusion()
     * and parse() raise them while tokens are processed, and parse() finally copies
     * them to the HtmlNode when reqParse is set.
     */
    private boolean hasFrames = false;
    private boolean hasForms = false;
    private boolean hasJavascript = false;
    private boolean hasPlugins = false;
    private boolean hasAnimation = false;
    private boolean hasSound = false;
    /**
     * Creates a parser for the given requirements and prints the resulting
     * switches to standard output.
     *
     * @param requirements bitwise combination of the requirement constants of this class
     */
    public DocumentParser(int requirements) {
        this(requirements, true);
    }

    /**
     * Creates a parser for the given requirements.
     *
     * <p>The requirement switches are static fields, so they are shared by all
     * instances and are overwritten by every constructor call.
     *
     * @param requirements     bitwise combination of the requirement constants of this class
     * @param showRequirements when {@code true}, each switch is printed to standard output
     */
    public DocumentParser(int requirements, boolean showRequirements) {
        reqLinks = DocumentParser.covers(requirements, CONSIDERLINKS);
        reqInclusions = DocumentParser.covers(requirements, CONSIDERINCLUSIONS);
        reqParse = DocumentParser.covers(requirements, PARSEDOCUMENT);
        reqKeywords = DocumentParser.covers(requirements, CONSIDERKEYWORDS);
        reqBodyText = DocumentParser.covers(requirements, SAVEBODYTEXT);
        reqSourceCode = DocumentParser.covers(requirements, SAVESOURCECODE);
        reqFingerPrint = DocumentParser.covers(requirements, CALCFINGERPRINT);
        reqPostData = DocumentParser.covers(requirements, POSTDATA);
        if (!showRequirements) {
            return;
        }
        DocumentParser.report("-> Links      : ", reqLinks);
        DocumentParser.report("-> Inclusions : ", reqInclusions);
        DocumentParser.report("-> Parse      : ", reqParse);
        DocumentParser.report("-> Keywords   : ", reqKeywords);
        DocumentParser.report("-> BodyText   : ", reqBodyText);
        DocumentParser.report("-> SourceCode : ", reqSourceCode);
        DocumentParser.report("-> FingerPrint: ", reqFingerPrint);
        DocumentParser.report("-> Post Data  : ", reqPostData);
    }

    /** Returns {@code true} when every bit of {@code mask} is set in {@code requirements}. */
    private static boolean covers(int requirements, int mask) {
        return (requirements & mask) == mask;
    }

    /** Prints one "label value" line to standard output. */
    private static void report(String label, boolean enabled) {
        System.out.print(label);
        System.out.println(enabled);
    }

    /**
     * Reads every token from {@code in}, analyses it according to the static
     * requirement switches and forwards it to {@code out}.
     *
     * <p>The node stored under the key {@code "baseNode"} in the stream's meta
     * info identifies the document. Depending on the switches the method
     * <ul>
     *   <li>registers inclusions via {@link #findInclusion},</li>
     *   <li>creates a {@link Link} for tags of type 1 and 6 that carry an
     *       {@code href} and writes a {@link LinkToken} instead of the tag,</li>
     *   <li>collects title, meta data, an abstract, body text, source code and
     *       several counters and stores them in the {@link HtmlNode},</li>
     *   <li>associates the meta keywords with the base node.</li>
     * </ul>
     * When the base node has path "/" and an empty file name, title and
     * description are also copied to the {@link Server} entry of its host.
     *
     * <p>Tag types are compared against numeric literals because the decompiler
     * inlined the tokenizer's constants. The tag names given in comments are
     * inferred from the attributes that are read - TODO confirm against the
     * tokenizer's tag table.
     *
     * <p>No exception leaves this method: failures while handling one token are
     * logged and that token is not forwarded; any other failure is logged as fatal.
     *
     * @param in  token stream of the document; its meta info must contain "baseNode"
     * @param out stream that receives the (possibly replaced) tokens
     */
    public void parse(TokenInputStream in, TokenOutputStream out) {
        try {
            HtmlNode document = null;
            Link link = null;
            StringBuffer bodytext = new StringBuffer();
            StringBuffer sourcecode = new StringBuffer();
            StringBuffer title = new StringBuffer();
            StringBuffer author = new StringBuffer();
            StringBuffer keywords = new StringBuffer();
            StringBuffer description = new StringBuffer();
            StringBuffer abstractText = new StringBuffer();
            StringBuffer linkText = new StringBuffer();
            StringBuffer linkAltText = new StringBuffer();
            StringBuffer emailText = new StringBuffer();
            boolean readTitle = false;
            boolean isLink = false;
            boolean isEmail = false;
            boolean foundAbstractBody = false;
            int abstractWordCount = 0;
            int linkCount = 0;
            int extLinkCount = 0;
            int images = 0;
            // Sum of width*height of all images; -1 as soon as one image has no usable size.
            long space = 0L;
            int words = 0;
            int paragraphs = 0;
            int emailCount = 0;
            this.hasFrames = false;
            this.hasForms = false;
            this.hasJavascript = false;
            this.hasPlugins = false;
            this.hasAnimation = false;
            this.hasSound = false;
            LanguageAnalyzer ca = new LanguageAnalyzer();
            NetNode baseNode = (NetNode)in.getMetaInfo().get("baseNode");
            if (reqParse) {
                document = HtmlNodeCache.get(baseNode);
                in.getMetaInfo().put("htmlDocument", document);
            }
            if (reqPostData) {
                System.out.println(in.getMetaInfo().get("postData"));
            }

            Token t;
            while ((t = in.read()) != null) {
                // Set when a LinkToken has already been written in place of the tag.
                boolean written = false;
                try {
                    if (reqSourceCode || reqFingerPrint) {
                        sourcecode.append(t.toString());
                    }

                    if (t instanceof HtmlTagToken) {
                        HtmlTagToken tag = (HtmlTagToken)t;
                        if (reqInclusions) {
                            this.findInclusion(tag, baseNode, reqInclusions);
                        }
                        int type = tag.getTagType();
                        boolean endTag = tag.isEndTag();

                        // Type 51 (presumably <input>): any non-hidden field marks the page as a form.
                        if (type == 51 && tag.getParam("type") != null && !tag.getParam("type").equalsIgnoreCase("hidden")) {
                            this.hasForms = true;
                        }

                        // Types 1 and 6 (presumably <a> and <area>) with an href are links.
                        if ((type == 1 || type == 6) && tag.hasParam("href")) {
                            String href = tag.getParam("href");
                            // Pure fragment references ("#...") are not counted.
                            if (href.indexOf("#") != 0) {
                                ++linkCount;
                            }
                            if (reqParse && type == 1 && href.indexOf("mailto:") >= 0) {
                                ++emailCount;
                                isEmail = true;
                            }
                            if (reqLinks) {
                                try {
                                    SimpleUri uri = new SimpleUri(baseNode.getSUri(), href);
                                    NetNode to = NetNodeCache.get(uri);
                                    link = LinkCache.get(baseNode, to, uri.getFragment(), tag);
                                    LinkToken lt = new LinkToken(tag, link);
                                    linkText = new StringBuffer();
                                    linkAltText = new StringBuffer();
                                    if (!to.toHostString().equals(baseNode.toHostString())) {
                                        ++extLinkCount;
                                    }
                                    if (type == 6) {
                                        // Type 6 has no enclosed text: the link is completed immediately.
                                        if (lt.hasParam("alt") && lt.getParam("alt").length() > 0) {
                                            linkAltText = new StringBuffer(lt.getParam("alt"));
                                        }
                                        link.setAlt(HtmlStreamTokenizer.unescape(linkAltText.toString()));
                                        link.store();
                                    } else {
                                        // Text up to the matching end tag is collected as link text.
                                        isLink = true;
                                    }
                                    out.write(lt);
                                    written = true;
                                }
                                catch (Exception exc_rlt) {
                                    ErrorLog.log(this, "handleRequest", "error in reqlinks", exc_rlt);
                                }
                            }
                        }

                        // End tag of type 1 closes the currently open link.
                        if (isLink && type == 1 && endTag && reqLinks) {
                            // A short link right at the start is not used as abstract text.
                            if (reqParse && !foundAbstractBody && abstractWordCount <= 6 && linkText.length() < 25) {
                                abstractWordCount = 0;
                                abstractText.setLength(0);
                            }
                            if (linkAltText != null && linkAltText.length() != 0 && link.getAlt().length() < linkAltText.length()) {
                                link.setAlt(HtmlStreamTokenizer.unescape(linkAltText.toString()));
                                linkAltText = null;
                            }
                            if (linkText != null && linkText.length() != 0 && link.getLinkText().length() < linkText.length()) {
                                link.setLinkText(HtmlStreamTokenizer.unescape(linkText.toString()));
                                linkText = null;
                            }
                            link.store();
                            isLink = false;
                        }
                        if (isEmail && type == 1 && endTag) {
                            isEmail = false;
                        }

                        if (reqParse) {
                            // Type 73 (presumably <p>).
                            if (type == 73 && !endTag) {
                                ++paragraphs;
                            }

                            // Type 62 (presumably <meta>) with name/content pairs.
                            if (type == 62 && tag.hasParam("name") && tag.hasParam("content")) {
                                String name = tag.getParam("name").toLowerCase();
                                String content = tag.getParam("content");
                                if (name.equals("author")) {
                                    author.append(HtmlStreamTokenizer.unescape(content));
                                } else if (name.equals("keywords")) {
                                    keywords.append(HtmlStreamTokenizer.unescape(content));
                                } else if (name.equals("description") && content.length() > 1) {
                                    if (description.length() > 1) {
                                        description.append("\n");
                                    }
                                    description.append(HtmlStreamTokenizer.unescape(content).replace('\n', ' ').replace('\r', ' '));
                                }
                            }

                            // Type 97 (presumably <title>): text between start and end tag is the title.
                            if (type == 97 && !endTag) {
                                readTitle = true;
                            }
                            if (readTitle && type == 97 && endTag) {
                                readTitle = false;
                                document.setTitle(HtmlStreamTokenizer.unescape(title.toString()));
                            }

                            // Types 40-42 (presumably <h1>-<h3>) supply the title while none has been read yet.
                            boolean topHeading = type == 40 || type == 41 || type == 42;
                            if (title.length() == 0 && topHeading && !endTag) {
                                readTitle = true;
                            }
                            if (readTitle && topHeading && endTag) {
                                readTitle = false;
                                document.setTitle(title.toString());
                            }

                            // The first heading (types 40-43) or large font tag within the first
                            // 30 words restarts the abstract and fixes its starting point.
                            if (!foundAbstractBody && words < 30 && !endTag
                                    && (topHeading || type == 43 || DocumentParser.isLargeFont(tag))) {
                                abstractText.setLength(0);
                                abstractWordCount = 0;
                                foundAbstractBody = true;
                            }

                            // Types 73, 92 and 71 discard an abstract of at most three words
                            // as long as no starting point has been fixed.
                            if ((type == 73 || type == 92 || type == 71) && !foundAbstractBody && abstractWordCount <= 3) {
                                abstractText.setLength(0);
                                abstractWordCount = 0;
                            }

                            // Type 50 (presumably <img>).
                            if (type == 50 && !endTag) {
                                ++images;
                                if (space != -1L) {
                                    if (tag.hasParam("width") && tag.hasParam("height")) {
                                        try {
                                            int width = Integer.parseInt(tag.getParam("width"));
                                            int height = Integer.parseInt(tag.getParam("height"));
                                            // Widen before multiplying so that large dimensions cannot overflow int.
                                            space += (long)width * height;
                                        }
                                        catch (NumberFormatException nfe) {
                                            space = -1L;
                                        }
                                    } else {
                                        space = -1L;
                                    }
                                }
                                if (isLink && tag.hasParam("alt")) {
                                    linkAltText = new StringBuffer(tag.getParam("alt"));
                                }
                            }

                            // These tags end a line in the collected body text and abstract.
                            boolean lineBreak = type == 73 || type == 17 || type == 57 || type == 25
                                    || type == 92 || (topHeading && endTag);
                            if (lineBreak) {
                                if (reqBodyText) {
                                    bodytext.append('\n');
                                }
                                if (abstractWordCount > 0 && abstractText.length() > 5
                                        && abstractText.charAt(abstractText.length() - 1) != '\n') {
                                    abstractText.append('\n');
                                }
                            }
                        }
                    }

                    if (t instanceof HtmlTextToken) {
                        HtmlTextToken tt = (HtmlTextToken)t;
                        if (reqParse || reqBodyText) {
                            if (readTitle) {
                                title.append(tt.getUnescapedText());
                                title.append(' ');
                            } else {
                                ++words;
                                String text = tt.getUnescapedText();
                                if (reqBodyText) {
                                    bodytext.append(text);
                                    bodytext.append(' ');
                                }
                                if (abstractWordCount < 25 && abstractText.length() + text.length() < 255) {
                                    // Skip one- and two-character tokens that start with a control
                                    // or non-ASCII character; the length test guards charAt(0).
                                    if (text.length() > 2
                                            || (text.length() > 0 && text.charAt(0) > ' ' && text.charAt(0) < '\u0080')) {
                                        abstractText.append(text);
                                        abstractText.append(' ');
                                        ++abstractWordCount;
                                    }
                                } else if (abstractWordCount == 25 && abstractText.length() < 252
                                        && abstractText.charAt(abstractText.length() - 1) != '.') {
                                    // Mark the abstract as truncated exactly once.
                                    ++abstractWordCount;
                                    abstractText.append("...");
                                }
                            }
                            ca.countWord(tt.getText());
                        }
                        if (isLink) {
                            linkText.append(tt.getUnescapedText());
                            linkText.append(' ');
                        }
                        // Only the text of the first mailto link is kept.
                        if (isEmail && emailCount == 1) {
                            emailText.append(tt.getUnescapedText());
                            emailText.append(' ');
                        }
                    }

                    if (!written) {
                        out.write(t);
                    }
                }
                catch (Exception exc_wt) {
                    ErrorLog.log(this, "handleRequest", "error in while loop", exc_wt);
                }
            }

            if (reqParse) {
                if (author.length() > 0) {
                    document.setAuthor(author.toString());
                } else {
                    // Without a meta author, fall back to the text of the first mailto
                    // link unless it looks like a generic comment/feedback/contact link
                    // or the page contains more than two mail links.
                    String mailLabel = emailText.toString().trim();
                    if (mailLabel.length() > 7 && mailLabel.length() < 40
                            && mailLabel.indexOf("omment") < 0
                            && mailLabel.indexOf("eedback") < 0
                            && mailLabel.indexOf("ontact") < 0
                            && emailCount <= 2) {
                        document.setAuthor(mailLabel);
                    }
                }
                if (bodytext.length() > MAX_BODYTEXT) {
                    bodytext.setLength(MAX_BODYTEXT);
                }
                if (reqBodyText) {
                    document.setBodyText(bodytext.toString());
                }
                // The fingerprint is taken from the complete source, before truncation.
                if (reqFingerPrint) {
                    document.setFingerprint(SimpleUri.getHexHashCode(sourcecode.toString()));
                }
                if (sourcecode.length() > MAX_SOURCECODE) {
                    sourcecode.setLength(MAX_SOURCECODE);
                }
                if (reqSourceCode) {
                    document.setSourceCode(sourcecode.toString());
                }
                if (description.length() > 255) {
                    description.setLength(255);
                }
                document.setDescription(description.toString());
                if (abstractText.length() > 255) {
                    abstractText.setLength(255);
                }
                document.setAbstract(abstractText.toString());
                document.setLanguage(ca.getLanguage());
                document.setNumberOfLinks(String.valueOf(linkCount));
                document.setNumberOfExternalLinks(String.valueOf(extLinkCount));
                document.setNumberOfImages(String.valueOf(images));
                document.setSpaceOfImages(String.valueOf(space));
                document.setNumberOfWords(String.valueOf(words));
                document.setNumberOfParagraphs(String.valueOf(paragraphs));
                document.setFrames(this.hasFrames);
                document.setForms(this.hasForms);
                document.setJavaScript(this.hasJavascript);
                document.setPlugins(this.hasPlugins);
                document.setAnimation(this.hasAnimation);
                document.setSound(this.hasSound);
                if (reqKeywords) {
                    StringTokenizer st = new StringTokenizer(keywords.toString(), " \t\n\r\f,;.:");
                    while (st.hasMoreTokens()) {
                        String k = st.nextToken();
                        if (k.length() <= 0) {
                            continue;
                        }
                        Keyword kw = KeywordCache.get(k);
                        Node2KeyCache.associate(baseNode, kw);
                    }
                }
                document.store();
            }
            baseNode.store();
            if (baseNode.getPath().equals("/") && baseNode.getFile().equals("")) {
                Server s = ServerCache.get(baseNode.getHost());
                s.setTitle(title.toString());
                s.setDescription(description.toString());
                s.access();
            }
        }
        catch (Exception exc) {
            ErrorLog.log(this, "handleRequest", "Fatal Error!", exc);
        }
    }

    /**
     * Returns {@code true} for a tag of type 36 (presumably &lt;font&gt;) whose
     * {@code size} parameter is one of "+2", "+3", "+4", "6", "7" or "8".
     */
    private static boolean isLargeFont(HtmlTagToken tag) {
        if (tag.getTagType() != 36) {
            return false;
        }
        String size = tag.getParam("size");
        return size != null
                && (size.equals("+2") || size.equals("+3") || size.equals("+4")
                        || size.equals("6") || size.equals("7") || size.equals("8"));
    }

    /**
     * Inspects one tag for embedded resources and raises the matching feature
     * flags of this parser.
     *
     * <p>The tag type is compared against numeric literals; the tag names in the
     * comments below are inferred from the parameters that are read - TODO confirm.
     * Any exception is logged and swallowed.
     *
     * @param tag           tag to inspect
     * @param baseNode      node of the document that contains the tag
     * @param reqInclusions when {@code false} only the feature flags are updated
     *                      and no inclusion is registered
     */
    private void findInclusion(HtmlTagToken tag, NetNode baseNode, boolean reqInclusions) {
        try {
            switch (tag.getTagType()) {
                case 50: // presumably <img>
                    if (reqInclusions) {
                        this.inclusion(baseNode, tag, "src", "alt");
                        this.inclusion(baseNode, tag, "lowsrc");
                        this.inclusion(baseNode, tag, "dynsrc");
                    }
                    break;

                case 33: // presumably <embed>
                    if (reqInclusions) {
                        this.inclusion(baseNode, tag, "src");
                    }
                    this.hasPlugins = true;
                    if (tag.hasParam("src")) {
                        // The extension must not be at index 0 of the source string.
                        String source = tag.getParam("src");
                        boolean animated = source.indexOf(".swf") > 0
                                || source.indexOf(".mov") > 0
                                || source.indexOf(".avi") > 0;
                        if (animated) {
                            this.hasAnimation = true;
                        } else if (source.indexOf(".mid") > 0
                                || source.indexOf(".au") > 0
                                || source.indexOf(".wav") > 0) {
                            this.hasSound = true;
                        }
                    }
                    break;

                case 12: // presumably <bgsound>
                    if (reqInclusions) {
                        this.inclusion(baseNode, tag, "src");
                    }
                    this.hasSound = true;
                    break;

                case 5: // presumably <applet>; tags with a codebase are skipped
                    if (!tag.hasParam("codebase") && reqInclusions) {
                        this.inclusion(baseNode, tag, "src", "name");
                        this.inclusion(baseNode, tag, "archive");
                    }
                    break;

                case 69: // presumably <object>
                    if (reqInclusions) {
                        this.inclusion(baseNode, tag, "data", "type");
                    }
                    this.hasPlugins = true;
                    break;

                case 38: // presumably <frame>
                    if (reqInclusions) {
                        this.inclusion(baseNode, tag, "src", "name");
                    }
                    this.hasFrames = true;
                    break;

                case 81: // presumably <script>
                    if (reqInclusions) {
                        this.inclusion(baseNode, tag, "src", "language");
                    }
                    this.hasJavascript = true;
                    break;

                case 58: // presumably <link>
                    if (reqInclusions && tag.hasParam("rel")) {
                        String relation = tag.getParam("rel");
                        if (relation.equalsIgnoreCase("stylesheet")) {
                            this.inclusion(baseNode, tag, "href");
                        } else if (relation.equalsIgnoreCase("fontdef")) {
                            this.inclusion(baseNode, tag, "src");
                        }
                    }
                    break;

                default:
                    break;
            }
        }
        catch (Exception exc) {
            ErrorLog.log(this, "findInclusion", "error in finding an inclusion", exc);
        }
    }

    /**
     * Registers the resource named by {@code parameter} of {@code tag} as an
     * inclusion of {@code baseNode}. Does nothing when the tag lacks that parameter.
     *
     * @param baseNode      node of the including document
     * @param tag           tag that references the resource
     * @param parameter     name of the tag parameter holding the resource address
     * @param infoParameter name of a tag parameter whose value is stored as the
     *                      inclusion's info, or {@code null} for none
     */
    private void inclusion(NetNode baseNode, HtmlTagToken tag, String parameter, String infoParameter) {
        if (!tag.hasParam(parameter)) {
            return;
        }
        NetNode target = NetNodeCache.get(baseNode, tag.getParam(parameter));
        Inclusion entry = InclusionCache.get(baseNode, target);
        // The tag name is stored in upper case.
        entry.setTag(tag.getTagString().toUpperCase());
        boolean hasInfo = infoParameter != null && tag.hasParam(infoParameter);
        if (hasInfo) {
            entry.setInfo(tag.getParam(infoParameter));
        }
    }

    /**
     * Registers the resource named by {@code parameter} of {@code tag} as an
     * inclusion of {@code baseNode} without additional info.
     */
    private void inclusion(NetNode baseNode, HtmlTagToken tag, String parameter) {
        this.inclusion(baseNode, tag, parameter, null);
    }
}

