package evaluator.departureexperiment;


import scone.netobjects.NetNode;
import scone.proxy.HtmlTokenEditor;
import scone.util.ErrorLog;
import scone.util.tokenstream.HtmlTagToken;
import scone.util.tokenstream.SconePipe;
import scone.util.tokenstream.Token;
import scone.util.tokenstream.TokenInputStream;
import scone.util.tokenstream.TokenOutputStream;


/**
 * class HeiseNewsContentExtractor
 * <p>Description: Show only the content of Heise news pages without navigation areas.</p>
 * <p>The editor copies the token stream of a page from the pipe's input to its
 * output. For pages recognised as the Heise newsticker index or as a Heise
 * news content page, output is switched off at every DIV tag and switched
 * back on at the opening {@code heisetext} tag, a stylesheet link is inserted
 * in front of the BODY tag, and I/B tags are replaced by a placeholder tag.
 * All other pages are copied unchanged.</p>
 * <p>Copyright: Copyright (C) 2003</p>
 * <p>Company: University of Hamburg, Germany</p>
 * @author Hartmut Obendorf, http://asi-www.informatik.uni-hamburg.de/personen/obendorf/
 * @author Harald Weinreich, http://vsis-www.informatik.uni-hamburg.de/~weinreic/
 * @version 1.0
 */
public class HeiseNewsContentExtractor extends HtmlTokenEditor {

    /** Stylesheet that is linked into every filtered page. */
    private static final String STYLESHEET_URL = "http://local.scone.de/eval.css";

    /** Tag name that switches output on (start tag) and, on content pages, off again (end tag). */
    private static final String CONTENT_TAG = "heisetext";

    /**
     * Copies all tokens from the pipe's input stream to its output stream,
     * dropping everything outside the content area on Heise news pages.
     *
     * <p>The page type is derived from the {@code "baseNode"} entry of the input
     * stream's meta info. If that entry is missing, no filtering is applied and
     * the page is passed through unchanged (previously this case raised a
     * NullPointerException before the first token was copied).</p>
     *
     * <p>Any exception raised while processing is reported to {@link ErrorLog}
     * and ends the copy loop; it is not propagated to the caller.</p>
     *
     * @param pipe the pipe providing the token input and output streams
     */
    public void handleRequest(SconePipe pipe) {
        try {
            TokenInputStream in = pipe.getTokenInputStream();
            TokenOutputStream out = pipe.getTokenOutputStream();

            NetNode baseNode = (NetNode) in.getMetaInfo().get("baseNode");
            boolean indexFilter = baseNode != null && isIndexPage(baseNode);     // Heise News index page
            boolean contentFilter = baseNode != null && isContentPage(baseNode); // Heise News content pages
            boolean filtering = indexFilter || contentFilter;
            boolean write = true; // whether the current token is copied to the output

            Token t;
            while ((t = in.read()) != null) {
                if (filtering && t instanceof HtmlTagToken) {
                    HtmlTagToken tag = (HtmlTagToken) t;

                    // Insert the css link directly in front of the BODY tag. It is
                    // written regardless of the current state of the write flag.
                    if (tag.getTagType() == tag.T_BODY) {
                        HtmlTagToken css = new HtmlTagToken("link");
                        css.setParam("href", STYLESHEET_URL);
                        css.setParam("type", "text/css");
                        css.setParam("rel", "stylesheet");
                        out.write(css);
                    }

                    // Any DIV tag (start or end) switches the output off.
                    if (tag.getTagType() == tag.T_DIV) {
                        write = false;
                    }

                    if (tag.getTagString().equalsIgnoreCase(CONTENT_TAG)) {
                        if (!tag.isEndTag()) {
                            // Content starts: emit a line break and resume output.
                            out.write(new HtmlTagToken("br"));
                            write = true;
                        } else if (contentFilter) {
                            // Content pages stop again at the closing content tag,
                            // which itself is not written.
                            write = false;
                        }
                    }

                    // The index page stops at the next closing P tag (not written).
                    if (indexFilter && tag.getTagType() == tag.T_P && tag.isEndTag()) {
                        write = false;
                    }

                    // Remove <I> and <B> tags by substituting a placeholder tag.
                    // NOTE(review): start and end tags are both replaced by the same
                    // new "x" tag token - confirm this is the intended output.
                    if (tag.getTagType() == tag.T_I || tag.getTagType() == tag.T_B) {
                        t = new HtmlTagToken("x");
                    }
                }

                if (write) {
                    out.write(t);
                }
            }
        } catch (Exception exc) {
            ErrorLog.log(this, "handleRequest()", "", exc);
        }
    }

    /**
     * Tells whether the node is the Heise newsticker index page.
     *
     * @param node the base node of the current page, not null
     * @return true if the node's path is exactly {@code /newsticker/}
     */
    private static boolean isIndexPage(NetNode node) {
        return node.getPath().equals("/newsticker/");
    }

    /**
     * Tells whether the node is a Heise news content page.
     *
     * @param node the base node of the current page, not null
     * @return true if the node's path or file name matches one of the known
     *         content page prefixes
     */
    private static boolean isContentPage(NetNode node) {
        String path = node.getPath();
        if (path.startsWith("/newsticker/data")    // at heise.de
                || path.startsWith("/heise")) {
            return true;
        }
        String file = node.getFile();
        return file.startsWith("dep_")
                || file.startsWith("heise");       // local
    }
}
