/*
 * Scone - The Web Enhancement Framework
 * Copyright (C) 2009 Harald Weinreich, Volkert Buchmann, Frank Wollenweber, Torsten Ha
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
 */
package scone.util.tokenstream;


import java.io.Reader;

import adc.parser.HtmlStreamTokenizer;
import adc.parser.HtmlTag;


/**
 * Parses an HTML document into Tokens.
 *
 * This class is a thin adapter around adc.parser.HtmlStreamTokenizer: it
 * pulls the next raw token from the wrapped tokenizer and converts it into
 * the matching Token implementation (tag, text, comment, script or style).
 */

public class HtmlTokenizer implements Tokenizer {

    // the wrapped tokenizer that does the actual parsing; null until assign() is called
    adc.parser.HtmlStreamTokenizer tokenizer = null;

    /**
     * creates a new HtmlTokenizer. A Reader has to be supplied with
     * assign() before tokens can be read.
     */
    public HtmlTokenizer() {}

    /**
     * assigns the Reader to the Tokenizer. Any previously assigned
     * Reader is replaced, because a fresh wrapped tokenizer is created.
     * @param r the Reader of the HTML document
     */
    public void assign(Reader r) {
        tokenizer = new adc.parser.HtmlStreamTokenizer(r);
    }

    /**
     * returns the next Token or null, if there are no more Tokens.
     *
     * Any exception raised while reading or parsing (including the one
     * caused by calling this method before assign()) is printed to the
     * standard error stream and reported as null.
     *
     * @return the next Token or null if there are no more Tokens
     */
    public Token nextToken() {
        try {
            // is there still a token?
            if (tokenizer.nextToken() == HtmlStreamTokenizer.TT_EOF) {
                return null;
            }
            // yes...
            switch (tokenizer.getTokenType()) {
            case HtmlStreamTokenizer.TT_TAG: {
                    HtmlTag tag = new HtmlTag();

                    tokenizer.parseTag(tokenizer.getStringValue(), tag);

                    if (tag.getTagString() == null) {   // Empty tag!
                        System.out.print("uh-oh, tag is empty: '");
                        System.out.print(tokenizer.getStringValue()+"' - Type: " );
                        System.out.println(tag.getTagType());
                        // hand the empty tag on as literal text instead of a tag token
                        return new HtmlTextToken(" < >");
                    }

                    return new HtmlTagToken(tag, tokenizer.getWhiteSpace());
                }

            case HtmlStreamTokenizer.TT_TEXT:
                return new HtmlTextToken(tokenizer.getStringValue().toString(), tokenizer.getWhiteSpace());

            case HtmlStreamTokenizer.TT_COMMENT:
                return new HtmlCommentToken(tokenizer.getStringValue().toString(), tokenizer.getWhiteSpace());

            case HtmlStreamTokenizer.TT_SCRIPT: {
                    String scriptString = tokenizer.getStringValue().toString();
                    int tagEnd = openingTagEnd(scriptString);

                    if (tagEnd < 0) {
                        // no usable opening tag: keep the content as text rather than
                        // failing, which would look like the end of the document
                        return new HtmlTextToken(scriptString, tokenizer.getWhiteSpace());
                    }
                    HtmlTag script = new HtmlTag();

                    tokenizer.parseTag(new StringBuffer(openingTagText(scriptString, tagEnd)), script);
                    return new HtmlScriptToken(script, elementBody(scriptString, tagEnd), tokenizer.getWhiteSpace());
                }

            case HtmlStreamTokenizer.TT_STYLE: {
                    String styleString = tokenizer.getStringValue().toString();
                    int tagEnd = openingTagEnd(styleString);

                    if (tagEnd < 0) {
                        // same fallback as for scripts: never turn a malformed
                        // element into a premature "no more tokens"
                        return new HtmlTextToken(styleString, tokenizer.getWhiteSpace());
                    }
                    HtmlTag style = new HtmlTag();

                    tokenizer.parseTag(new StringBuffer(openingTagText(styleString, tagEnd)), style);
                    return new HtmlStyleToken(style, elementBody(styleString, tagEnd), tokenizer.getWhiteSpace());
                }

            default:
                // unknown token types are passed through as plain text
                return new HtmlTextToken(tokenizer.getStringValue().toString(), tokenizer.getWhiteSpace());
            }
        } catch (Exception exc) {
            exc.printStackTrace();
        }
        return null;
    }

    /**
     * locates the '>' that terminates the opening tag of a script or style element.
     * @param element the complete element text
     * @return the index of the first '>' if it lies behind the first '<'
     *         (or if there is no '<' at all), otherwise -1
     */
    private static int openingTagEnd(String element) {
        int close = element.indexOf('>');

        // indexOf('<') is -1 when absent, so "close > open" also rejects a missing '>'
        return (close > element.indexOf('<')) ? close : -1;
    }

    /**
     * extracts the text between the first '<' and the '>' of the opening tag.
     * @param element the complete element text
     * @param tagEnd the index returned by openingTagEnd(), must not be negative
     * @return the opening tag without its angle brackets
     */
    private static String openingTagText(String element, int tagEnd) {
        return element.substring(element.indexOf('<') + 1, tagEnd);
    }

    /**
     * extracts the content between the opening tag and the last '<' of the element.
     * If no '<' follows the opening tag (the element was never closed), the
     * content runs to the end of the element text.
     * @param element the complete element text
     * @param tagEnd the index returned by openingTagEnd(), must not be negative
     * @return the content of the element
     */
    private static String elementBody(String element, int tagEnd) {
        int start = tagEnd + 1;
        int end = element.lastIndexOf('<');

        if (end < start) {
            // the only '<' belongs to the opening tag: there is no closing tag
            end = element.length();
        }
        return element.substring(start, end);
    }

}
