/*  Screem:  html.c,
 *  This file provides functions for manipulation of the html
 * 
 *  Copyright (C) 1999, 2000  David A Knight
 *
 *  This program is free software; you can redistribute it and/or modify
 *  it under the terms of the GNU General Public License as published by
 *  the Free Software Foundation; either version 2 of the License, or
 *  (at your option) any later version.
 *
 *  This program is distributed in the hope that it will be useful,
 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *  GNU General Public License for more details.
 *
 *  You should have received a copy of the GNU General Public License
 *  along with this program; if not, write to the Free Software
 *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 *
 *  For contact information with the author of this source code please see
 *  the AUTHORS file.  If there is no AUTHORS file present then check the
 *  about box under the help menu for a contact address
 */

#include <ctype.h>
#include <gnome.h>
#include <string.h>

#include <sys/stat.h>
#include <unistd.h>

#ifdef HAVE_GNOME_VFS
#include <libgnomevfs/gnome-vfs-uri.h>
#endif

#include "fileops.h"
#include "htmlfuncs.h"

#include "dtd.h"

/*
 * Taken from gnome-xml's HTML parser
 * start tags that imply the end of current element
 *
 * Layout: the table is a flat sequence of groups.  Each group is the
 * name of a start tag, followed by the names of the elements listed
 * for that start tag, followed by a NULL.  One extra NULL after the
 * last group terminates the whole table.
 * The table is read by screem_html_close_tag().
 */
static const gchar *htmlStartClose[] = {
"form",         "form", "p", "hr", "h1", "h2", "h3", "h4", "h5", "h6",
                "dl", "ul", "ol", "menu", "dir", "address", "pre",
                "listing", "xmp", "head", NULL,
"head",         "p", NULL,
"title",        "p", NULL,
"body",         "head", "style", "link", "title", "p", NULL,
"li",           "p", "h1", "h2", "h3", "h4", "h5", "h6", "dl", "address",
                "pre", "listing", "xmp", "head", "li", NULL,
"hr",           "p", "head", NULL,
"h1",           "p", "head", NULL,
"h2",           "p", "head", NULL,
"h3",           "p", "head", NULL,
"h4",           "p", "head", NULL,
"h5",           "p", "head", NULL,
"h6",           "p", "head", NULL,
"dir",          "p", "head", NULL,
"address",      "p", "head", "ul", NULL,
"pre",          "p", "head", "ul", NULL,
"listing",      "p", "head", NULL,
"xmp",          "p", "head", NULL,
"blockquote",   "p", "head", NULL,
"dl",           "p", "dt", "menu", "dir", "address", "pre", "listing",
                "xmp", "head", NULL,
"dt",           "p", "menu", "dir", "address", "pre", "listing", "xmp",
                "head", "dd", NULL,
"dd",           "p", "menu", "dir", "address", "pre", "listing", "xmp",
                "head", "dt", NULL,
"ul",           "p", "head", "ol", "menu", "dir", "address", "pre",
                "listing", "xmp", NULL,
"ol",           "p", "head", "ul", NULL,
"menu",         "p", "head", "ul", NULL,
"p",            "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", NULL,
"div",          "p", "head", NULL,
"noscript",     "p", "head", NULL,
"center",       "font", "b", "i", "p", "head", NULL,
"a",            "a", NULL,
"caption",      "p", NULL,
"colgroup",     "caption", "colgroup", "col", "p", NULL,
"col",          "caption", "col", "p", NULL,
"table",        "p", "head", "h1", "h2", "h3", "h4", "h5", "h6", "pre",
                "listing", "xmp", "a", NULL,
"th",           "th", "td", NULL,
"td",           "th", "td", "p", NULL,
"tr",           "th", "td", "tr", "caption", "col", "colgroup", "p", NULL,
"thead",        "caption", "col", "colgroup", NULL,
"tfoot",        "th", "td", "tr", "caption", "col", "colgroup", "thead",
                "tbody", "p", NULL,
"tbody",        "th", "td", "tr", "caption", "col", "colgroup", "thead",
                "tfoot", "tbody", "p", NULL,
"optgroup",     "option", NULL,
"fieldset",     "legend", "p", "head", "h1", "h2", "h3", "h4", "h5", "h6",
                "pre", "listing", "xmp", "a", NULL,
NULL
};

/* a character conversion function; screem_tags_change_case() stores
   toupper or tolower in a variable of this type */
typedef int(*Alter)(int c);

/* list of Node pointers for elements that have been opened while a parse
   tree is being built and have not been closed yet.  It is reset by
   screem_html_build_parse_tree() and maintained by screem_html_add_node(),
   most recently opened element first */
static GList *optional;

/* GNode traversal callback that releases the Node attached to a tree node */
static gboolean screem_html_free_node_memory( GNode *node, gpointer data );
/* recursive worker that parses text from *pos and appends nodes to parent */
static void screem_html_add_node( ScreemDTD *dtd, GNode *parent, 
				  const gchar *text, gint *pos );

/* GNode traversal callback used by screem_html_fix_links() */
static gboolean screem_html_link_fix( GNode *node, gchar **info );


/**
 * in_attribute:
 *
 * @text:  the text to check in
 * @pos:   the position of the cursor
 *
 * checks to see if the cursor is inside a double quoted attribute value,
 * i.e. there is an opening =" before the cursor and a closing " after it,
 * with no '<' or '>' in between.
 *
 * A @pos outside of @text is treated as "not in an attribute".
 *
 * returns:  FALSE, or the position of the first character of the
 *           attribute data (the character after the opening quote)
 */
gint in_attribute( const gchar *text, gint pos )
{
	const gchar quote = '"';
	gint start;
	gint len;
	gchar cur = '\0';
	gchar before;

	g_return_val_if_fail( text != NULL, FALSE );

	/* measure once rather than on every pass of the forward scan */
	len = (gint)strlen( text );
	if( pos < 0 || pos > len )
		return FALSE;

	/* go back until start of text, or we hit a '<' or '>' */
	start = pos;
	while( cur != quote && ( -- start ) >= 0 ) {
		cur = text[ start ];
		if( cur == '<' || cur == '>' )
			return FALSE;
	}
	/* no quote found, or the quote is the very first character and so
	   can not be preceded by '=' */
	if( start <= 0 )
		return FALSE;

	if( text[ start - 1 ] != '=' )
		return FALSE;

	/* we have an opening, but do we have a closing? */
	before = '\0';
	for( ; pos < len; pos ++ ) {
		cur = text[ pos ];
		if( cur == quote ) {
			/* a quote directly after '=' opens the next
			   attribute rather than closing this one */
			return ( before != '=' ) ? start + 1 : FALSE;
		}
		if( cur == '<' || cur == '>' )
			return FALSE;
		before = cur;
	}

	return FALSE;
}

/**
 * in_entity:
 *
 * @text:  the text to check in
 * @pos:   the position of the cursor
 *
 * checks to see if the cursor is in an entity, i.e. there is a '&'
 * before the cursor and a ';' at or after it, with no whitespace,
 * '<', '>', or a further '&' / ';' in between.
 *
 * A @pos outside of @text is treated as "not in an entity".
 *
 * returns: FALSE if not, or the position of the character following
 *          the '&' that starts the entity
 */
gint in_entity( const gchar *text, gint pos )
{
	const gchar amp = '&';
	const gchar semi = ';';
	gint start;
	gint len;
	gchar c = '\0';

	g_return_val_if_fail( text != NULL, FALSE );

	len = (gint)strlen( text );
	if( pos < 0 || pos > len )
		return FALSE;

	/* go back until start of text, or we hit something that can not
	   be part of an entity.  The cast keeps isspace() defined for
	   characters above 127 when gchar is signed */
	start = pos;
	while( c != amp && ( -- start ) >= 0 ) {
		c = text[ start ];
		if( c == semi || c == '<' || c == '>' ||
		    isspace( (guchar)c ) )
			return FALSE;
	}
	if( start < 0 )
		return FALSE;

	/* we have an opening, but do we have a closing? */
	for( ; pos < len; pos ++ ) {
		c = text[ pos ];
		if( c == semi )
			return start + 1;
		if( c == amp || c == '<' || c == '>' ||
		    isspace( (guchar)c ) )
			return FALSE;
	}

	return FALSE;
}

/**
 * in_tag:
 *
 * @text:  the text to check in
 * @pos:   the position of the cursor
 *
 * checks to see if the cursor is in a tag, i.e. the nearest angle
 * bracket before the cursor is a '<' and the nearest one at or after
 * the cursor is a '>'.
 *
 * A @pos outside of @text is treated as "not in a tag".
 *
 * returns: FALSE if not, or the position of the character following
 *          the '<' that opens the tag
 */
gint in_tag( const gchar *text, gint pos )
{
	const gchar open = '<';
	const gchar close = '>';
	gint start;
	gint len;

	g_return_val_if_fail( text != NULL, FALSE );

	len = (gint)strlen( text );
	if( pos < 0 || pos > len )
		return FALSE;

	/* go back until start of text, or we hit a '<' or '>' */
	for( start = pos - 1; start >= 0; start -- ) {
		if( text[ start ] == open )
			break;
		if( text[ start ] == close )
			return FALSE;
	}
	if( start < 0 )
		return FALSE;

	/* we have an opening, but do we have a closing? */
	for( ; pos < len; pos ++ ) {
		if( text[ pos ] == open )
			return FALSE;
		if( text[ pos ] == close )
			return start + 1;
	}

	return FALSE;
}

/**
 * next_tag:
 *
 * @text:  the text to search, starting at its first character
 * @pos:   in/out, the offset of the end of the tag (relative to @text)
 *         is ADDED to the value already stored here
 * @name:  a pointer to where to store the tag name, or NULL if the
 *         name is not wanted
 *
 * obtains the next tag in the text.  The name is everything between
 * the '<' and the first space or '>', so a closing tag yields a name
 * starting with '/'.  Comments always yield the name "!--".
 *
 * On failure (no '<', or no terminating '>' / "-->") NULL is returned
 * and, if @name was given, *@name is set to NULL.
 *
 * NOTE(review): for comments the end is taken to be the first '-' of
 * the closing "-->", so the returned text stops there and *@pos refers
 * to that '-'.  This is long standing behaviour and is kept as is,
 * callers may depend on it.
 *
 * returns:  NULL, or the tag, user must free the result (and *@name)
 */
gchar* next_tag( const gchar *text, gint *pos, gchar **name )
{
	const gchar *tag;
	const gchar *temp;

	/* never leave the caller with a stale or uninitialised name */
	if( name )
		*name = NULL;

	g_return_val_if_fail( text != NULL, NULL );
	g_return_val_if_fail( pos != NULL, NULL );

	if( ! ( tag = strchr( text, '<' ) ) )
		return NULL;

	temp = tag + 1;
	if( ! strncmp( "!--", temp, 3 ) ) {
		if( name )
			*name = g_strdup( "!--" );
		temp = strstr( temp, "-->" );
	} else {
		if( name ) {
			while( *temp != ' ' && *temp != '>' && *temp != '\0' )
				temp ++;
			*name = g_strndup( tag + 1, temp - tag - 1 );
		}
		if( *temp != '>' )
			temp = strchr( temp, '>' );
	}

	if( ! temp ) {
		/* unterminated tag, hand nothing back */
		if( name ) {
			g_free( *name );
			*name = NULL;
		}
		return NULL;
	}

	*pos += (gint)( temp - text );

	return g_strndup( tag, temp - tag + 1 );
}

/**
 * previous_tag:
 *
 * @text:  the text to check in
 * @pos:   in/out, the position of the cursor.  On success it is set
 *         to the position of the '<' of the tag that was found
 *
 * obtains the previous tag in the text: searches backwards from @pos
 * for a '>' and then, from there, backwards for a '<'.
 *
 * returns:  NULL if there is no such tag, or the tag including both
 *           angle brackets, user must free the result
 */
gchar *previous_tag( const gchar *text, gint *pos )
{
	gint end;
	gint start;

	g_return_val_if_fail( text != NULL, NULL );
	g_return_val_if_fail( pos != NULL, NULL );

	/* find the '>' that ends the tag */
	end = *pos;
	while( end > 0 && text[ end ] != '>' )
		end --;

	/* a '>' at the very start of the text can not end a tag */
	if( end <= 0 )
		return NULL;

	/* we've got a possible tag, find the '<' that starts it.  The
	   first character of the text has to be examined as well,
	   otherwise text that holds no '<' at all would be returned
	   as a tag */
	start = end - 1;
	while( start >= 0 && text[ start ] != '<' )
		start --;

	if( start < 0 )
		return NULL;

	*pos = start;

	return g_strndup( &text[ start ], end - start + 1 );
}

/**
 * tag_name:
 *
 * @tag:  the tag we want the name for
 *
 * given a tag returns the name of that tag.
 *
 * returns:  the name of the tag, user must free the result.
 */
gchar *tag_name( const gchar *tag )
{
	gchar *name;
	const gchar *temp;
	gint off;

	if( ! tag )
		return NULL;

	temp = tag + 1;

  	while( ! isspace( *temp ) && *temp != '>' )
		temp ++;

	if( ! strncmp( "!--", tag + 1, strlen( "!--" ) ) ) {
		/* treat as comment, including ssi directives etc */
		name = g_strdup( "!--" );
	} else {
		off = ( *tag == '>' );
		name = g_strndup( tag + off, temp - tag + 1 );
	}

	return name;
}

/**
 * screem_html_parse_tree_find_pos:
 *
 * @node:        the node to start at, its following siblings are
 *               searched as well
 * @pos:         the text position to look for
 * @allow_text:  TRUE if a text node (a node without a name) may be
 *               returned
 *
 * finds the deepest node whose pos .. cpos range contains @pos.
 * If that node is a childless text node and @allow_text is FALSE its
 * parent is returned instead.
 *
 * returns: the node, or NULL if @pos lies before the range of the
 *          node being examined or past all of the siblings
 */
GNode* screem_html_parse_tree_find_pos( GNode *node, gint pos, gboolean allow_text )
{
	GNode *hit;
	Node *info;

	for( ; node != NULL; node = node->next ) {
		info = (Node*)node->data;

		/* past this node, try the next sibling */
		if( pos > info->cpos )
			continue;

		/* before this node, so it is not in the tree */
		if( pos < info->pos )
			return NULL;

		/* its in this node somewhere */
		if( node->children == NULL ) {
			if( info->name != NULL || allow_text )
				return node;
			/* text node and we don't want them */
			return node->parent;
		}

		hit = screem_html_parse_tree_find_pos( node->children,
						       pos, allow_text );
		return ( hit != NULL ) ? hit : node;
	}

	return NULL;
}

/**
 * screem_html_parse_tree_find_node:
 *
 * @current:  the node to start at
 * @name:     the element name to look for, compared ignoring case
 *
 * searches @current, then its descendants, then its following
 * siblings (and their descendants) for a node with the given name.
 *
 * returns: the first matching node in that order, or NULL
 */
GNode* screem_html_parse_tree_find_node( GNode *current, const gchar *name )
{
	GNode *found;
	Node *info;

	g_return_val_if_fail( name != NULL, NULL );

	for( ; current != NULL; current = current->next ) {
		info = (Node*)current->data;
		if( info && info->name &&
		    ! g_strcasecmp( name, info->name ) )
			return current;

		/* descend before moving on to the next sibling */
		found = screem_html_parse_tree_find_node( current->children,
							  name );
		if( found != NULL )
			return found;
	}

	return NULL;
}

/**
 * screem_html_autoclose:
 *
 * @text:  the text of the page
 * @pos:   the position of the cursor
 *
 * builds a parse tree for @text, finds the element containing @pos and
 * walks up from it until an element whose close state is not FORBIDDEN
 * is reached.
 *
 * returns: a copy of the name of that element, or NULL if there is
 *          none.  User must free the result
 */
gchar* screem_html_autoclose( const gchar *text, gint pos )
{
	gchar *result = NULL;
	GNode *tree;
	GNode *cur;
	Node *info;
	ScreemDTD *dtd;

	g_return_val_if_fail( text != NULL, NULL );

	dtd = screem_get_doctype( text );
	tree = screem_html_build_parse_tree( dtd, text, 0 );

	for( cur = screem_html_parse_tree_find_pos( tree, pos, FALSE );
	     cur != NULL; cur = cur->parent ) {
		info = (Node*)cur->data;
		/* a node without data ends the walk */
		if( info == NULL )
			break;
		if( info->state != FORBIDDEN ) {
			result = g_strdup( info->name );
			break;
		}
	}

	screem_html_destroy_parse_tree( tree );

	return result;
}

/**
 * screem_html_attribute_from_list:
 *
 * @attributes:  a list of alternating attribute names and values, as
 *               built by screem_html_build_attributes_list()
 * @name:        the attribute name to look for, compared ignoring case
 *
 * only the name positions (every second element, starting with the
 * first) are examined.
 *
 * returns: the list element holding the matching name (its value is
 *          in the following element), or NULL if there is no match
 */
GList* screem_html_attribute_from_list( GList *attributes, const gchar *name )
{
	GList *list;

	g_return_val_if_fail( attributes != NULL, NULL );
	g_return_val_if_fail( name != NULL, NULL );

	list = attributes;
	while( list ) {
		if( list->data && ! strcasecmp( name, (gchar*)list->data ) )
			return list;

		/* step over the value.  A name without a value element
		   means the list is malformed, stop rather than run off
		   the end of it */
		if( ! list->next )
			return NULL;
		list = list->next->next;
	}

	return NULL;
}

/**
 * screem_html_build_attributes_list:
 *
 * @tag:   the complete text of a tag, e.g. "<a href=\"x\" name=y>"
 * @list:  an existing list to append to, or NULL
 *
 * splits the attributes of @tag into name / value pairs and appends
 * them to @list as alternating elements: name, value, name, value ...
 * Both are newly allocated strings.  The value is NULL for attributes
 * that have none (e.g. noshade).  Values may be double quoted, single
 * quoted or unquoted, the quotes are not part of the stored value.
 *
 * The tag name is skipped by looking for the first space, a tag
 * without a space has no attributes.  Parsing stops at the first
 * attribute value that is not terminated.
 *
 * returns: the (possibly new) start of the list
 */
GList* screem_html_build_attributes_list( const gchar *tag, GList *list )
{
	static const gchar blank[] = " \t\r\n\f\v";
	static const gchar name_end[] = "= \t\r\n\f\v>";
	static const gchar value_end[] = " \t\r\n\f\v>";

	const gchar *cur;
	const gchar *end;
	gchar *name;
	gchar *value;
	gchar quote;
	size_t len;

	g_return_val_if_fail( tag != NULL, list );

	/* bypass tag name */
	cur = strchr( tag, ' ' );
	/* if no space then no attributes */
	if( ! cur )
		return list;

	for( ;; ) {
		cur += strspn( cur, blank );
		if( *cur == '\0' || *cur == '>' )
			break;

		/* the name ends at '=', whitespace or '>', whichever
		   comes first, so that a valueless attribute is not
		   merged with the attribute that follows it */
		len = strcspn( cur, name_end );
		name = g_strndup( cur, len );
		cur += len;

		value = NULL;
		if( *cur == '=' ) {
			cur ++;
			quote = ( *cur == '"' || *cur == '\'' ) ? *cur : '\0';
			if( quote ) {
				cur ++;
				end = strchr( cur, quote );
			} else {
				end = cur + strcspn( cur, value_end );
				if( *end == '\0' )
					end = NULL;
			}
			if( ! end ) {
				/* unterminated value, give up */
				g_free( name );
				break;
			}
			value = g_strndup( cur, end - cur );
			/* continue after the closing quote */
			cur = quote ? end + 1 : end;
		}

		/* add to list */
		list = g_list_append( list, name );
		list = g_list_append( list, value );
	}

	return list;
}

/**
 * screem_tags_change_case:
 *
 * @text:   the text to alter, it is modified in place
 * @upper:  TRUE to convert to upper case, FALSE for lower case
 *
 * changes the case of the element and attribute names of every tag in
 * @text.  Attribute values (everything after an '=' up to the closing
 * double quote, or up to the next space for unquoted values) are left
 * alone, as are tags whose name starts with '!' (comments, doctype).
 */
void screem_tags_change_case( gchar *text, gboolean upper )
{
	gint pos = 0;
	gchar *next;
	gchar *name;
	gchar *tag;

	gboolean in_attr;
	gchar attr_term;

	Alter alter;

	g_return_if_fail( text != NULL );

	alter = upper ? toupper : tolower;

	while( ( next = next_tag( &text[ pos ], &pos, &name ) ) ) {
		/* non markup tag, so we don't change case */
		if( name[ 0 ] != '!' ) {
			/* pos == the end of the tag we found and next is
			   a copy of the whole tag, so its length gives
			   the start of the tag */
			tag = &text[ pos ] - ( strlen( next ) - 1 );

			in_attr = FALSE;
			attr_term = '\0';
			for( tag ++; *tag != '>'; tag ++ ) {
				if( in_attr ) {
					/* the first character of a value
					   decides what terminates it */
					if( attr_term == '\0' )
						attr_term = ( *tag == '"' ) ?
							'"' : ' ';
					else
						in_attr = ( *tag != attr_term );
				} else if( isalpha( (guchar)*tag ) ) {
					/* the casts keep the ctype calls
					   defined for characters above
					   127 when gchar is signed */
					*tag = (gchar)alter( (guchar)*tag );
				} else if( *tag == '=' ) {
					in_attr = TRUE;
					attr_term = '\0';
				}
			}
		}
		g_free( name );
		g_free( next );
	}
}

/**
 * screem_html_get_links:
 *
 * @text:  the text of the page
 *
 * collects the values of all src and href attributes found in the
 * tags of @text, in document order.  Attributes without a value are
 * skipped.
 *
 * returns: a list of newly allocated strings, or NULL if there are no
 *          links.  The user must free the strings and the list
 */
GList *screem_html_get_links( const gchar *text )
{
	GList *links = NULL;
	GList *attr;
	GList *list;

	gint pos = 0;
	gchar *next;
	gchar *name;
	gchar *value;

	g_return_val_if_fail( text != NULL, NULL );

	while( ( next = next_tag( &text[ pos ], &pos, NULL ) ) ) {
		/* get tag attributes, as name / value pairs */
		attr = screem_html_build_attributes_list( next, NULL );

		/* any links among them? */
		for( list = attr; list && list->next;
		     list = list->next->next ) {
			name = (gchar*)list->data;
			value = (gchar*)list->next->data;

			/* does it match one of the link attr names? */
			if( name && value &&
			    ( ! strcasecmp( "src", name ) ||
			      ! strcasecmp( "href", name ) ) ) {
				/* ownership of value moves to links */
				links = g_list_append( links, value );
			} else {
				g_free( value );
			}
			g_free( name );
		}
		g_list_free( attr );

		g_free( next );
	}

	return links;
}

/* does next force tag to be closed */
gboolean screem_html_close_tag( const gchar *tag, const gchar *next )
{
	gint i;

	/* first find tag */
	for( i = 0;  htmlStartClose[ i ]; ) {
		if( ! strcasecmp( next, htmlStartClose[ i ] ) ) {
			/* found it */
			break;
		}
		/* not found, move on till we hit a NULL */
		while( htmlStartClose[ ++ i ] );
		i ++;
	}

	/* now, is next one of the ones that closes tag */
	while( htmlStartClose[ i ] ) {
		if( ! strcasecmp( tag, htmlStartClose[ i ] ) ) {
			break;
		}
		i ++;
	}

	return (gboolean)( htmlStartClose[ i ] );
}

/**
 * screem_html_parse_uri:
 *
 * @uri:       the uri to split up
 * @protocol:  where to store the protocol / scheme, or NULL
 * @address:   where to store the host name, or NULL
 * @port:      where to store the port number (as a string), or NULL
 * @path:      where to store the path, or NULL
 *
 * splits @uri into its components.  Each non NULL output is set to a
 * newly allocated string which the user must free, or to NULL if the
 * uri does not have that component (or could not be parsed).
 */
void screem_html_parse_uri( const gchar *uri, 
			    gchar **protocol, gchar **address,
			    gchar **port, gchar **path )
{
#ifdef HAVE_GNOME_VFS
	GnomeVFSURI *vfs_uri;
	gint port_num;
#else
	const gchar *p;
	const gchar *pt;
#endif

	/* the outputs are always well defined, whatever happens below */
	if( protocol )
		*protocol = NULL;
	if( address )
		*address = NULL;
	if( port )
		*port = NULL;
	if( path )
		*path = NULL;

	g_return_if_fail( uri != NULL );

#ifdef HAVE_GNOME_VFS
	vfs_uri = gnome_vfs_uri_new( uri );
	if( ! vfs_uri )
		return;

	/* the getters return strings owned by vfs_uri, they must be
	   copied as the uri is released before we return */
	if( protocol )
		*protocol = g_strdup( gnome_vfs_uri_get_scheme( vfs_uri ) );
	if( address )
		*address = g_strdup( gnome_vfs_uri_get_host_name( vfs_uri ) );
	if( port ) {
		port_num = gnome_vfs_uri_get_host_port( vfs_uri );
		/* 0 means the uri does not give a port */
		if( port_num != 0 )
			*port = g_strdup_printf( "%i", port_num );
	}
	if( path )
		*path = g_strdup( gnome_vfs_uri_get_path( vfs_uri ) );

	gnome_vfs_uri_unref( vfs_uri );
#else
	p = uri;
	while( isalnum( (guchar)*p ) )
		p ++;
	if( *p != ':' ) {
		/* uri is just 1 big path */
		if( path )
			*path = g_strdup( uri );
	} else {
		/* we have a protocol */
		if( protocol )
			*protocol = g_strndup( uri, p - uri );

		/* move past protocol definition */
		p ++;
		while( *p == '/' )
			p ++;

		/* p now points at the address */
		pt = p;
		while( *pt != ':' && *pt != '/' && *pt )
			pt ++;

		if( address )
			*address = g_strndup( p, pt - p );

		if( *pt == ':' ) {
			/* we have the port */
			p = pt;
			while( *pt && *pt != '/' )
				pt ++;
			if( port )
				*port = g_strndup( p + 1, pt - p - 1 );
		}

		/* whatever is left over is the path, possibly empty */
		if( path )
			*path = g_strdup( pt );
	}
#endif
}

/**
 * screem_html_build_parse_tree:
 *
 * @dtd:    the doctype used to look up the close state of elements
 * @text:   the text to parse
 * @start:  the position in @text to start parsing at
 *
 * parses @text into a tree of GNodes, each holding a Node.  The nodes
 * hang below an internal root node that has no data.
 *
 * Not reentrant: the parser keeps its list of open elements in a file
 * level variable.
 *
 * returns: the first top level node, or NULL if the text gave no
 *          nodes.  Free with screem_html_destroy_parse_tree()
 */
GNode *screem_html_build_parse_tree( ScreemDTD *dtd,
				     const gchar *text, gint start )
{
	GNode *root;
	GNode *first;
	gint pos;

	g_return_val_if_fail( text != NULL, NULL );

	root = g_node_new( NULL );
	pos = start;

	optional = NULL;

	screem_html_add_node( dtd, root, text, &pos );

	/* elements that were never closed are still on the list.  Only
	   the list cells are released here, the Nodes belong to the
	   tree */
	g_list_free( optional );
	optional = NULL;

	first = g_node_first_child( root );
	if( ! first ) {
		/* nothing hangs off the root, so the caller would have
		   no way of releasing it */
		g_node_destroy( root );
	}

	return first;
}

/**
 * screem_html_destroy_parse_tree:
 *
 * @node:  any node of a parse tree, or NULL
 *
 * releases a parse tree.  The top most ancestor of @node (and the
 * first of its siblings) is located, the Node data of it and of every
 * sibling that follows is freed, and then that top node is destroyed
 * with g_node_destroy().
 */
void screem_html_destroy_parse_tree( GNode *node )
{
	GNode *top;
	GNode *cur;

	if( node == NULL )
		return;

	/* make sure this is the root node */
	for( top = node; top->parent != NULL; top = top->parent )
		;
	for( ; top->prev != NULL; top = top->prev )
		;

	/* free the data attached to the nodes first */
	for( cur = top; cur != NULL; cur = cur->next ) {
		g_node_traverse( cur, G_PRE_ORDER, G_TRAVERSE_MASK, -1,
				 screem_html_free_node_memory, NULL );
	}

	g_node_destroy( top );
}

/* GNode traversal callback: frees the Node attached to node (its name,
   content, attribute strings and attribute list) and clears the data
   pointer.  Always returns FALSE so that the traversal carries on */
static gboolean screem_html_free_node_memory( GNode *node, gpointer data )
{
	Node *info = (Node*)node->data;
	GList *attr;

	if( info != NULL ) {
		g_free( info->name );
		g_free( info->content );

		attr = info->attributes;
		while( attr != NULL ) {
			g_free( attr->data );
			attr = attr->next;
		}
		g_list_free( info->attributes );

		g_free( info );
	}

	node->data = NULL;

	return FALSE;
}

/*
 * screem_html_add_node:
 *
 * Recursive worker for screem_html_build_parse_tree().  Starting at
 * text[ *pos ] it
 *   1. turns any text found before the next '<' into a text node (a
 *      Node with no name) appended to parent,
 *   2. reads the next tag with next_tag(), returning if there is none,
 *   3. for an opening tag appends a new element node to parent and
 *      carries on, below the new node unless its state is FORBIDDEN,
 *   4. for a closing tag moves back up the tree and carries on from
 *      there.
 * *pos is advanced as the text is consumed.  The file level list
 * `optional` holds the Nodes of the elements that are currently open,
 * most recently opened first.
 */
static void screem_html_add_node( ScreemDTD *dtd, GNode *parent, 
				  const gchar *text, gint *pos )
{
	gchar *tag;
	gchar *name;
	CloseState state;
	gboolean close;
	GNode *tnode;

	const gchar *prev;
	const gchar *content;

	Node *node;
	Node *cnode;
	gint apos;
	
	const gchar *end;

	g_return_if_fail( parent != NULL );
	
	/* get the next tag */
	/* NOTE(review): end is assigned but never read, cnode is never used */
	end = text + strlen( text );
	prev = content = &text[ *pos ];
	apos = *pos;

	/* skip over the text in front of the next tag */
	while( *prev != '\0' && *prev != '<' ) {
		prev ++;
		(*pos) ++;
	}
	
	/* NOTE(review): this test only passes for 2 or more characters,
	   a single character of text gets no node - confirm intended */
	if( prev - content - 1 > 0 ) {
		/* FIXME: if we are in a style context, then the content
		   will be a collection of styles, we should parse
		   them as such, if we are in a script then
		   should the script be displayed?
		*/
		node = g_new0( Node, 1 );
		node->content = g_strndup( content, prev - content );
		node->pos = apos;
		node->cpos = (*pos) - 1;
		tnode = g_node_new( node );
		g_node_append( parent, tnode );
	}
	
       	tag = next_tag( prev, pos, &name );
	if( !tag ) 
		return;

	/* tag_is_close_tag() is defined outside of this file */
	close = tag_is_close_tag( tag );

	/* names starting with !, ? or % never get a closing tag, for
	   everything else the dtd decides */
	if( name[ 0 ] == '!' || name[ 0 ] == '?' || name[ 0 ] == '%' )
		state = FORBIDDEN;
	else
		state = screem_dtd_element_get_close_state( dtd, name );

     	if( ! close ) {
		/* an opening tag: close every open element that this
		   start tag implies the end of */
		while( optional ) {
			node = (Node*)optional->data;
			if( screem_html_close_tag( node->name, name ) ) {
				/* the closing node doesn't exist,
				   but we need to put a fake one in the parse
				   tree */
				/* NOTE(review): the Node that is closed and
				   removed is the one held by parent, not the
				   head of the list that was tested above -
				   presumably they are always the same,
				   confirm */
				node = (Node*)parent->data;
				node->cpos = *pos - strlen( tag );
				parent = parent->parent;
				optional = g_list_remove( optional, node );
			} else
				break;
		}
		/* the new node takes ownership of tag and name */
		node = g_new0( Node, 1 );
		node->content = tag;
		node->name = name;
		node->pos = *pos;
		node->state = state;

		node->attributes = 
			screem_html_build_attributes_list( tag, NULL );
	
		/* until a close is seen an element that can have one
		   extends to the end of the text */
		if( state != FORBIDDEN )
			node->cpos = strlen( text );
		else
			node->cpos = node->pos + 1;

       		tnode = g_node_new( node );
		g_node_append( parent, tnode );
	
		(*pos)++;
		switch( state ) {
		case FORBIDDEN:
			/* can't have children, what follows is a sibling */
			screem_html_add_node( dtd, parent, text, pos );
			break;
		case OPTIONAL:		
		case REQUIRED:		
			/* now open, what follows goes below it */
			optional = g_list_prepend( optional, node );
			screem_html_add_node( dtd, tnode, text, pos );
			break;
		}

	} else {
		/* a closing tag, name + 1 skips the leading '/' */
		while( optional ) {
			node = (Node*)optional->data;
			if( node->state == OPTIONAL ||
			    ! strcasecmp( node->name, name + 1 ) ) {
				/* close the element held by parent and
				   move back up a level */
				node = (Node*)parent->data;
				node->cpos = *pos - strlen( tag );
				parent = parent->parent;
				optional = g_list_remove( optional, node );
				
		
			} else
				/* weren't expecting that */
				break;
			
			/* node is now the Node that was just closed, stop
			   once the element named by the tag has been
			   closed */
			if( ! strcasecmp( node->name, name + 1 ) )
				break;
		}
		       
		/* closing tags get no node of their own */
		g_free( name );
		g_free( tag );
		
		(*pos)++;
		screem_html_add_node( dtd, parent, text, pos );
		return;
	}
}

/**
 * screem_html_key_to_ent:
 *
 * @keyval:  a GDK key value
 *
 * maps a key value to the name of the HTML character entity for it,
 * without the leading '&' and trailing ';'.  The names are those of
 * the HTML 4 entity sets.  Note that GDK_Tab maps to "nbsp".
 *
 * returns: the entity name, or NULL if there is no entity for @keyval.
 *          The string is static and must NOT be freed or modified
 */
gchar* screem_html_key_to_ent( guint keyval )
{
	static const struct {
		guint keyval;
		const gchar *entity;
	} entities[] = {
		{ GDK_ampersand,      "amp" },
		{ GDK_Agrave,         "Agrave" },
		{ GDK_Aacute,         "Aacute" },
		{ GDK_Acircumflex,    "Acirc" },
		{ GDK_Atilde,         "Atilde" },
		{ GDK_Adiaeresis,     "Auml" },
		{ GDK_Aring,          "Aring" },
		{ GDK_AE,             "AElig" },
		{ GDK_Ccedilla,       "Ccedil" },
		{ GDK_Egrave,         "Egrave" },
		{ GDK_Eacute,         "Eacute" },
		{ GDK_Ecircumflex,    "Ecirc" },
		{ GDK_Ediaeresis,     "Euml" },
		{ GDK_Igrave,         "Igrave" },
		{ GDK_Iacute,         "Iacute" },
		{ GDK_Icircumflex,    "Icirc" },
		{ GDK_Idiaeresis,     "Iuml" },
		{ GDK_ETH,            "ETH" },
		{ GDK_Ntilde,         "Ntilde" },
		{ GDK_Ograve,         "Ograve" },
		{ GDK_Oacute,         "Oacute" },
		{ GDK_Ocircumflex,    "Ocirc" },
		{ GDK_Otilde,         "Otilde" },
		{ GDK_Odiaeresis,     "Ouml" },
		{ GDK_Ooblique,       "Oslash" },
		{ GDK_Ugrave,         "Ugrave" },
		{ GDK_Uacute,         "Uacute" },
		{ GDK_Ucircumflex,    "Ucirc" },
		{ GDK_Udiaeresis,     "Uuml" },
		{ GDK_Yacute,         "Yacute" },
		{ GDK_THORN,          "THORN" },
		{ GDK_ssharp,         "szlig" },
		{ GDK_agrave,         "agrave" },
		{ GDK_aacute,         "aacute" },
		{ GDK_acircumflex,    "acirc" },
		{ GDK_atilde,         "atilde" },
		{ GDK_adiaeresis,     "auml" },
		{ GDK_aring,          "aring" },
		{ GDK_ae,             "aelig" },
		{ GDK_ccedilla,       "ccedil" },
		{ GDK_egrave,         "egrave" },
		{ GDK_eacute,         "eacute" },
		{ GDK_ecircumflex,    "ecirc" },
		{ GDK_ediaeresis,     "euml" },
		{ GDK_igrave,         "igrave" },
		{ GDK_iacute,         "iacute" },
		{ GDK_icircumflex,    "icirc" },
		{ GDK_idiaeresis,     "iuml" },
		{ GDK_eth,            "eth" },
		{ GDK_ntilde,         "ntilde" },
		{ GDK_ograve,         "ograve" },
		{ GDK_oacute,         "oacute" },
		{ GDK_ocircumflex,    "ocirc" },
		{ GDK_otilde,         "otilde" },
		{ GDK_odiaeresis,     "ouml" },
		{ GDK_oslash,         "oslash" },
		{ GDK_ugrave,         "ugrave" },
		{ GDK_uacute,         "uacute" },
		{ GDK_ucircumflex,    "ucirc" },
		{ GDK_udiaeresis,     "uuml" },
		{ GDK_yacute,         "yacute" },
		{ GDK_thorn,          "thorn" },
		{ GDK_ydiaeresis,     "yuml" },
		{ GDK_exclamdown,     "iexcl" },
		{ GDK_cent,           "cent" },
		{ GDK_sterling,       "pound" },
		{ GDK_currency,       "curren" },
		{ GDK_yen,            "yen" },
		{ GDK_brokenbar,      "brvbar" },	/* was "brkbar" */
		{ GDK_section,        "sect" },
		{ GDK_diaeresis,      "uml" },
		{ GDK_copyright,      "copy" },
		{ GDK_ordfeminine,    "ordf" },
		{ GDK_guillemotleft,  "laquo" },	/* was "laqo" */
		{ GDK_notsign,        "not" },
		{ GDK_hyphen,         "shy" },
		{ GDK_registered,     "reg" },
		{ GDK_macron,         "macr" },
		{ GDK_degree,         "deg" },
		{ GDK_plusminus,      "plusmn" },
		{ GDK_twosuperior,    "sup2" },		/* was "suo2" */
		{ GDK_threesuperior,  "sup3" },		/* was "suo3" */
		{ GDK_acute,          "acute" },
		{ GDK_mu,             "micro" },
		{ GDK_paragraph,      "para" },
		{ GDK_periodcentered, "middot" },
		{ GDK_cedilla,        "cedil" },
		{ GDK_onesuperior,    "sup1" },
		{ GDK_masculine,      "ordm" },
		{ GDK_guillemotright, "raquo" },
		{ GDK_onequarter,     "frac14" },	/* was "fraq14" */
		{ GDK_onehalf,        "frac12" },	/* was "fraq12" */
		{ GDK_threequarters,  "frac34" },	/* was "fraq34" */
		{ GDK_questiondown,   "iquest" },
		{ GDK_quotedbl,       "quot" },
		{ GDK_Tab,            "nbsp" },
		{ GDK_less,           "lt" },
		{ GDK_greater,        "gt" },
		{ 0,                  NULL }		/* end of table */
	};
	gint i;

	for( i = 0; entities[ i ].entity; i ++ ) {
		if( entities[ i ].keyval == keyval ) {
			/* the return type predates const correctness */
			return (gchar*)entities[ i ].entity;
		}
	}

	return NULL;
}


/**
 * screem_html_fix_links:
 *
 * @text: the text to fix links in
 * @path: the pathname of the page the text belongs in
 * @spath: the pathname of the site the page belongs to
 * @src:  the src path of the link to fix
 * @dest: the new path of src
 *
 * fix links in a page after src has been changed to dest
 *
 * returns: the new text for the page
 */
gchar *screem_html_fix_links( const gchar *text, const gchar *path,
			      const gchar *spath,
			      const gchar *src, const gchar *dest )
{
	GNode *node;
	GNode *root;

	gchar *info[ 6 ];
	ScreemDTD *dtd;

	info[ 0 ] = g_strdup( text );
	info[ 1 ] = path;
	info[ 2 ] = spath;
	info[ 3 ] = src;
	info[ 4 ] = dest;
	info[ 5 ] = 0;

	dtd = screem_get_doctype( text );
	root = node = screem_html_build_parse_tree( dtd, text, 0 );
   
	while( node ) {
		g_node_traverse( node, G_PRE_ORDER, G_TRAVERSE_MASK, -1,
				 screem_html_link_fix, info );
		node = node->next;
	}

	screem_html_destroy_parse_tree( root );

	return info[ 0 ];
}

/*
 * screem_html_link_fix:
 *
 * GNode traversal callback for screem_html_fix_links().  info is the
 * array set up there: [0] working text, [1] page path, [2] site path,
 * [3] src, [4] dest, [5] length difference so far (an integer stored
 * in the pointer).
 *
 * For a node that has attributes, the attribute part of its tag in the
 * working text is replaced by one rebuilt from the node's attribute
 * list, with values written in double quotes and values that refer to
 * src rewritten to refer to dest.  info[ 0 ] and info[ 5 ] are updated.
 * The working directory is changed while paths are resolved and put
 * back afterwards.
 *
 * Always returns FALSE so that the traversal carries on.
 */
static gboolean screem_html_link_fix( GNode *node, gchar **info )
{
	Node *n;
	GList *list;

	gchar *chunk;
	gchar *chunk2;
	gchar *attr;
	gchar *pattr;
	gchar *aname;
	gchar *avalue;

	struct stat s;

	gboolean is_full;
	gboolean have_cwd;
	gchar *path;
	gchar *new_path;

	gchar *dest_dir;
	gchar *page_dir;
	gchar cwd[ 16384 ];

	gint offset;
	gint orig;
	gint newlen;

	n = (Node*)node->data;

	/* FIXME: I don't like explicitly excluding the following tags
	   but it doesn't work if I don't.  It probably also needs
	   to exclude ?php ? and % as well, which would lead to 
	   a problem if the script used the pathname */
	if( ! n || ! n->attributes || ! n->name ||
	    n->name[ 0 ] == '!' || n->name[ 0 ] == '?' )
		return FALSE;

	offset = GPOINTER_TO_INT( info[ 5 ] );

	/* n->pos is the end of the tag in the original text.  Go back
	   to the start of the tag, then forward to the space that
	   follows the tag name, without leaving the text */
	chunk = info[ 0 ] + n->pos + offset;
	while( chunk > info[ 0 ] && *chunk != '<' )
		chunk --;
	while( *chunk != '\0' && *chunk != ' ' )
		chunk ++;

	/* chunk: everything up to the attributes,
	   chunk2: everything from the end of the tag onwards */
	chunk = g_strndup( info[ 0 ], chunk - info[ 0 ] );
	chunk2 = g_strdup( info[ 0 ] + n->pos + offset );

	dest_dir = g_dirname( info[ 4 ] );
	page_dir = g_dirname( info[ 1 ] );
	/* if the current directory is unknown we can't go back to it */
	have_cwd = ( getcwd( cwd, sizeof( cwd ) ) != NULL );
	
	attr = g_strdup( "" );
	/* the list holds name / value pairs */
	for( list = n->attributes; list && list->next;
	     list = list->next->next ) {
		aname = (gchar*)list->data;
		avalue = (gchar*)list->next->data;
		pattr = attr;
		if( ! avalue ) {
			/* single word attribute */
			attr = g_strconcat( pattr, " ", aname, NULL );
			g_free( pattr );
			continue;
		}
		if( ! strcmp( info[ 1 ], info[ 3 ] ) ) {
			/* src is the same as path, all the links need
			   fixing */
			if( ! stat( avalue, &s ) ) {
				is_full = g_path_is_absolute( avalue );
				if( ! is_full ) {
					path = relative_to_full( avalue );
					chdir( dest_dir );
					new_path = relative_path( path,
								  info[ 2 ] );
					if( have_cwd )
						chdir( cwd );
					g_free( path );
				} else
					new_path = g_strdup( avalue );

			} else
				new_path = g_strdup( avalue );
			attr = g_strconcat( pattr, " ", aname,
					    "=\"", new_path, "\"", NULL );
			g_free( pattr );
			g_free( new_path );
		} else {
			/* fix links where the value == info[ 3 ] */

			is_full = g_path_is_absolute( avalue );

			chdir( page_dir );
			if( ( path = paths_match( avalue, info[ 3 ] ) ) ) {
				/* it is a link to src */
				new_path = g_strconcat( info[ 4 ], path +
							strlen( info[ 3 ] ),
							NULL);
				g_free( path );
				if( ! is_full ) {
					chdir( page_dir );
					path = relative_path( new_path, 
							      info[ 2 ] );
					g_free( new_path );
				} else
					path = new_path;
				attr = g_strconcat( pattr, " ", aname,
						    "=\"", path, "\"", NULL );
				g_free( pattr );
				g_free( path );
			} else {
				attr = g_strconcat( pattr, " ", aname,
						    "=\"", avalue, "\"", 
						    NULL );
				g_free( pattr );
			}
			if( have_cwd )
				chdir( cwd );
		}
	}

	/* length of the attribute text in the original tag: the tag
	   without its name, the brackets and the space after the name.
	   attr starts with that space, hence the extra - 1 */
	orig = strlen( n->content ) - strlen( n->name ) - strlen( "< >" );
	newlen = strlen( attr );
	offset += ( newlen - orig - 1 );

	g_free( info[ 0 ] );

	info[ 0 ] = g_strconcat( chunk, attr, chunk2, NULL );
	info[ 5 ] = GINT_TO_POINTER( offset );

	g_free( chunk );
	g_free( chunk2 );
	g_free( attr );
	g_free( dest_dir );
	g_free( page_dir );

	return FALSE;
}


gchar* screem_html_encode_text( const gchar *text )
{
	gchar *temp;
	gchar *temp2;
	gchar *temp3;
	gint i;
	gchar *ent;
	guint val;

	temp = g_strdup( text );
	for( i = 0; temp[ i ]; i ++ ) {
		val = (guint)temp[ i ];
		ent = screem_html_key_to_ent( val );
		if( ent && ! in_entity( temp, i + 1 ) ) {
			if( i > 0 )
				temp2 = g_strndup( temp, i );
			else
				temp2 = g_strdup( "" );
			temp3 = g_strconcat( temp2, "&", ent, ";",
					     &temp[ i + 1 ], NULL );
			g_free( temp2 );
			g_free( temp );
			temp = temp3;
		}
	}

	return temp;
}

/**
 * screem_html_next_tag_close:
 *
 * @text:   the text to look in
 * @tname:  the element name to check for, compared ignoring case
 * @pos:    the position in @text to start looking from
 *
 * checks whether the next tag found at or after @pos is the closing
 * tag for @tname.
 *
 * returns: TRUE if it is, FALSE if it isn't or if there is no tag
 */
gboolean screem_html_next_tag_close( const gchar *text, gchar *tname, gint pos )
{
	gchar *found;
	gchar *found_name;
	gboolean is_close = FALSE;

	found = next_tag( text + pos, &pos, &found_name );
	if( found == NULL )
		return FALSE;

	/* a closing tag has a name of the form "/name" */
	if( found_name[ 0 ] == '/' )
		is_close = ! g_strcasecmp( found_name + 1, tname );

	g_free( found_name );
	g_free( found );

	return is_close;
}
