/*

    mp_synhi.c

    Syntax highlighters.

    mp - Programmer Text Editor

    Copyright (C) 1991-2001 Angel Ortega <angel@triptico.com>

    This program is free software; you can redistribute it and/or
    modify it under the terms of the GNU General Public License
    as published by the Free Software Foundation; either version 2
    of the License, or (at your option) any later version.

    This program is distributed in the hope that it will be useful,
    but WITHOUT ANY WARRANTY; without even the implied warranty of
    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    GNU General Public License for more details.

    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

    http://www.triptico.com

    ---

    Why here and not in a config file? Well, mp wants to be small and
    independent. Up to version 3.0.95, this used to be in /etc/mp-synhi.conf.
    But having a config file just for holding this configuration wasn't very
    pleasant. Now it is more compact and everyone is happy and the sun shines
    brighter. See 'The mp syntax highlighters authoring Mini-HOWTO' for
    details about updating this file.

    Angel

*/

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "mp_core.h"
#include "mp_video.h"
#include "mp_synhi.h"


/******************
	Data
*******************/

/* html, xml, sgml */

/* All the lists below are NULL-terminated arrays of strings and are
   referenced from the HTML and XML/SGML entries of _mps_synhi[] */

/* file name extensions (leading dot included) of the HTML entry */
static char * _html_exts[] = { ".html", ".htm", ".artemus", NULL };
/* first and second level content signatures of the HTML entry
   (presumably bound to the magic_1 and magic_2 fields that
   mps_auto_synhi() searches for; confirm against mp_synhi.h) */
static char * _html_magics_1[] = { "-*- Mode: HTML", NULL };
static char * _html_magics_2[] = { "<html>", "<body>", "<i>", "<b>", NULL };
/* tag and attribute names of the HTML entry.
   NOTE(review): "link" is listed twice (as a tag and as an attribute) */
static char * _html_tokens[] = { "a", "abbr", "acronym", "address",
				"area", "b", "base", "bdo", "big",
				"blockquote", "body", "br", "button",
				"caption", "center", "cite", "code", "col",
				"colgroup", "dd", "del", "dfn", "div",
				"dl", "dt", "em", "fieldset", "form",
				"h1", "h2", "h3", "h4", "h5", "h6",
				"head", "hr", "html", "i", "img",
				"input", "ins", "kbd", "label", "legend",
				"li", "link", "map", "meta", "noscript",
				"object", "ol", "optgroup", "option",
				"p", "param", "pre", "q", "samp",
				"script", "select", "small", "span",
				"strong", "style", "sub", "sup", "table",
				"tbody", "td", "textarea", "tfoot", "th",
				"thead", "title", "tr", "tt", "ul",
				"var", "!DOCTYPE", "class", "type",
				"cellspacing", "cellpadding",
				"href", "align", "valign", "name", "lang",
				"value", "action", "width", "height",
				"content", "http-equiv", "src", "alt",
				"bgcolor", "text", "link", "vlink", "alink",
				NULL };

/* extensions and first level signatures of the XML/SGML entry
   (that entry has no word lists) */
static char * _xml_exts[] = { ".xml", ".sgml", NULL };
static char * _xml_magics_1[] = { "<?xml", "-*- Mode: XML", NULL };

/* C, C++ */

/* NULL-terminated lists referenced from the C/C++ entry of _mps_synhi[] */

/* file name extensions (leading dot included) */
static char * _c_exts[] = { ".c", ".h", ".cpp", ".hpp", ".c++", NULL };
/* first and second level content signatures */
static char * _c_magics_1[] = { "-*- Mode: C", NULL };
static char * _c_magics_2[] = { "#include", "int main", "/*" , NULL };
/* control keywords, common library functions, preprocessor
   directives and braces.
   NOTE(review): "memcpy" is listed twice */
static char * _c_tokens[] = { "for", "while", "if", "switch", "case", "do",
			      "else", "break", "continue", "return",
			      "default", "goto", "main", "fopen", "fclose",
			      "fgets", "fgetc", "fputs", "fputc", "fprintf",
			      "putc", "printf", "sprintf", "strcpy", "strcat",
			      "strcmp", "strncmp", "strtok", "stricmp", "strchr",
			      "strrchr", "strlen", "memcmp", "memcpy", "malloc",
			      "free", "strncpy", "strncat", "strstr", "memset",
			      "memcpy", "#include", "#define", "#ifdef", "#ifndef",
			      "#if", "#else", "#elif", "#endif", "#pragma",
			      "#undef", "{", "}", "putchar", "fflush", NULL };
/* type names, qualifiers and storage classes */
static char * _c_vars[]= { "char", "int", "long", "struct", "union", "const",
			   "void", "unsigned", "signed", "auto", "volatile",
			   "enum", "typedef", "float", "double", "extern",
			   "register", "short", "sizeof", "static", "far",
			   "near", "defined", NULL }; 
/* command templates containing a %s placeholder; nothing in this
   file uses them other than the table (presumably they are run by
   the help feature with the word under the cursor; confirm) */
static char * _c_helpers[] = { "man 2 %s", "man 3 %s", "./localhelp.sh %s",
				"man ./%s.3", "man ./man/%s.3", NULL };

/* Perl */

/* NULL-terminated lists referenced from the Perl entry of _mps_synhi[] */

/* file name extensions; note that both ".pl" and ".PL" are listed,
   as mps_auto_synhi() compares extensions with strcmp() */
static char * _perl_exts[] = { ".pl", ".pm", ".PL", NULL };
/* first level content signatures */
static char * _perl_magics_1[] = { "#!/usr/bin/perl", "-*- Mode: Perl", NULL };
/* keywords and builtin functions.
   NOTE(review): "tr" is listed twice */
static char * _perl_tokens[] = { "for", "if", "next", "last", "else", "elsif",
				 "unless", "while", "shift", "unshift", "push",
				 "pop", "delete", "new", "bless", "return",
				 "foreach", "keys", "values", "sort", "grep",
				 "tr", "length", "system", "exec", "fork", "map",
				 "print", "write", "open", "close", "chop",
				 "chomp", "exit", "sleep", "split", "join",
				 "sub", "printf", "s", "tr", NULL };
/* declarations, POD directives, special variables and word operators */
static char * _perl_vars[] = { "scalar", "my", "local", "undef", "defined",
			       "use", "package", "require", "pod", "head1",
			       "head2", "item", "cut", "over", "back", "$ENV",
			       "$SIG", "ref", "qw", "qq", "eq", "ne", "or",
			       "and", "not", "import", NULL };
/* command template containing a %s placeholder */
static char * _perl_helpers[] = { "perldoc -f %s", NULL };

/* Man Pages */

/* NULL-terminated lists referenced from the Man Page entry of _mps_synhi[] */

/* file name extensions (leading dot included) */
static char * _man_exts[] = { ".man", NULL };
/* section heading words; the Man Page entry references this same
   list twice, once in its magic slot and once in its tokens slot */
static char * _man_tokens[] = { "NAME", "SYNOPSIS", "DESCRIPTION", 
				"OPTIONS", "BUGS", "AUTHOR", "FILES",
				"SEE", "ALSO", "RETURN", "VALUE",
				"NOTES", "PORTABILITY", NULL };

/* command templates containing a %s placeholder */
static char * _man_helpers[] = { "man %s", "./localhelp.sh %s",
				"man ./%s.3", "man ./man/%s.3", NULL };

/* RFC822 mail message */

/* NULL-terminated lists referenced from the RFC822 Mail entry of
   _mps_synhi[]; that entry has no extension list */

/* header field names to be highlighted */
static char * _rfc822_tokens[] = { "From", "To", "Cc", "Bcc", "Subject",
				   "Reply-To", "In-Reply-To", "Received",
				   "Date", "Message-Id", "Return-Path",
				   "Apparently-To", "Delivered-To",
				   "Organization", NULL };
/* subset of the header field names used as second level
   content signatures */
static char * _rfc822_magics_2[] = { "From", "Subject", "Reply-To",
				   "Message-Id", "Return-Path", "Apparently-To",
				   "Delivered-To", NULL };

/* Shell */
/* Shell syntax highlighter by Sergey P. Vazulia */

/* NULL-terminated lists referenced from the Shell entry of _mps_synhi[] */

/* file name extensions (leading dot included) */
static char * _sh_exts[] = { ".sh", NULL };
/* first level content signatures; the first one has no leading '#' */
static char * _sh_magics_1[] = { "!/bin/sh", "-*- Mode: sh", NULL };
/* keywords and builtin commands */
static char * _sh_tokens[] = { "if", "then", "else", "elif", "fi", "case", "do",
				"done", "esac", "for", "until", "while", "break",
				"in", "source", "alias", "cd", "continue",
				"echo", "eval", "exec", "exit", "export", "kill",
				"logout", "printf", "pwd", "read", "return",
				"shift", "test", "trap", "ulimit", "umask",
				"unset", "wait", NULL };
/* variable related words */
static char * _sh_vars[] = { "$", "local", "let", "set", NULL };
/* command template containing a %s placeholder */
static char * _sh_helpers[] = { "man %s", NULL };

/* Ruby */
/* Ruby syntax highlighter by Gabriel Emerson */
 
/* NULL-terminated lists referenced from the Ruby entry of _mps_synhi[] */

/* file name extensions (leading dot included) */
static char * _ruby_exts[] = { ".rb", NULL };
/* first level content signatures */
static char * _ruby_magics_1[] = { "#!/usr/bin/ruby", "-*- Mode: Ruby", NULL };
/* reserved words */
static char * _ruby_tokens[] = { "BEGIN", "END", "alias", "and", "begin",
				 "break", "case", "class", "def", "defined",
				 "do", "else", "elsif", "end", "ensure", 
				 "false", "for", "if", "in", "module", "next",
				 "nil", "not", "or", "redo", "rescue", "retry",
				 "return", "self", "super", "then", "true",
				 "undef", "unless", "until", "when", "while",
				 "yield", NULL };
/* file loading words, percent literals and embedded doc markers */
static char * _ruby_vars[] = { "load", "require","%q", "%!", "%Q", "%r", 
			       "%x", "=begin", "=end", NULL };
/* command template containing a %s placeholder */
static char * _ruby_helpers[] = { "man %s", NULL };

/* PHP */
/* PHP syntax highlighter by Geoff Youngs */

/* NULL-terminated lists referenced from the PHP entry of _mps_synhi[] */

/* file name extensions (leading dot included) */
static char * _php_exts[] = { ".php", ".inc", NULL };
/* first and second level content signatures */
static char * _php_magics_1[] = { "-*- Mode: PHP", NULL };
static char * _php_magics_2[] = { "<?php", "<?",  NULL };
/* operators, punctuation and reserved words. Every entry must be
   followed by a comma: two adjacent string literals are silently
   glued together by the compiler into one (wrong) token */
static char * _php_tokens[] = { "^=", "<<=", "<<", "<=", "===", "==", "=>", "+",
				">=", ">>=", ">>", "|=", "||", "-=", "--", "-",
				"::", "!==", "!=", "<>", "?>", "/=", "/*", "*",
				".=", "{", "}", "(", ")", "=", "*=", "&=", "&&",
				"%=", "%>", "+=", "++", "and", "array", "as",
				"bool", "boolean", "break", "case", "class",
				"const", "continue", "declare", "default",
				"die", "do", "double", "echo", "else", "elseif",
				"empty", "enddeclare", "endfor", "endforeach",
				"endif", "endswitch", "endwhile", "eval",
				"exit", "extends", "__FILE__", "float", "for",
				"foreach", "function", "cfunction", "global",
				"if", "include", "include_once", "int",
				"integer", "isset", "__LINE__", "list", "new",
				"object", "old_function", "or", "print", "real",
				"require", "require_once", "return",
				"static", "string", "switch", "unset", "use",
				"var", "while", "xor", NULL };
/* variable prefix */
static char * _php_vars[]= { "$", NULL };
/* command template containing a %s placeholder */
static char * _php_helpers[] = { "man %s", NULL };

/* plain text, no decorations */

/* first level content signature of the Plain entry */
static char * _plain_magics_1[] = { "-*- Mode: plain", NULL };



/* Table of syntax highlighters. The code in this file walks it until
   an entry whose type field is NULL is found. A text refers to its
   highlighter by a 1-based index (entry n is stored as n+1), leaving
   0 for 'no highlighter'.

   NOTE(review): the initializers are positional. The apparent order
   is type, mode, q_start, q_end, c_start, c_end, magic_1, magic_2,
   exts, tokens, vars, helpers, seps and then two integer flags
   (casesig and numbers, in an order to be confirmed); verify against
   struct mps_synhi in mp_synhi.h before adding an entry */
struct mps_synhi _mps_synhi[]=
{
	/* None */
	{ "Plain", "plain", NULL, NULL, NULL, NULL,
		_plain_magics_1, NULL, NULL, NULL, NULL, NULL,
		"", 0, 0 },

	/* HTML */
	{ "HTML", "html", "{&\"'", "};\"'", "!--", "--",
		_html_magics_1, _html_magics_2, _html_exts,
		_html_tokens, NULL, NULL, "/<>", 0, 1 },

	/* XML/SGML */
	{ "XML/SGML", "xml", "<&\"'", ">;\"'", "!--", "--",
		_xml_magics_1, NULL, _xml_exts, NULL, NULL, NULL, NULL, 1, 1 },

	/* C/C++: usual quotes */
	{ "C/C++", "c", "\"'", "\"'", "/*", "*/",
		_c_magics_1, _c_magics_2, _c_exts, _c_tokens,
		_c_vars, _c_helpers, NULL, 1, 1  },

	/* Perl: usual quotes + backticks */
	{ "Perl", "perl", "\"'`", "\"'`", "#", NULL,
		_perl_magics_1, NULL, _perl_exts, _perl_tokens,
		_perl_vars, _perl_helpers, NULL, 1, 1 },

	/* Man pages: use tokens as magic */
	{ "Man Page", "man", NULL, NULL, NULL, NULL,
		NULL, _man_tokens, _man_exts, _man_tokens,
		NULL, _man_helpers, NULL, 1, 0 },

	/* RFC822 mail message */
	{ "RFC822 Mail", "mail", "<\"", ">\"", ">", NULL,
		NULL, _rfc822_magics_2, NULL, _rfc822_tokens, NULL,
		NULL, NULL, 0, 0 },

	/* Shell: usual quotes */
	/* Shell syntax hilighter by Sergey P. Vazulia */
	{ "Shell", "sh", "\"'`{([", "\"'`})]", "#", NULL,
		_sh_magics_1, NULL, _sh_exts, _sh_tokens, _sh_vars,
		_sh_helpers, NULL, 1, 0 },

	/* Ruby: usual quotes + backticks */
	/* Ruby syntax hilighter by Gabriel Emerson */
	{ "Ruby", "ruby", "\"'`", "\"'`", "#", NULL,
		_ruby_magics_1, NULL, _ruby_exts, _ruby_tokens,
		_ruby_vars, _ruby_helpers, NULL, 1, 0 },

	/* PHP: usual quotes + backticks */
	/* PHP syntax hilighter by Geoff Youngs */
	{ "PHP", "php", "\"'`", "\"'`", "/*", "*/",
		_php_magics_1, _php_magics_2, _php_exts, _php_tokens,
		_php_vars, _php_helpers, NULL, 1, 0  },

	/* ... yours here ... */

	/* End of syntax hilighters */
	/* (only the first member is given; the rest is zero-filled) */
	{ NULL }
};


/* quoting flag: 0 when outside a quoted string; otherwise, the
   closing quote character mps_quoting() is waiting for */
int _draw_quoting=0;

/* in comment flag: 0 when outside a comment; otherwise, the line
   number plus one given to mps_word_color() when the comment
   started. Highlighters without a comment end string use the stored
   line to finish the comment as soon as the line changes */
int _in_comment=0;

/* override: if not 0, the (1-based) highlighter index that
   mps_auto_synhi() assigns instead of autodetecting; set by
   mps_set_override_mode() */
int _override_synhi=0;

/* local tokens: strings allocated by mps_add_local_token(); only the
   first _num_local_tokens entries are searched by mps_word_color() */
char * _local_tokens[MAX_LOCAL_TOKENS];
int _num_local_tokens=0;
/* if not 0, the next token added restarts the list from position 0 */
int _token_reset=1;


/******************
	Code
*******************/

/**
 * _wrd_cmp - qsort compare function
 * @s1: first element (a struct mps_wsynhi)
 * @s2: second element (a struct mps_wsynhi)
 *
 * Case sensitive ordering of two word entries by their word
 * field, to be used with qsort() and bsearch(). If any of the
 * two words is NULL, the entries are taken as equal.
 * Returns the strcmp() result (negative, zero or positive).
 * Internal (do not use).
 */
static int _wrd_cmp(const void * s1, const void * s2)
{
	struct mps_wsynhi * a=(struct mps_wsynhi *) s1;
	struct mps_wsynhi * b=(struct mps_wsynhi *) s2;

	if(a->word!=NULL && b->word!=NULL)
		return(strcmp(a->word, b->word));

	return(0);
}


/**
 * _wrd_icmp - qsort compare function
 * @s1: first element (a struct mps_wsynhi)
 * @s2: second element (a struct mps_wsynhi)
 *
 * Same as _wrd_cmp(), but the word fields are compared with
 * mpv_strcasecmp() (case insensitive version). If any of the
 * two words is NULL, the entries are taken as equal.
 * Returns the mpv_strcasecmp() result. Internal (do not use).
 */
static int _wrd_icmp(const void * s1, const void * s2)
{
	struct mps_wsynhi * a=(struct mps_wsynhi *) s1;
	struct mps_wsynhi * b=(struct mps_wsynhi *) s2;

	if(a->word!=NULL && b->word!=NULL)
		return(mpv_strcasecmp(a->word, b->word));

	return(0);
}


/**
 * _local_cmp - qsort compare function
 * @s1: pointer to the first string pointer
 * @s2: pointer to the second string pointer
 *
 * Compare function (qsort and bsearch) for the local token
 * list, whose elements are pointers to strings.
 * Returns the strcmp() result (negative, zero or positive).
 * Internal (do not use).
 */
static int _local_cmp(const void * s1, const void * s2)
{
	const char * a=*(char * const *)s1;
	const char * b=*(char * const *)s2;

	return(strcmp(a,b));
}


/**
 * mps_is_sep - separator test
 * @c: character to test
 * @synhi: syntax highlighter index (1-based; 0 means none)
 *
 * Tests if c is a character separator. mp_is_sep() is asked
 * first; if it says no and there is a syntax highlighter, its
 * own separator string (if any) is searched for c.
 * Returns 1 if c is a separator, 0 otherwise.
 */
int mps_is_sep(char c, int synhi)
{
	char * seps;

	if(mp_is_sep(c)) return(1);
	if(synhi==0) return(0);

	/* synhi is 1-based */
	seps=_mps_synhi[synhi-1].seps;

	return(seps!=NULL && strchr(seps, c)!=NULL);
}


/**
 * mps_auto_synhi - Autodetects syntax highlight mode
 * @txt: text to inspect
 *
 * Tries to detect the type of the document in txt. Nothing
 * is done if txt already has a highlighter; if an override
 * mode is set, it is used unconditionally. Otherwise, three
 * tests are done over all the highlighters, in this order:
 * the first level magic strings, the file name extension and
 * the second level magic strings. Magic strings are searched
 * for in a copy of the first 1000 characters of the text.
 * On the first match, txt->synhi is set to the index of the
 * highlighter plus one; if nothing matches, it's left as 0.
 */
void mps_auto_synhi(mp_txt * txt)
{
	int i,ch;
	mp_txt * src;
	mp_txt * sample;
	char * dot;
	char ** p;

	/* already has one */
	if(txt->synhi) return;

	/* forced mode wins over any detection */
	if(_override_synhi)
	{
		txt->synhi=_override_synhi;
		return;
	}

	txt->synhi=0;

	/* copy the head of the text into a scratch text */
	src=mp_get_tmp_txt(txt);
	mp_move_bof(src);
	sample=mp_create_sys_txt(NULL);

	i=0;
	while(i < 1000)
	{
		ch=mp_get_char(src);
		mp_put_char(sample,ch,1);
		i++;
	}

	mp_end_tmp_txt();

	/* first test: magic_1 strings inside the sample */
	for(i=0;_mps_synhi[i].type!=NULL;i++)
	{
		if((p=_mps_synhi[i].magic_1)==NULL)
			continue;

		for(;*p!=NULL;p++)
		{
			mp_move_bof(sample);

			if(mp_seek(sample,*p))
			{
				txt->synhi=i+1;
				goto done;
			}
		}
	}

	/* second test: the file name extension (from the last dot on) */
	dot=strrchr(txt->name,'.');

	if(dot!=NULL)
	{
		for(i=0;_mps_synhi[i].type!=NULL;i++)
		{
			if((p=_mps_synhi[i].exts)==NULL)
				continue;

			for(;*p!=NULL;p++)
			{
				if(strcmp(*p,dot)==0)
				{
					txt->synhi=i+1;
					goto done;
				}
			}
		}
	}

	/* last test: magic_2 strings inside the sample */
	for(i=0;_mps_synhi[i].type!=NULL;i++)
	{
		if((p=_mps_synhi[i].magic_2)==NULL)
			continue;

		for(;*p!=NULL;p++)
		{
			mp_move_bof(sample);

			if(mp_seek(sample,*p))
			{
				txt->synhi=i+1;
				goto done;
			}
		}
	}

done:
	/* the scratch text is always released */
	mp_delete_sys_txt(sample);
}


/**
 * mps_word_color - returns the color associated to the word
 * @synhi: syntax highlighter index (1-based; 0 means none)
 * @word: the word to search
 * @line: the line being drawn
 *
 * Returns the color associated to the word, or MP_COLOR_NORMAL
 * if the word has nothing special. The tests are done in this
 * order: inside a comment, start of a comment, number, local
 * token, all caps word and, finally, the word table of the
 * highlighter. The global _in_comment flag is updated when a
 * comment starts or ends.
 */
int mps_word_color(int synhi, char * word, int line)
{
	int n;
	struct mps_wsynhi wd;
	struct mps_wsynhi * w;

	/* if text hasn't (yet?) a syntax highlighter,
	   just return normal color */
	if(synhi==0) return(MP_COLOR_NORMAL);

	synhi--;

	/* test if inside comments */
	if(_in_comment)
	{
		if(_mps_synhi[synhi].c_end!=NULL)
		{
			size_t wlen=strlen(word);
			size_t elen=strlen(_mps_synhi[synhi].c_end);

			/* test if this word ends with the comment end;
			   the tail of the word is what must be compared,
			   as the first occurrence found by strstr() is
			   not the last one in words like "---" when
			   the comment end is "--" */
			if(wlen>=elen && strcmp(word+(wlen-elen),
				_mps_synhi[synhi].c_end)==0)
				_in_comment=0;

			/* the closing word itself is still comment */
			return(MP_COLOR_COMMENT);
		}
		else
		{
			/* c_end is NULL: the comment lasts to the end
			   of the line where it started, so test if
			   line is different */
			if(line+1 != _in_comment)
				_in_comment=0;
			else
				return(MP_COLOR_COMMENT);
		}
	}

	/* is this a new start of comment? */
	if(! _draw_quoting && _mps_synhi[synhi].c_start!=NULL)
	{
		if(strncmp(word,_mps_synhi[synhi].c_start,
			strlen(_mps_synhi[synhi].c_start))==0)
		{
			/* line+1 is stored, so that line 0 is not
			   taken as 'not in comment' */
			_in_comment=line+1;
			return(MP_COLOR_COMMENT);
		}
	}

	/* are numbers treated as strings (literals)? */
	if(_mps_synhi[synhi].numbers)
	{
		if(word[0]=='-' || (word[0]>='0' && word[0]<='9'))
			return(MP_COLOR_STRING);
	}

	/* test if word is a local token */
	if(bsearch(&word,_local_tokens,_num_local_tokens,
		sizeof(char *),_local_cmp)!=NULL)
			return(MP_COLOR_LOCAL);

	/* if case is significant and the word is made only of
	   capital letters, digits and underscores (an empty word
	   also qualifies), it is an 'all caps' word */
	if(_mps_synhi[synhi].casesig)
	{
		for(n=0;word[n];n++)
		{
			if(word[n]=='_')
				continue;
			if(word[n]>='0' && word[n]<='9')
				continue;

			if(word[n]<'A' || word[n]>'Z')
				break;
		}

		/* the end was reached without finding anything else */
		if(!word[n]) return(MP_COLOR_CAPS);
	}

	/* no words in this highlighter */
	if(_mps_synhi[synhi].wi==0)
		return(MP_COLOR_NORMAL);

	wd.word=word;

	/* test if special word */
	w=bsearch(&wd,_mps_synhi[synhi].w,
		_mps_synhi[synhi].wi,
		sizeof(struct mps_wsynhi),
		_mps_synhi[synhi].casesig ? _wrd_cmp : _wrd_icmp);

	if(w!=NULL)
		return(w->color);

	return(MP_COLOR_NORMAL);
}


/**
 * mps_quoting - Test if we are inside quotes
 * @c: character to test
 * @color: color previously calculated
 * @synhi: syntax highlighter index (1-based; 0 means none)
 *
 * If current text is between quotes, returns the quoting
 * color (MP_COLOR_STRING); otherwise, @color is returned
 * untouched. Both the opening and the closing quote are
 * drawn as part of the string. The global _draw_quoting
 * flag holds the closing quote being waited for.
 */
int mps_quoting(int c, int color, int synhi)
{
	static char prev=' ';
	char * opening;
	char * closing;
	int i;

	/* no highlighter or inside a comment: nothing to do */
	if(synhi==0 || _in_comment) return(color);

	synhi--;
	opening=_mps_synhi[synhi].q_start;
	if(opening==NULL) return(color);

	closing=_mps_synhi[synhi].q_end;
	if(closing==NULL) return(color);

	if(!_draw_quoting)
	{
		/* not quoting: is c one of the opening quotes? */
		for(i=0;opening[i];i++)
		{
			if(c==opening[i])
			{
				/* wait for the closing quote stored
				   in the same position of q_end */
				_draw_quoting=closing[i];
				prev=' ';

				return(MP_COLOR_STRING);
			}
		}

		return(color);
	}

	/* quoting: a closing quote ends the string, unless
	   the previous character was a backslash */
	if(c==_draw_quoting && prev!='\\')
		_draw_quoting='\0';

	/* a pair of backslashes is an escaped backslash,
	   so it must not escape the next character */
	if(prev=='\\' && c=='\\')
		prev=' ';
	else
		prev=c;

	return(MP_COLOR_STRING);
}


/**
 * mps_set_override_mode - Forces the syntax hilight mode
 * @mode: mode name to set
 *
 * Forces the syntax hilight to be the one
 * named as mode. If mode is not found, 0 is returned.
 */
int mps_set_override_mode(char * mode)
{
	int n;

	for(n=0;_mps_synhi[n].type!=NULL;n++)
	{
		if(strcmp(_mps_synhi[n].mode,mode)==0)
		{
			_override_synhi=n+1;
			return(1);
		}
	}

	return(0);
}


/**
 * mps_enumerate_modes - Returns the available synhi modes
 *
 * Returns a pointer to a static buffer containing the mode
 * names of the available syntax highlighters, each one
 * followed by a space. The buffer is overwritten on each
 * call. If the names don't fit in the buffer, the list is
 * cut before the first name that doesn't fit.
 */
char * mps_enumerate_modes(void)
{
	static char modes[1024];
	size_t used=0;
	size_t len;
	int n;

	modes[0]='\0';
	for(n=0;_mps_synhi[n].type!=NULL;n++)
	{
		len=strlen(_mps_synhi[n].mode);

		/* the name, the space and the final
		   terminator must fit in the buffer */
		if(len + 2 > sizeof(modes) - used)
			break;

		memcpy(modes + used,_mps_synhi[n].mode,len);
		used+=len;
		modes[used++]=' ';
		modes[used]='\0';
	}

	return(modes);
}


/**
 * mps_add_local_token - Adds a local token
 * @token: the token
 *
 * Adds a copy of @token to the local database. This database
 * is sorted and searched to be highlighted with the
 * MP_COLOR_LOCAL color. Called when reading the
 * 'tags' file. If @token is NULL, it's taken as the
 * end of local tokens: the database is sorted and the next
 * call to this function resets it to start again.
 * Tokens are silently ignored if the database is full
 * (MAX_LOCAL_TOKENS) or if there is no memory for the copy.
 */
void mps_add_local_token(char * token)
{
	char * copy;
	size_t size;

	if(token==NULL)
	{
		/* end of tokens: leave them ready for bsearch() */
		qsort(_local_tokens, _num_local_tokens,
			sizeof(char *), _local_cmp);

		_token_reset=1;

		return;
	}

	if(_token_reset)
	{
		/* first token of a new set: start again. The old
		   strings are freed as their slots are reused */
		_num_local_tokens=0;
		_token_reset=0;
	}

	if(_num_local_tokens>=MAX_LOCAL_TOKENS)
		return;

	/* make the copy before touching the slot, so that
	   a failed allocation leaves everything as it was */
	size=strlen(token)+1;

	if((copy=(char *)malloc(size))==NULL)
		return;

	memcpy(copy,token,size);

	/* free(NULL) does nothing, so no test is needed */
	free(_local_tokens[_num_local_tokens]);
	_local_tokens[_num_local_tokens]=copy;

	_num_local_tokens++;
}


/* compare functions defined earlier in this file */
static int _wrd_cmp(const void * s1, const void * s2);
static int _wrd_icmp(const void * s1, const void * s2);

/**
 * _mps_load_words - Appends a word list to a highlighter
 * @s: the syntax highlighter being filled
 * @list: NULL-terminated list of words (can be NULL)
 * @color: color to be associated to all the words
 * @m: number of entries already stored in s->w
 *
 * Stores each word of @list with @color into s->w, starting
 * at position @m. If the table gets full, a message is printed
 * and the program is aborted with a non-zero exit status.
 * Returns the new number of entries. Internal (do not use).
 */
static int _mps_load_words(struct mps_synhi * s, char ** list, int color, int m)
{
	char ** ptr;

	for(ptr=list;ptr!=NULL && *ptr!=NULL;ptr++)
	{
		s->w[m].word=*ptr;
		s->w[m].color=color;

		if(++m == MAX_WORDS_PER_SYNHI)
		{
			printf("Too much synhi words. Please increment ");
			printf("MAX_WORDS_PER_SYNHI in mp_synhi.h and recompile.\n");

			/* this is a failure, so don't report success */
			exit(1);
		}
	}

	return(m);
}


/**
 * mps_startup - Syntax highlight engine startup
 *
 * Initializes the syntax highlighting engine: for each
 * highlighter, the tokens (MP_COLOR_TOKEN) and the variables
 * (MP_COLOR_VAR) are joined in its word table, that is then
 * sorted to be searched by mps_word_color(). The local token
 * database is also cleared.
 */
void mps_startup(void)
{
	int n,m;
	struct mps_synhi * s;

	for(n=0;_mps_synhi[n].type!=NULL;n++)
	{
		s=&_mps_synhi[n];

		m=_mps_load_words(s, s->tokens, MP_COLOR_TOKEN, 0);
		m=_mps_load_words(s, s->vars, MP_COLOR_VAR, m);

		s->wi=m;

		/* the table must be sorted with the same compare
		   function mps_word_color() uses in bsearch(), or
		   words could be missed in case insensitive modes */
		qsort(s->w, s->wi, sizeof(struct mps_wsynhi),
			s->casesig ? _wrd_cmp : _wrd_icmp);
	}

	memset(_local_tokens,'\0',sizeof(_local_tokens));
}
