/***************************************************************************
** Although considerable effort has been expended to make this software   **
** correct and reliable, no warranty is implied; the author disclaims any **
** obligation or liability for damages, including but not limited to      **
** special, indirect, or consequential damages arising out of or in       **
** connection with the use or performance of this software.               **
***************************************************************************/

/*
 *	This file contains routines for creating, deleting, adding
 *	entries to, and looking up entries in a symbol table.
 */

#include "debug.h"
#include "types.h"

/*
 *
 *	The symbol table is implemented as a trie. This is a special
 *	kind of M-tree in which nodes represent specific strings; the
 *	path to each node forms all prefixes of each string. As an
 *	example:
 *
 *				  ROOT
 *			A			B
 *		    D   L   S		    E   I   Y
 *		   D   L M		   N T  N
 *
 *	can describe the words "add", "all", "alm", "as", "be", "ben",
 *	"bet", "bin" and "by". The non-words "ad", "al" and "bi" could
 *	also be represented. In this example, each node represents one
 *	character. Since we are not limiting ourselves to the letters
 *	of the alphabet or any other subset of the ASCII character set,
 *	this technique consumes an enormous amount of memory when
 *	implemented in its simplest form, with a vector of 256 nodes
 *	existing at each defined level so that a particular character
 *	can be located with a simple array index.
 *
 *	To save space, at the cost of a slower search, each node in the
 *	trie can contain one or more characters, and the nodes at the same
 *	level are connected by a doubly-linked list through each node's
 *	left and right sibling pointers. Each node also has a child pointer,
 *	which defines the next path to follow once all characters up to and
 *	including the current node have matched.
 *
 *	To speed things up a little bit, the first level of the tree,
 *	corresponding to the first byte of the symbol name, is set up
 *	as a vector, indexed by that first byte. The trie should, for
 *	reasonably well behaved name spaces, thin out rapidly beyond
 *	one or two levels. However, the vector approach can only be
 *	used at the first level, where 256 nodes are required (another
 *	level would require 65536 nodes, which is too much memory).
 *
 *	Each node in the trie has a value which is supplied by the caller.
 *	It is used to identify the node or to point to some other data
 *	structure that provides additional node information. The value
 *	occupies one longword; values of zero are not permitted, since
 *	a value of zero is used to indicate a node that does not correspond
 *	to a valid symbol (but rather a prefix of a valid symbol).
 *
 *	Special "limited" versions of each routine are provided that start
 *	their scan at the vector entry defined by the 'First' parameter.
 *	This might have some utility where an application uses the first
 *	character of the name to partition the name space into (up to) 256
 *	separate name spaces, avoiding the relatively large overhead in
 *	creating separate symbol tables for each name space (for example,
 *	structure member names in the "C" programming language).
 *	This is included only because this particular symbol table
 *	implementation permits it.
 */

/*
 *	One node of the trie below the first-level vector. A node holds
 *	Node_Key_Length key bytes, is linked to the other nodes at its
 *	level through Left and Right, and leads to the next level through
 *	Child.
 */
struct Trie_Node {
	/* First node of the next level down, or 0 if there is none */
	struct Trie_Node *Child;
	/* Previous sibling at this level, or 0 for the left-most node */
	struct Trie_Node *Left;
	/* Next sibling at this level, or 0 for the right-most node */
	struct Trie_Node *Right;
	/* Caller-supplied value; 0 means no symbol ends at this node */
	unsigned long Node_Value;
	/* Number of key bytes stored in Node_Key */
	unsigned short Node_Key_Length;
	/*
	 * Key bytes, stored inline after the fixed part of the node; the
	 * allocating routines in this file request sizeof (struct Trie_Node)
	 * plus the key length.
	 * NOTE(review): the old remark that the first two bytes are "free"
	 * presumably dates from a fixed-size Node_Key[2] declaration -- confirm.
	 */
	unsigned char Node_Key[];	/* First two are "free" */
};

/*
 *	Header of one symbol table: occupancy counters plus the first
 *	level of the trie, which is a vector indexed by the first byte
 *	of the symbol name.
 */
struct Trie_Header {
	/*
	 * Starts at 256 (the vector entries) in Create_Symbol_Table and is
	 * incremented each time a Trie_Node is allocated.
	 */
	unsigned long Total_Nodes;
	/* Incremented when an install stores a value into a zero-valued slot */
	unsigned long Used_Nodes;
	struct {
		/* Left-most node of the second level for this first byte, or 0 */
		struct Trie_Node *Root_Node;
		/* Value of the one-byte symbol consisting of this byte; 0 if none */
		unsigned long Node_Value;
	} Node_Vector[256];
};

/*
 *	Routine Create_Symbol_Table obtains a trie header from Mem_Alloc,
 *	clears all 256 first-level vector entries and returns the header.
 *	Total_Nodes starts at 256 (the vector entries themselves) and
 *	Used_Nodes at zero.
 */

struct Trie_Header *Create_Symbol_Table ()
{
	struct Trie_Header *Header;
	unsigned int Slot;
	extern char *Mem_Alloc();

	Header = (struct Trie_Header *) Mem_Alloc (sizeof (struct Trie_Header));
	Header->Used_Nodes = 0;
	Header->Total_Nodes = 256;
	/* Every first-level entry starts with no subtree and no value */
	Slot = 256;
	while (Slot > 0) {
		Slot--;
		Header->Node_Vector[Slot].Node_Value = 0;
		Header->Node_Vector[Slot].Root_Node = 0;
	}
	return (Header);
}

/*
 *	Routine Lookup_Symbol returns the value stored for a name, or
 *	zero when the name is not in the table (or is empty).
 *
 *	One-byte names live directly in the first-level vector; longer
 *	names are resolved by Lookup_Limited within the partition selected
 *	by their first byte.
 */

unsigned long Lookup_Symbol (Name, Name_Length, Trie_Ptr)
unsigned char *Name;
unsigned short Name_Length;
struct Trie_Header *Trie_Ptr;
{
	extern unsigned long Lookup_Limited();

	if (Name_Length > 1)
		return (Lookup_Limited (Name[0], Name + 1, Name_Length-1, Trie_Ptr));
	if (Name_Length == 1)
		return (Trie_Ptr->Node_Vector[Name[0]].Node_Value);
	return (0);		/* Empty name: this better not happen */
}

/*
 *	Routine Lookup_Limited looks a name up within the single partition
 *	selected by 'First'; 'Name' holds only the bytes after the first.
 *	Returns the node's value, or zero if no node matches.
 */

unsigned long Lookup_Limited (First, Name, Name_Length, Trie_Ptr)
unsigned char First, *Name;
unsigned short Name_Length;
struct Trie_Header *Trie_Ptr;
{
	struct Trie_Node *Found;
	extern struct Trie_Node *Lookup_Trie_Node();

	Found = Lookup_Trie_Node (Name, Name_Length, Trie_Ptr->Node_Vector[First].Root_Node);
	if (Found == 0)
		return (0);
	return (Found->Node_Value);
}

/*
 *	Routine Delete_Symbol locates an entry in the trie and sets its
 *	value to zero. The node itself is kept; only its value is cleared.
 *
 *	An empty name is ignored. A one-byte name is cleared directly in
 *	the first-level vector; longer names are handed to Delete_Limited.
 *
 *	When a one-byte symbol that actually had a value is cleared,
 *	Used_Nodes is decremented. Install_Symbol increments the counter
 *	whenever it fills a zero-valued slot, so without the decrement an
 *	install/delete/install sequence would count the same symbol twice.
 */

Delete_Symbol (Name, Name_Length, Trie_Ptr)
unsigned char *Name;
unsigned short Name_Length;
struct Trie_Header *Trie_Ptr;
{
	unsigned long *Value_Ptr;

	if (Name_Length == 0)
		;
	else if (Name_Length == 1) {
		Value_Ptr = &Trie_Ptr->Node_Vector[*Name].Node_Value;
		if (*Value_Ptr != 0) {
			*Value_Ptr = 0;
			/* Guard against underflow should the counter already be off */
			if (Trie_Ptr->Used_Nodes > 0)
				Trie_Ptr->Used_Nodes--;
		}
	} else
		Delete_Limited (*Name, &Name[1], Name_Length-1, Trie_Ptr);
}

/*
 *	Routine Delete_Limited clears the value of a symbol within the
 *	partition selected by 'First'; 'Name' holds only the bytes after
 *	the first. Nothing happens if no node matches the name.
 *
 *	When the node actually carried a value, Used_Nodes is decremented
 *	so that it keeps matching the number of valued entries;
 *	Install_Limited increments it again if the symbol is re-installed.
 */

Delete_Limited (First, Name, Name_Length, Trie_Ptr)
unsigned char First, *Name;
unsigned short Name_Length;
struct Trie_Header *Trie_Ptr;
{
	struct Trie_Node *Node_Ptr;
	extern struct Trie_Node *Lookup_Trie_Node();

	Node_Ptr = Lookup_Trie_Node (Name, Name_Length, Trie_Ptr->Node_Vector[First].Root_Node);
	if (Node_Ptr != 0 && Node_Ptr->Node_Value != 0) {
		Node_Ptr->Node_Value = 0;
		/* Guard against underflow should the counter already be off */
		if (Trie_Ptr->Used_Nodes > 0)
			Trie_Ptr->Used_Nodes--;
	}
}

/*
 *	Routine Lookup_Trie_Node searches the sibling list starting at
 *	'Root' (and, recursively, the levels below it) for the node whose
 *	path spells exactly 'Name'. Returns the node, or 0 if none exists.
 */

struct Trie_Node *Lookup_Trie_Node (Name, Name_Length, Root)
unsigned char *Name;
unsigned short Name_Length;
struct Trie_Node *Root;
{
	struct Trie_Node *Cursor;
	unsigned long Result;
	unsigned short Matched;
	extern unsigned long Compare_Trie_Key();

	for (Cursor = Root; Cursor != 0; Cursor = Cursor->Right) {
		Result = Compare_Trie_Key (Name, Name_Length, Cursor);
		if (Result < 65536)		/* No flag bits: exact match */
			return (Cursor);
		Matched = Result & 0xFFFF;
		if (Matched != 0) {
			/* Only a fully consumed key lets us descend a level */
			if (Matched != Cursor->Node_Key_Length)
				return (0);
			return (Lookup_Trie_Node (&Name[Matched], Name_Length-Matched, Cursor->Child));
		}
		/* Nothing matched; a smaller name cannot appear further right */
		if ((Result & 0x00010000) != 0)
			return (0);
	}
	return (0);
}

/*
 *	Routine Install_Symbol enters a symbol into the trie. If the
 *	symbol already has a (non-zero) value, that value is returned
 *	unchanged; otherwise 'Value' is stored and returned. An empty
 *	name yields zero.
 */

unsigned long Install_Symbol (Name, Name_Length, Value, Trie_Ptr)
unsigned char *Name;
unsigned short Name_Length;
unsigned long Value;
struct Trie_Header *Trie_Ptr;
{
	unsigned long Result, *Slot;
	extern unsigned long Install_Limited();

#ifdef DEBUG_SYMTAB
	Show_Name (Name, Name_Length);
#endif
	switch (Name_Length) {
	case 0:				/* Should never happen */
		Result = 0;
		break;
	case 1:				/* Lives in the first-level vector */
		Slot = &Trie_Ptr->Node_Vector[Name[0]].Node_Value;
		if (*Slot == 0) {
			Trie_Ptr->Used_Nodes++;
			*Slot = Value;
		}
		Result = *Slot;
		break;
	default:			/* Remaining bytes go into the trie proper */
		Result = Install_Limited (Name[0], Name + 1, Name_Length-1, Value, Trie_Ptr);
		break;
	}
#ifdef DEBUG_SYMTAB
	printf ("\n");
#endif
	return (Result);
}

/*
 *	Routine Install_Limited enters a symbol into the partition selected
 *	by 'First'; 'Name' holds only the bytes after the first. The node
 *	is found or created by Create_Trie_Node. An existing non-zero value
 *	wins; otherwise 'Value' is stored, Used_Nodes is bumped, and
 *	'Value' is returned.
 */

unsigned long Install_Limited (First, Name, Name_Length, Value, Trie_Ptr)
unsigned char First, *Name;
unsigned short Name_Length;
unsigned long Value;
struct Trie_Header *Trie_Ptr;
{
	struct Trie_Node *Target;
	extern struct Trie_Node *Create_Trie_Node();

	Target = Create_Trie_Node (Name, Name_Length, &Trie_Ptr->Node_Vector[First].Root_Node, Trie_Ptr);
	if (Target->Node_Value != 0)
		return (Target->Node_Value);	/* Already installed */
	Target->Node_Value = Value;
	Trie_Ptr->Used_Nodes++;
	return (Target->Node_Value);
}

/*
 *	Routine Create_Trie_Node finds the node whose path spells 'Name'
 *	below '*Parent', creating, splitting and linking nodes as needed,
 *	and returns it. '*Parent' is the pointer to the left-most node of
 *	the level being searched and is updated when a new left-most node
 *	is created. Total_Nodes is incremented for every node allocated.
 */

struct Trie_Node *Create_Trie_Node (Name, Name_Length, Parent, Trie_Ptr)
unsigned char *Name;
unsigned short Name_Length;
struct Trie_Node **Parent;
struct Trie_Header *Trie_Ptr;
{
	struct Trie_Node *Cursor;
	unsigned long Result;
	unsigned short Matched;
	extern struct Trie_Node *Split_Trie_Node(), *Link_Trie_Sibling();
	extern struct Trie_Node *Make_Trie_Node();
	extern unsigned long Compare_Trie_Key();

	Cursor = *Parent;
	if (Cursor == 0) {		/* Empty level: the name becomes its only node */
		Cursor = Make_Trie_Node (Name, Name_Length);
		*Parent = Cursor;
		Trie_Ptr->Total_Nodes++;
		return (Cursor);
	}
	for (;;) {
		Result = Compare_Trie_Key (Name, Name_Length, Cursor);
		if (Result < 65536)	/* Exact match: node already exists */
			break;
		Matched = Result & 0xFFFF;
		if (Matched != 0) {	/* Shares a prefix with this node */
			if (Matched < Cursor->Node_Key_Length) {
				/* Cut the node down to the common prefix */
				Split_Trie_Node (Cursor, Matched);
				Trie_Ptr->Total_Nodes++;
			}
			if (Matched < Name_Length)
				/* Rest of the name belongs one level down */
				Cursor = Create_Trie_Node (&Name[Matched], Name_Length-Matched,
							   &Cursor->Child, Trie_Ptr);
			break;
		}
		if ((Result & 0x00010000) != 0) {	/* Name < Key: insert before */
			Cursor = Link_Trie_Sibling (Name, Name_Length, Cursor, 0);
			Trie_Ptr->Total_Nodes++;
			if (Cursor->Left == 0)		/* New left-most entry */
				*Parent = Cursor;
			break;
		}
		if (Cursor->Right == 0) {		/* Name > last key: append */
			Cursor = Link_Trie_Sibling (Name, Name_Length, Cursor, 1);
			Trie_Ptr->Total_Nodes++;
			break;
		}
		Cursor = Cursor->Right;			/* Keep scanning the level */
	}
	return (Cursor);
}

/*
 *	Routine Compare_Trie_Key compares an input name with a trie
 *	node key. The number of leading characters that are equal is
 *	returned in the low order 16 bits of the result. If the name and
 *	the key are identical no other bit is set; otherwise bit 16 is on
 *	when the name sorts before the key and bit 17 is on when it sorts
 *	after it. A name that is a proper prefix of the key sorts before
 *	it; a name that extends the key sorts after it.
 */

unsigned long Compare_Trie_Key (Name, Name_Length, Node_Ptr)
unsigned char *Name;
unsigned short Name_Length;
struct Trie_Node *Node_Ptr;
{
	unsigned char *Key;
	unsigned long Matched;
	unsigned short Name_Left, Key_Left;
	int Name_Is_Less;

	Key = &Node_Ptr->Node_Key[0];
	Name_Left = Name_Length;
	Key_Left = Node_Ptr->Node_Key_Length;
	Matched = 0;
	while (Name_Left > 0 && Key_Left > 0 && Name[Matched] == Key[Matched]) {
		Matched++;
		Name_Left--;
		Key_Left--;
	}
	if (Name_Left == 0 && Key_Left == 0)
		return (Matched);		/* Identical */
	if (Name_Left == 0)
		Name_Is_Less = 1;		/* Name is a proper prefix of the key */
	else if (Key_Left == 0)
		Name_Is_Less = 0;		/* Name extends beyond the key */
	else
		/* Both have bytes left, so the scan stopped on a mismatch */
		Name_Is_Less = (Name[Matched] < Key[Matched]);
	if (Name_Is_Less)
		Matched |= 0x00010000;
	else
		Matched |= 0x00020000;
	return (Matched);
}

/*
 *	Routine Split_Trie_Node divides a node in two. The original node
 *	keeps the first 'Prefix_Length' key bytes and loses its value; a
 *	newly allocated node receives the remaining key bytes, the
 *	original's value and the original's children, and becomes the
 *	original's only child. The new node is returned.
 *
 *	The bytes beyond the prefix are left in place in the original
 *	node's storage; only its recorded key length shrinks.
 */

struct Trie_Node *Split_Trie_Node (Node_Ptr, Prefix_Length)
struct Trie_Node *Node_Ptr;
unsigned short Prefix_Length;
{
	struct Trie_Node *Tail_Ptr;
	unsigned short Tail_Length, Index;
	extern char *Mem_Alloc();

	Tail_Length = Node_Ptr->Node_Key_Length - Prefix_Length;
	Tail_Ptr = (struct Trie_Node *) Mem_Alloc (sizeof (struct Trie_Node) + Tail_Length);

	/* The tail takes over everything that hung off the full key */
	Tail_Ptr->Child = Node_Ptr->Child;
	Tail_Ptr->Left = 0;
	Tail_Ptr->Right = 0;
	Tail_Ptr->Node_Value = Node_Ptr->Node_Value;
	Tail_Ptr->Node_Key_Length = Tail_Length;
	for (Index = 0; Index < Tail_Length; Index++)
		Tail_Ptr->Node_Key[Index] = Node_Ptr->Node_Key[Prefix_Length + Index];

	/* The original shrinks to a valueless prefix above the tail */
	Node_Ptr->Node_Value = 0;
	Node_Ptr->Child = Tail_Ptr;
	Node_Ptr->Node_Key_Length = Prefix_Length;
#ifdef DEBUG_SYMTAB
	printf (" split "); Show_Trie_Node (Node_Ptr); Show_Trie_Node (Tail_Ptr);
#endif
	return (Tail_Ptr);
}

/*
 *	Routine Link_Trie_Sibling allocates a valueless, childless node
 *	for 'Name' and splices it into the doubly-linked sibling list
 *	next to 'Node_Ptr': immediately to its left when 'Left_Right' is
 *	zero, immediately to its right otherwise. The new node is returned.
 */

struct Trie_Node *Link_Trie_Sibling (Name, Name_Length, Node_Ptr, Left_Right)
unsigned char *Name;
unsigned short Name_Length;
struct Trie_Node *Node_Ptr;
int Left_Right;
{
	struct Trie_Node *Fresh_Ptr, *Neighbor_Ptr;
	unsigned short Index;
	extern char *Mem_Alloc();

	Fresh_Ptr = (struct Trie_Node *) Mem_Alloc (sizeof (struct Trie_Node) + Name_Length);
	Fresh_Ptr->Child = 0;
	Fresh_Ptr->Node_Value = 0;
	Fresh_Ptr->Node_Key_Length = Name_Length;
	for (Index = 0; Index < Name_Length; Index++)
		Fresh_Ptr->Node_Key[Index] = Name[Index];

	if (Left_Right != 0) {		/* Link right */
		Neighbor_Ptr = Node_Ptr->Right;
		Fresh_Ptr->Right = Neighbor_Ptr;
		if (Neighbor_Ptr != 0)
			Neighbor_Ptr->Left = Fresh_Ptr;
		Fresh_Ptr->Left = Node_Ptr;
		Node_Ptr->Right = Fresh_Ptr;
	} else {			/* Link left */
		Neighbor_Ptr = Node_Ptr->Left;
		Fresh_Ptr->Left = Neighbor_Ptr;
		if (Neighbor_Ptr != 0)
			Neighbor_Ptr->Right = Fresh_Ptr;
		Fresh_Ptr->Right = Node_Ptr;
		Node_Ptr->Left = Fresh_Ptr;
	}
#ifdef DEBUG_SYMTAB
	printf (" link %s ", (Left_Right == 0) ? "left" : "right"); Show_Trie_Node (Fresh_Ptr);
#endif
	return (Fresh_Ptr);
}

/*
 *	Routine Make_Trie_Node allocates a stand-alone node holding a copy
 *	of 'Name'. The node has no value, no children and no siblings; the
 *	caller is responsible for attaching it to the trie.
 */

struct Trie_Node *Make_Trie_Node (Name, Name_Length)
unsigned char *Name;
unsigned short Name_Length;
{
	struct Trie_Node *Fresh_Ptr;
	unsigned short Index;
	extern char *Mem_Alloc();

	Fresh_Ptr = (struct Trie_Node *) Mem_Alloc (sizeof (struct Trie_Node) + Name_Length);
	Fresh_Ptr->Node_Value = 0;
	Fresh_Ptr->Node_Key_Length = Name_Length;
	Fresh_Ptr->Right = 0;
	Fresh_Ptr->Left = 0;
	Fresh_Ptr->Child = 0;
	for (Index = 0; Index < Name_Length; Index++)
		Fresh_Ptr->Node_Key[Index] = Name[Index];
#ifdef DEBUG_SYMTAB
	printf (" make "); Show_Trie_Node (Fresh_Ptr);
#endif
	return (Fresh_Ptr);
}

/*
 *	Routine Scan_Symbol_Table walks the whole symbol table and calls
 *	a user routine for each symbol in it, as
 *
 *		(*Func) (Name, Length, Value, Arg)
 *
 *	where 'Name' is the caller's scratch buffer holding the symbol
 *	name, 'Length' its length and 'Value' the symbol's value.
 *
 *	The user routine must return an int: zero if the scan is to be
 *	stopped, non-zero if it is to continue. The final state of this
 *	condition is returned to the caller, so a return value of zero
 *	means the scan was stopped by the user routine.
 *
 *	If the scratch 'Name' buffer is not long enough for a symbol,
 *	the symbol name is truncated to 'Max_Name_Length' bytes, possibly
 *	resulting in duplicate names being reported. Note that the first
 *	byte of the buffer is always written.
 */

int Scan_Symbol_Table (Name, Max_Name_Length, Func, Arg, Trie_Ptr)
unsigned char *Name;
unsigned short Max_Name_Length;
int (*Func)();
unsigned long Arg;
struct Trie_Header *Trie_Ptr;
{
	unsigned int Slot;
	unsigned long Slot_Value;
	int Keep_Going;
	extern int Scan_Trie_Node();

	Keep_Going = 1;
	Slot = 0;
	while (Slot < 256 && Keep_Going != 0) {
		Name[0] = (unsigned char) Slot;
		/* The one-byte symbol, if any, is reported before its subtree */
		Slot_Value = Trie_Ptr->Node_Vector[Slot].Node_Value;
		if (Slot_Value != 0)
			Keep_Going = (*Func) (Name, 1, Slot_Value, Arg);
		if (Keep_Going != 0)
			Keep_Going = Scan_Trie_Node (Name, 1, Max_Name_Length,
						     Trie_Ptr->Node_Vector[Slot].Root_Node, Func, Arg);
		Slot++;
	}
	return (Keep_Going);
}

/*
 *	Routine Scan_Limited scans only the partition of the symbol table
 *	selected by 'First'. The user routine sees just the 2nd through
 *	Nth characters of each complete symbol name; the one-byte symbol
 *	held in the vector entry itself is not reported. The return value
 *	is that of Scan_Trie_Node.
 */

int Scan_Limited (First, Name, Max_Name_Length, Func, Arg, Trie_Ptr)
unsigned char First, *Name;
unsigned short Max_Name_Length;
int (*Func)();
unsigned long Arg;
struct Trie_Header *Trie_Ptr;
{
	struct Trie_Node *Partition_Root;
	extern int Scan_Trie_Node();

	Partition_Root = Trie_Ptr->Node_Vector[First].Root_Node;
	return (Scan_Trie_Node (Name, 0, Max_Name_Length, Partition_Root, Func, Arg));
}

/*
 *	Routine Scan_Trie_Node visits every node in the sibling list
 *	starting at 'Root', left to right, descending into each node's
 *	children before moving on to its right sibling.
 *
 *	'Name' already holds 'Name_Length' bytes of prefix. Each node's
 *	key is appended to it, up to 'Max_Name_Length' bytes in total,
 *	and (*Func) is called for every node with a non-zero value.
 *	Returns zero as soon as (*Func) asks for the scan to stop,
 *	non-zero otherwise.
 */

int Scan_Trie_Node (Name, Name_Length, Max_Name_Length, Root, Func, Arg)
unsigned char *Name;
unsigned short Name_Length, Max_Name_Length;
struct Trie_Node *Root;
int (*Func)();
unsigned long Arg;
{
	struct Trie_Node *Cursor;
	int Keep_Going;
	unsigned short Length, Index;

	Keep_Going = 1;
	Cursor = Root;
	while (Cursor != 0 && Keep_Going != 0) {
		/* Append this node's key to the prefix, truncating if need be */
		Length = Name_Length;
		for (Index = 0; Index < Cursor->Node_Key_Length && Length < Max_Name_Length; Index++) {
			Name[Length] = Cursor->Node_Key[Index];
			Length++;
		}
		if (Cursor->Node_Value != 0)
			Keep_Going = (*Func) (Name, Length, Cursor->Node_Value, Arg);
		if (Keep_Going != 0)
			Keep_Going = Scan_Trie_Node (Name, Length, Max_Name_Length, Cursor->Child, Func, Arg);
		Cursor = Cursor->Right;
	}
	return (Keep_Going);
}

/*
 *	Routine Dissolve_Symbol_Table releases all storage occupied by
 *	the symbol table: the subtree under each of the 256 first-level
 *	entries (via Dissolve_Trie_Node) and then the header itself.
 *	The header pointer must not be used afterwards.
 */

Dissolve_Symbol_Table (Trie_Ptr)
struct Trie_Header *Trie_Ptr;
{
	unsigned int Slot;

	Slot = 0;
	do {
		Dissolve_Trie_Node (Trie_Ptr->Node_Vector[Slot].Root_Node);
		Slot++;
	} while (Slot < 256);
	Mem_Free (Trie_Ptr);
}

Dissolve_Limited (First, Trie_Ptr)
unsigned char First;
struct Trie_Header *Trie_Ptr;
{
	auto   unsigned int Index;

	Index = (unsigned int) First;
	Dissolve_Trie_Node (Trie_Ptr->Node_Vector[Index].Root_Node);
	Trie_Ptr->Node_Vector[Index].Root_Node = 0;
	Trie_Ptr->Node_Vector[Index].Node_Value = 0;
}

/*
 *	Routine Dissolve_Trie_Node frees every node in the sibling list
 *	starting at 'Root', together with all levels below each of them.
 *	A null 'Root' is accepted and does nothing.
 */

Dissolve_Trie_Node (Root)
struct Trie_Node *Root;
{
	struct Trie_Node *Current, *Next;

	for (Current = Root; Current != 0; Current = Next) {
		/* Remember the sibling before this node's storage goes away */
		Next = Current->Right;
		Dissolve_Trie_Node (Current->Child);
		Mem_Free (Current);
	}
}

/*
 *	Routine Symtab_Occupancy reports the table's occupancy counters:
 *	'*Used' receives Used_Nodes and '*Total' receives Total_Nodes.
 */

Symtab_Occupancy (Trie_Ptr, Used, Total)
struct Trie_Header *Trie_Ptr;
unsigned long *Used, *Total;
{
	struct Trie_Header *Header;

	Header = Trie_Ptr;
	Used[0] = Header->Used_Nodes;
	Total[0] = Header->Total_Nodes;
}

#ifdef DEBUG_SYMTAB
/*
 *	Debug aid: prints the key of a trie node through Show_Name.
 */

Show_Trie_Node (Node_Ptr)
struct Trie_Node *Node_Ptr;
{
	unsigned char *Key;
	unsigned short Key_Length;

	Key = Node_Ptr->Node_Key;
	Key_Length = Node_Ptr->Node_Key_Length;
	Show_Name (Key, Key_Length);
}

/*
 *	Debug aid: prints a name of 'Name_Length' bytes enclosed in double
 *	quotes. Bytes in the range '\040' through '\176' are printed as
 *	is; every other byte is printed as a backslash followed by three
 *	octal digits.
 *
 *	The byte counter is unsigned, like 'Name_Length' itself, so that
 *	lengths above 32767 are not turned into a negative count that
 *	would suppress the output altogether.
 */

Show_Name (Name, Name_Length)
unsigned char *Name;
unsigned short Name_Length;
{
	unsigned char *Ptr;
	unsigned short Count;

	printf ("\"");
	Ptr = Name;
	for (Count = Name_Length; Count > 0; Count--) {
		if (*Ptr >= '\040' && *Ptr <= '\176')
			printf ("%c", *Ptr);
		else
			printf ("\\%03o", *Ptr);
		Ptr++;
	}
	printf ("\"");
}
#endif
