CParser Developer documentation

  1. Parts of a structure
  2. Parsenode structure definition
  3. Typereference of parsetree nodes
  4. Example parsetrees

Parts of a structure declaration

a structure/union typedef declaration (T_DECLARATION) consists of four main parts:
  1. a typespecifier (T_TYPESPECIFIER) - either "struct" or "union"
  2. a structure tag (T_IDENTIFIER)
  3. a body with a list of one or more structure declarations (T_STRUCTDECLARATION) enclosed in curly braces and separated by a ';'
  4. a list of declarators (T_DECLARATOR) for the whole struct/union which
    can then be used in the program to define variables of the new type just introduced
typedef struct _EXAMPLE {
int nField1;
short int nField2, nField3;
} EXAMPLE,*LPEXAMPLE;


a structure declaration has two parts
  1. one or a valid combination of several type specifiers (T_TYPESPECIFIER)
  2. a list of one or more declarators (T_STRUCTDECLARATOR) separated by a ','
typedef struct _EXAMPLE {
int nField1;
short int nField2, nField3;
} EXAMPLE,*LPEXAMPLE;


one can apply pointers (int *a - T_POINTER), subscripts (int a[5] - T_SUBSCRIPT) or bitfield (int a:8 - T_CONSTANT) modifiers to a declarator
(bitfields are limited to T_STRUCTDECLARATOR's -> declarators inside a structure)


Parsenode structure

typedef struct _PARSENODE {
NODETYPE nType;
union {
struct _PARSENODE *link[3];
char *String;
int Constant;
} un;
} PARSENODE, *LPPARSENODE;

where nType is one of the following:

typedef enum _NODETYPE {
T_VOID=1, T_CHAR, T_WCHAR, T_INT8, T_SHORT, T_INT, T_INT64, T_LONG, T_UNSIGNED,
T_SIGNED, T_FLOAT, T_DOUBLE, T_BOOL, T_STRUCT, T_UNION, T_ENUM, T_TYPENAME,
T_IDENTIFIER, T_STRUCTDECLARATION, T_STRUCTDECLARATOR, T_TYPESPECIFIER, T_SUBSCRIPT,
T_POINTER, T_CONSTANT, T_DECLARATION, T_DECLARATOR
} NODETYPE;


Types of parsetree nodes

the following short versions are used below to express which link element is used:

link1 = PARSENODE.un.link[0]
link2 = PARSENODE.un.link[1]
link3 = PARSENODE.un.link[2]

this is also handled like this in code with the following preprocessor macros:
#define link1 un.link[0]
#define link2 un.link[1]
#define link3 un.link[2]

if a link is omitted it is always NULL
T_DECLARATION
link1 -> T_TYPESPECIFIER
link2 -> T_DECLARATOR
T_DECLARATOR
link1 -> NULL | T_DECLARATOR (next one in list)
link2 -> T_IDENTIFIER | T_POINTER | T_SUBSCRIPT
T_STRUCT
link1 -> T_STRUCT | T_UNION (without child nodes, to mark struct or union)
link2 -> NULL | T_IDENTIFIER (structure tag)
link3 -> NULL | T_STRUCTDECLARATION (member declarations)
T_STRUCTDECLARATION
link1 -> NULL | T_STRUCTDECLARATION (next one in list)
link2 -> T_TYPESPECIFIER
link3 -> NULL (anonymous struct/union) | T_STRUCTDECLARATOR
T_STRUCTDECLARATOR
link1 -> NULL | T_STRUCTDECLARATOR (next e.g.: "struct A {int a, *b, c[10], d; }")
link2 -> T_IDENTIFIER | T_SUBSCRIPT | T_POINTER
link3 -> NULL | T_CONSTANT (for bitfields e.g.: int a:8)
T_TYPESPECIFIER
link1 -> T_VOID | T_CHAR | T_INT8 | T_SHORT | T_INT | T_INT64 | T_LONG | T_UNSIGNED |
T_SIGNED | T_FLOAT | T_DOUBLE |
T_STRUCT | T_UNION | T_ENUM | T_TYPENAME
link2 -> NULL | T_TYPESPECIFIER (next one in list e.g.: "unsigned int")
T_POINTER
link1 -> T_POINTER | T_IDENTIFIER | T_SUBSCRIPT
T_SUBSCRIPT
link1 -> T_SUBSCRIPT | T_IDENTIFIER | T_POINTER (pointer to array, e.g.: "int (*a)[50]")
link2 -> T_CONSTANT
T_TYPENAME
String = Name of typedef
T_IDENTIFIER
String = Name of identifier
T_CONSTANT
Constant = constant value
T_XX
nType is one of the basic types:
T_VOID | T_CHAR | T_WCHAR | T_INT8 | T_SHORT | T_INT | T_INT64 | T_LONG | T_UNSIGNED | T_SIGNED
| T_FLOAT | T_DOUBLE | T_BOOL
link1, link2 & link3 are always NULL

Example parsetrees:


typedef struct _RECT {
 LONG left;
 LONG top;
 LONG right;
 LONG bottom;
} RECT,*LPRECT;

if a link is not given it is assumed to be NULL; the declaration above
would result in the following parsetree:

T_DECLARATION
link1 -> T_TYPESPECIFIER
	link1 -> T_STRUCT
		link1 -> T_STRUCT
		link2 -> T_IDENTIFIER = _RECT
		link3 -> T_STRUCTDECLARATION
			link2 -> T_TYPESPECIFIER
				link1 -> T_TYPENAME = LONG
			link3 -> T_STRUCTDECLARATOR
				link2 -> T_IDENTIFIER = left
			link1 -> T_STRUCTDECLARATION
				link2 -> T_TYPESPECIFIER
					link1 -> T_TYPENAME = LONG
				link3 -> T_STRUCTDECLARATOR
					link2 -> T_IDENTIFIER = top
				link1 -> T_STRUCTDECLARATION
					link2 -> T_TYPESPECIFIER
						link1 -> T_TYPENAME = LONG
					link3 -> T_STRUCTDECLARATOR
						link2 -> T_IDENTIFIER = right
					link1 -> T_STRUCTDECLARATION
						link2 -> T_TYPESPECIFIER
							link1 -> T_TYPENAME = LONG
						link3 -> T_STRUCTDECLARATOR
							link2 -> T_IDENTIFIER = bottom
						link1 -> NULL (end of struct declaration list)
link2 -> T_DECLARATOR
	link2 -> T_IDENTIFIER = RECT
	link1 -> T_DECLARATOR
		link2 -> T_POINTER
			link1 -> T_IDENTIFIER = LPRECT
		link1 -> NULL (end of declarator list)

a semantically equivalent structure to the one above, only written differently:

typedef struct _RECT {
 LONG left, top, right, bottom;
} RECT,*LPRECT;


would result in this parsetree:

T_DECLARATION
link1 -> T_TYPESPECIFIER
	link1 -> T_STRUCT
		link1 -> T_STRUCT
		link2 -> T_IDENTIFIER = _RECT
		link3 -> T_STRUCTDECLARATION
			link2 -> T_TYPESPECIFIER
				link1 -> T_TYPENAME = LONG
			link3 -> T_STRUCTDECLARATOR
				link2 -> T_IDENTIFIER = left
				link1 -> T_STRUCTDECLARATOR
					link2 -> T_IDENTIFIER = top
					link1 -> T_STRUCTDECLARATOR
						link2 -> T_IDENTIFIER = right
						link1 -> T_STRUCTDECLARATOR
							link2 -> T_IDENTIFIER = bottom
							link1 -> NULL (end of declarator list)
			link1 -> NULL (end of struct declaration list)
link2 -> T_DECLARATOR
	link2 -> T_IDENTIFIER = RECT
	link1 -> T_DECLARATOR
		link2 -> T_POINTER
			link1 -> T_IDENTIFIER = LPRECT
		link1 -> NULL (end of declarator list)

a more complex example:

typedef struct _COMPLEX {
 LONG nLongArray[50];
 int nInt, *nIntPointer;
 long nLongBitField:8;
 struct _COMPLEX *nStructPointer;
 union {
  unsigned short int nUShortInt;
  signed int nSignedInt;
 } theUnion;
} COMPLEX,*LPCOMPLEX;

generates this one:

T_DECLARATION
link1 -> T_TYPESPECIFIER
	link1 -> T_STRUCT
		link1 -> T_STRUCT
		link2 -> T_IDENTIFIER = _COMPLEX
		link3 -> T_STRUCTDECLARATION
			link2 -> T_TYPESPECIFIER
				link1 -> T_TYPENAME = LONG
			link3 -> T_STRUCTDECLARATOR
				link2 -> T_SUBSCRIPT
					link1 -> T_IDENTIFIER = nLongArray
					link2 -> T_CONSTANT = 50
			link1 -> T_STRUCTDECLARATION
				link2 -> T_TYPESPECIFIER
					link1 -> T_INT
				link3 -> T_STRUCTDECLARATOR
					link2 -> T_IDENTIFIER = nInt
					link1 -> T_STRUCTDECLARATOR
						link2 -> T_POINTER
							link1 -> T_IDENTIFIER = nIntPointer
						link1 -> NULL
				link1 -> T_STRUCTDECLARATION
					link2 -> T_TYPESPECIFIER
						link1 -> T_LONG
					link3 -> T_STRUCTDECLARATOR
						link2 -> T_IDENTIFIER = nLongBitField
						link3 -> T_CONSTANT = 8
					link1 -> T_STRUCTDECLARATION
						link2 -> T_TYPESPECIFIER
							link1 -> T_STRUCT
								link1 -> T_STRUCT
								link2 -> T_IDENTIFIER = _COMPLEX
						link3 -> T_STRUCTDECLARATOR
							link2 -> T_POINTER
								link1 -> T_IDENTIFIER = nStructPointer
						link1 -> T_STRUCTDECLARATION
							link2 -> T_TYPESPECIFIER
								link1 -> T_STRUCT
									link1 -> T_UNION
									link3 -> T_STRUCTDECLARATION
										link2 -> T_TYPESPECIFIER
											link1 -> T_INT
											link2 -> T_TYPESPECIFIER
												link1 -> T_SHORT
												link2 -> T_TYPESPECIFIER
													link1 -> T_UNSIGNED
													link2 -> NULL
										link3 -> T_STRUCTDECLARATOR
											link2 -> T_IDENTIFIER = nUShortInt
										link1 -> T_STRUCTDECLARATION
											link2 -> T_TYPESPECIFIER
												link1 -> T_INT
												link2 -> T_TYPESPECIFIER
													link1 -> T_SIGNED
													link2 -> NULL
											link3 -> T_STRUCTDECLARATOR
												link2 -> T_IDENTIFIER = nSignedInt
											link1 -> NULL
							link3 -> T_STRUCTDECLARATOR
								link2 -> T_IDENTIFIER = theUnion
							link1 -> NULL
link2 -> T_DECLARATOR
	link2 -> T_IDENTIFIER = COMPLEX
	link1 -> T_DECLARATOR
		link2 -> T_POINTER
			link1 -> T_IDENTIFIER = LPCOMPLEX

the parentheses game:

typedef struct _EXAMPLE4 {
 int *arrayofpointers[50];
 int (*pointertoarray)[50];
} EXAMPLE4;

the first declarator results in an array of pointers (since [] has higher precedence than *)
which would have an overall size of 200 bytes (4 bytes per pointer)

the second defines a single pointer to an array of 50 ints which would only need 4 bytes

T_DECLARATION
link1 -> T_TYPESPECIFIER
	link1 -> T_STRUCT
		link1 -> T_STRUCT
		link2 -> T_IDENTIFIER = _EXAMPLE4
		link3 -> T_STRUCTDECLARATION
			link2 -> T_TYPESPECIFIER
				link1 -> T_INT
			link3 -> T_STRUCTDECLARATOR
				link2 -> T_POINTER
					link1 -> T_SUBSCRIPT
						link1 -> T_IDENTIFIER = arrayofpointers
						link2 -> T_CONSTANT = 50
			link1 -> T_STRUCTDECLARATION
				link2 -> T_TYPESPECIFIER
					link1 -> T_INT
				link3 -> T_STRUCTDECLARATOR
					link2 -> T_SUBSCRIPT
						link1 -> T_POINTER
							link1 -> T_IDENTIFIER = pointertoarray
						link2 -> T_CONSTANT = 50
link2 -> T_DECLARATOR
	link2 -> T_IDENTIFIER = EXAMPLE4