CParser Developer documentation
- Parts of a structure
- Parsenode structure definition
- Typereference of parsetree nodes
- Example parsetrees
a structure/union typedef declaration (T_DECLARATION) consists of four main parts:
- a typespecifier (T_TYPESPECIFIER) - either "struct" or "union"
- a structure tag (T_IDENTIFIER)
- a body with a list of one or more structure declarations (T_STRUCTDECLARATION)
enclosed in curly braces and separated by a ';'
- a list of declarators (T_DECLARATOR) for the whole struct/union which
can then be used in the program to define variables of the new type just introduced
typedef struct _EXAMPLE {
int nField1;
short int nField2, nField3;
} EXAMPLE,*LPEXAMPLE;
a structure declaration has two parts
- one or a valid combination of several type specifiers (T_TYPESPECIFIER)
- a list of one or more declarators (T_STRUCTDECLARATOR) separated by a ','
typedef struct _EXAMPLE {
int nField1;
short int nField2, nField3;
} EXAMPLE,*LPEXAMPLE;
one can apply pointers (int *a - T_POINTER), subscripts (int a[5] - T_SUBSCRIPT) or
bitfield (int a:8 - T_CONSTANT) modifiers to a declarator
(bitfields are limited to T_STRUCTDECLARATOR's -> declarators inside a structure)
typedef struct _PARSENODE {
NODETYPE nType;
union {
struct _PARSENODE *link[3];
char *String;
int Constant;
} un;
} PARSENODE, *LPPARSENODE;
where nType is one of the following:
typedef enum _NODETYPE {
T_VOID=1, T_CHAR, T_WCHAR, T_INT8, T_SHORT, T_INT, T_INT64, T_LONG, T_UNSIGNED,
T_SIGNED, T_FLOAT, T_DOUBLE, T_BOOL, T_STRUCT, T_UNION, T_ENUM, T_TYPENAME,
T_IDENTIFIER, T_STRUCTDECLARATION, T_STRUCTDECLARATOR, T_TYPESPECIFIER, T_SUBSCRIPT,
T_POINTER, T_CONSTANT, T_DECLARATION, T_DECLARATOR
} NODETYPE;
the following short versions are used below to express which link element is used:
link1 = PARSENODE.un.link[0]
link2 = PARSENODE.un.link[1]
link3 = PARSENODE.un.link[2]
the code uses the same shorthand by means of the following preprocessor macros:
#define link1 un.link[0]
#define link2 un.link[1]
#define link3 un.link[2]
if a link is omitted it is always NULL
- T_DECLARATION
- link1 -> T_TYPESPECIFIER
- link2 -> T_DECLARATOR
- T_DECLARATOR
- link1 -> NULL | T_DECLARATOR (next one in list)
- link2 -> T_IDENTIFIER |
T_POINTER |
T_SUBSCRIPT
- T_STRUCT
- link1 -> T_STRUCT | T_UNION (without child nodes, to mark struct or union)
- link2 -> NULL | T_IDENTIFIER (structure tag)
- link3 -> NULL | T_STRUCTDECLARATION (member declarations)
- T_STRUCTDECLARATION
- link1 -> NULL | T_STRUCTDECLARATION (next one in list)
- link2 -> T_TYPESPECIFIER
- link3 -> NULL (anonymous struct/union) | T_STRUCTDECLARATOR
- T_STRUCTDECLARATOR
- link1 -> NULL | T_STRUCTDECLARATOR (next e.g.: "struct A {int a, *b, c[10], d; }")
-
- link2 -> T_IDENTIFIER | T_SUBSCRIPT
| T_POINTER
- link3 -> NULL | T_CONSTANT (for bitfields e.g.: int a:8)
-
- T_TYPESPECIFIER
- link1 -> T_VOID | T_CHAR | T_INT8 | T_SHORT | T_INT | T_INT64 | T_LONG | T_UNSIGNED |
T_SIGNED | T_FLOAT | T_DOUBLE | T_STRUCT | T_UNION | T_ENUM | T_TYPENAME
-
- link2 -> NULL | T_TYPESPECIFIER (next one in list e.g.: "unsigned int")
- T_POINTER
- link1 -> T_POINTER | T_IDENTIFIER | T_SUBSCRIPT
- T_SUBSCRIPT
- link1 -> T_SUBSCRIPT | T_IDENTIFIER
- link2 -> T_CONSTANT
- T_TYPENAME
- String = Name of typedef
- T_IDENTIFIER
- String = Name of identifier
- T_CONSTANT
- Constant = constant value
- T_XX
- nType is one of the basic types:
T_VOID | T_CHAR | T_WCHAR | T_INT8 | T_SHORT | T_INT | T_INT64 | T_LONG | T_UNSIGNED | T_SIGNED
| T_FLOAT | T_DOUBLE | T_BOOL
- link1, link2 & link3 are always NULL
-
typedef struct _RECT {
LONG left;
LONG top;
LONG right;
LONG bottom;
} RECT,*LPRECT;
if a link is not given it is assumed to be NULL
would result in the following parsetree:
T_DECLARATION
link1 -> T_TYPESPECIFIER
link1 -> T_STRUCT
link1 -> T_STRUCT
link2 -> T_IDENTIFIER = _RECT
link3 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_TYPENAME = LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = left
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_TYPENAME = LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = top
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_TYPENAME = LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = right
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_TYPENAME = LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = bottom
link1 -> NULL (end of struct declaration list)
link2 -> T_DECLARATOR
link2 -> T_IDENTIFIER = RECT
link1 -> T_DECLARATOR
link2 -> T_POINTER
link1 -> T_IDENTIFIER = LPRECT
link1 -> NULL (end of declarator list)
a semantically equivalent structure to the one above, only written differently:
typedef struct _RECT {
LONG left,
top,
right,
bottom;
} RECT,*LPRECT;
would result in this parsetree:
T_DECLARATION
link1 -> T_TYPESPECIFIER
link1 -> T_STRUCT
link1 -> T_STRUCT
link2 -> T_IDENTIFIER = _RECT
link3 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_TYPENAME = LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = left
link1 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = top
link1 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = right
link1 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = bottom
link1 -> NULL (end of declarator list)
link1 -> NULL (end of struct declaration list)
link2 -> T_DECLARATOR
link2 -> T_IDENTIFIER = RECT
link1 -> T_DECLARATOR
link2 -> T_POINTER
link1 -> T_IDENTIFIER = LPRECT
link1 -> NULL (end of declarator list)
a more complex example:
typedef struct _COMPLEX {
LONG nLongArray[50];
int nInt, *nIntPointer;
long nLongBitField:8;
struct _COMPLEX *nStructPointer;
union {
unsigned short int nUShortInt;
signed int nSignedInt;
} theUnion;
} COMPLEX,*LPCOMPLEX;
generates this one:
T_DECLARATION
link1 -> T_TYPESPECIFIER
link1 -> T_STRUCT
link1 -> T_STRUCT
link2 -> T_IDENTIFIER = _COMPLEX
link3 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_TYPENAME = LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_SUBSCRIPT
link1 -> T_IDENTIFIER = nLongArray
link2 -> T_CONSTANT = 50
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_INT
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = nInt
link1 -> T_STRUCTDECLARATOR
link2 -> T_POINTER
link1 -> T_IDENTIFIER = nIntPointer
link1 -> NULL
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_LONG
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = nLongBitField
link3 -> T_CONSTANT = 8
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_STRUCT
link1 -> T_STRUCT
link2 -> T_IDENTIFIER = _COMPLEX
link3 -> T_STRUCTDECLARATOR
link2 -> T_POINTER
link1 -> T_IDENTIFIER = nStructPointer
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_STRUCT
link1 -> T_UNION
link3 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_INT
link2 -> T_TYPESPECIFIER
link1 -> T_SHORT
link2 -> T_TYPESPECIFIER
link1 -> T_UNSIGNED
link2 -> NULL
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = nUShortInt
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_INT
link2 -> T_TYPESPECIFIER
link1 -> T_SIGNED
link2 -> NULL
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = nSignedInt
link1 -> NULL
link3 -> T_STRUCTDECLARATOR
link2 -> T_IDENTIFIER = theUnion
link1 -> NULL
link2 -> T_DECLARATOR
link2 -> T_IDENTIFIER = COMPLEX
link1 -> T_DECLARATOR
link2 -> T_POINTER
link1 -> T_IDENTIFIER = LPCOMPLEX
the parentheses game:
typedef struct _EXAMPLE4 {
int *arrayofpointers[50];
int (*pointertoarray)[50];
} EXAMPLE4;
the first declarator results in an array of pointers (since [] has higher precedence than *)
which would have an overall size of 200 bytes (4 bytes per pointer)
the second defines a single pointer to an array of 50 ints which would only need 4 bytes
T_DECLARATION
link1 -> T_TYPESPECIFIER
link1 -> T_STRUCT
link1 -> T_STRUCT
link2 -> T_IDENTIFIER = _EXAMPLE4
link3 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_INT
link3 -> T_STRUCTDECLARATOR
link2 -> T_POINTER
link1 -> T_SUBSCRIPT
link1 -> T_IDENTIFIER = arrayofpointers
link2 -> T_CONSTANT = 50
link1 -> T_STRUCTDECLARATION
link2 -> T_TYPESPECIFIER
link1 -> T_INT
link3 -> T_STRUCTDECLARATOR
link2 -> T_SUBSCRIPT
link1 -> T_POINTER
link1 -> T_IDENTIFIER = pointertoarray
link2 -> T_CONSTANT = 50
link2 -> T_DECLARATOR
link2 -> T_IDENTIFIER = EXAMPLE4