-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathsyntax_parser.c
More file actions
308 lines (289 loc) · 11.5 KB
/
syntax_parser.c
File metadata and controls
308 lines (289 loc) · 11.5 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
// 2025 04 30 attempt.
// Include header files for standard I/O, string handling, dynamic memory, and error handling
#include <stdio.h> // Standard I/O functions (printf, fgets, etc.)
#include <stdlib.h> // Standard library functions (atoi, exit, etc.)
#include <string.h> // String handling functions (strcmp, strtok, etc.)
#include <ctype.h> // Character functions (isdigit, isalpha, islower, etc.)
#include <setjmp.h> // Exception handling using setjmp and longjmp
// ----------------------------------------------------------------------
// Constant definitions: maximum tokens, outputs, symbols, variable length
// ----------------------------------------------------------------------
#define MAX_TOKENS 50000 // Capacity of the tokens[] array (tokens held for one input line)
#define MAX_OUTPUTS 50000 // Capacity of the outputs[] array (values queued by print statements for one line)
#define MAX_SYMBOLS 10000 // Capacity of the symbol table (distinct variables)
#define MAX_VAR_LEN 10 // Maximum characters accepted in a variable name or a numeric literal
// ----------------------------------------------------------------------
// Global variables
// ----------------------------------------------------------------------
char *tokens[MAX_TOKENS]; // Tokens of the current line; main() fills it with the pointers strtok() returns for its line buffer
int token_count = 0; // Number of valid entries in tokens[]
int pos = 0; // Index in tokens[] of the next token the parser will consume
int outputs[MAX_OUTPUTS]; // Values collected by print statements; main() prints them after the whole line parsed
int outputs_count = 0; // Number of valid entries in outputs[]
// Symbol (variable) table: stores variable name and corresponding value
// One symbol-table entry: a variable name paired with its current integer value.
typedef struct {
char name[MAX_VAR_LEN + 1]; // NUL-terminated variable name (at most MAX_VAR_LEN characters plus the terminator)
int value; // Value most recently assigned to the variable
} Symbol;
Symbol symbols[MAX_SYMBOLS]; // Symbol table; entries [0, symbol_count) are in use
int symbol_count = 0; // Number of variables currently stored (main() resets it for every input line)
// Jump target armed by setjmp() in main(); error() longjmps back to it
jmp_buf jumpBuffer;
// ----------------------------------------------------------------------
// Error handling function: called when a syntax error occurs
// ----------------------------------------------------------------------
// Abort parsing of the current line.
// Never returns to its caller: it longjmps to jumpBuffer with the value 1,
// so the matching setjmp() call resumes with a non-zero result.
void error(void) {
    longjmp(jumpBuffer, 1);
}
// ----------------------------------------------------------------------
// Function to retrieve symbol (variable) value: return 0 if not found
// ----------------------------------------------------------------------
// Look up a variable in the symbol table.
//   name: NUL-terminated variable name to search for (exact match).
// Returns the value of the first matching entry, or 0 when the name has
// never been stored (unknown variables read as zero).
int getSymbolValue(const char *name) {
    int result = 0;
    int idx = 0;

    while (idx < symbol_count) {
        if (!strcmp(symbols[idx].name, name)) {
            result = symbols[idx].value;
            break;
        }
        idx++;
    }
    return result;
}
// ----------------------------------------------------------------------
// Function to set symbol (variable) value: update if exists, otherwise add
// ----------------------------------------------------------------------
// Store a value for a variable, creating the table entry on first use.
//   name:  NUL-terminated variable name; at most MAX_VAR_LEN characters are kept.
//   value: integer to associate with the name.
// When the name is new and the table already holds MAX_SYMBOLS entries,
// the assignment is silently dropped.
void setSymbolValue(const char *name, int value) {
    int slot = 0;

    // Stop at the matching entry, or at symbol_count when there is none.
    while (slot < symbol_count && strcmp(symbols[slot].name, name) != 0) {
        slot++;
    }

    if (slot == symbol_count) {
        if (symbol_count >= MAX_SYMBOLS) {
            return; // Table full: nothing is stored
        }
        // Claim the first free slot for the new variable.
        strncpy(symbols[slot].name, name, MAX_VAR_LEN);
        symbols[slot].name[MAX_VAR_LEN] = '\0'; // strncpy may leave the name unterminated
        symbol_count++;
    }

    symbols[slot].value = value;
}
// ----------------------------------------------------------------------
// Parser function prototypes
// ----------------------------------------------------------------------
// Precedence note: '-' is handled in parseTerm() and therefore binds tighter
// than '+' and '*', which parseExpr() applies left to right at one shared level.
int parseExpr(void); // <expr> → <term> { + <term> | * <term> }
int parseTerm(void); // <term> → <factor> { - <factor> }
int parseFactor(void); // <factor> → [ - ] ( <number> | <var> | '(' <expr> ')' )
void parseStatement(void); // <statement> → <var> = <expr> ; | print <var> ;
// ----------------------------------------------------------------------
// parseFactor()
// - Handles unary '-', parentheses, numbers, and variables
// ----------------------------------------------------------------------
// Parse one <factor> starting at tokens[pos] and return its integer value.
//   <factor> → [ - ] ( <number> | <var> | '(' <expr> ')' )
// Consumes the tokens that make up the factor (advances pos).
// On any syntax problem it calls error(), which longjmps away and does not
// return here:
//   - no token left, or a token that cannot start a factor
//   - a number or variable name longer than MAX_VAR_LEN characters
//   - a number containing a non-digit, or a name containing anything other
//     than lowercase letters
//   - a '(' without its matching ')'
// Variables that were never assigned evaluate to 0.
int parseFactor(void) {
    if (pos >= token_count) {
        error();
        return 0; // Not reached; keeps the compiler quiet
    }

    const char *tok = tokens[pos];

    // [Unary minus] Only a stand-alone "-" token counts as negation.
    if (strcmp(tok, "-") == 0) {
        pos++; // Consume "-"
        return -parseFactor();
    }

    // [Parentheses] '(' <expr> ')'
    if (strcmp(tok, "(") == 0) {
        pos++; // Consume "("
        int value = parseExpr();
        if (pos >= token_count || strcmp(tokens[pos], ")") != 0) {
            error();
        }
        pos++; // Consume ")"
        return value;
    }

    // <ctype.h> functions require an unsigned char value (or EOF); passing a
    // plain char that happens to be negative (e.g. a non-ASCII byte) is
    // undefined behaviour, hence the casts below.

    // [Number] Token starts with a digit
    if (isdigit((unsigned char)tok[0])) {
        if (strlen(tok) > MAX_VAR_LEN) {
            error();
        }
        for (size_t i = 0; tok[i] != '\0'; i++) {
            if (!isdigit((unsigned char)tok[i])) {
                error();
            }
        }
        // Up to MAX_VAR_LEN (10) digits can exceed INT_MAX, where atoi() has
        // undefined behaviour. strtoll() always represents such a value; the
        // narrowing to int is implementation-defined rather than undefined.
        int value = (int)strtoll(tok, NULL, 10);
        pos++; // Consume number
        return value;
    }

    // [Variable] Token starts with a letter
    if (isalpha((unsigned char)tok[0])) {
        if (strlen(tok) > MAX_VAR_LEN) {
            error();
        }
        for (size_t i = 0; tok[i] != '\0'; i++) {
            if (!islower((unsigned char)tok[i])) {
                error();
            }
        }
        pos++; // Consume variable
        // The length was validated above, so the token can be looked up
        // directly without copying it into a bounded buffer first.
        return getSymbolValue(tok);
    }

    // Anything else cannot start a factor.
    error();
    return 0; // Not reached; keeps the compiler quiet
}
// ----------------------------------------------------------------------
// parseTerm()
// - Implements <term> → <factor> { - <factor> }, left associative
// ----------------------------------------------------------------------
// Parse a <term> and return its value.
//   <term> → <factor> { - <factor> }
// Subtractions are folded left to right; parsing stops at the first token
// that is not a stand-alone "-" (or at the end of the token list).
int parseTerm(void) {
    int acc = parseFactor(); // Leftmost operand

    for (;;) {
        if (pos >= token_count) {
            break;
        }
        if (strcmp(tokens[pos], "-") != 0) {
            break;
        }
        pos++;                // Consume "-"
        acc -= parseFactor(); // Subtract the next factor
    }
    return acc;
}
// ----------------------------------------------------------------------
// parseExpr()
// - Implements <expr> → <term> { + <term> | * <term> }, left associative
// ----------------------------------------------------------------------
// Parse an <expr> and return its value.
//   <expr> → <term> { + <term> | * <term> }
// "+" and "*" share one precedence level and are applied left to right;
// parsing stops at the first token that is neither operator.
int parseExpr(void) {
    int acc = parseTerm(); // Leftmost operand

    while (pos < token_count) {
        const char *tok = tokens[pos];
        int is_add = (strcmp(tok, "+") == 0);
        int is_mul = (strcmp(tok, "*") == 0);

        if (!is_add && !is_mul) {
            break;
        }

        pos++; // Consume the operator
        int rhs = parseTerm();
        acc = is_add ? acc + rhs : acc * rhs;
    }
    return acc;
}
// ----------------------------------------------------------------------
// parseStatement()
// - Implements <statement> → <var> = <expr> ; | print <var> ;
// ----------------------------------------------------------------------
void parseStatement(void) {
// [print statement] If current token is "print"
if (pos < token_count && strcmp(tokens[pos], "print") == 0) {
pos++; // Consume "print"
// Next token must be a variable
if (pos >= token_count)
error();
if (!isalpha(tokens[pos][0]))
error();
// Check variable name
if (strlen(tokens[pos]) > MAX_VAR_LEN)
error();
for (int i = 0; tokens[pos][i] != '\0'; i++) {
if (!islower(tokens[pos][i]))
error();
}
char varname[MAX_VAR_LEN + 1];
strncpy(varname, tokens[pos], MAX_VAR_LEN);
varname[MAX_VAR_LEN] = '\0';
pos++; // Consume variable
// Must end with ";"
if (pos >= token_count || strcmp(tokens[pos], ";") != 0)
error();
pos++; // Consume ";"
// Lookup value
int val = getSymbolValue(varname);
// Save to outputs
outputs[outputs_count++] = val;
}
// [Assignment statement] If starts with variable
else if (pos < token_count && isalpha(tokens[pos][0])) {
// Check variable name
if (strlen(tokens[pos]) > MAX_VAR_LEN)
error();
for (int i = 0; tokens[pos][i] != '\0'; i++) {
if (!islower(tokens[pos][i]))
error();
}
char varname[MAX_VAR_LEN + 1];
strncpy(varname, tokens[pos], MAX_VAR_LEN);
varname[MAX_VAR_LEN] = '\0';
pos++; // Consume variable
// Must be followed by "="
if (pos >= token_count || strcmp(tokens[pos], "=") != 0)
error();
pos++; // Consume "="
// Parse RHS expression
int value = parseExpr();
// Must end with ";"
if (pos >= token_count || strcmp(tokens[pos], ";") != 0)
error();
pos++; // Consume ";"
// Save or update symbol table
setSymbolValue(varname, value);
}
// Otherwise syntax error
else {
error();
}
}
// ----------------------------------------------------------------------
// main()
// - Entry point of the program
// - Reads code line by line, tokenizes, parses, and executes
// - Prints "Syntax Error!" if code is invalid
// - Exits on empty line
// ----------------------------------------------------------------------
// Program entry point: a read-evaluate-print loop over standard input.
// For every input line it
//   1. stops when the line is empty or contains only whitespace (or at EOF),
//   2. resets the parser state, including the symbol table,
//   3. splits the line into whitespace-separated tokens,
//   4. parses/executes statements until all tokens are consumed,
//   5. prints the collected print values separated by single spaces, or
//      "Syntax Error!" if any statement was malformed (in which case no
//      values from that line are printed).
// Always returns 0.
// NOTE(review): a line longer than the buffer is read by fgets() in several
// pieces, each of which is then treated as its own line.
int main(void) {
    // '\r' is included so that CRLF-terminated input tokenizes cleanly and a
    // blank CRLF line ends the session like a blank LF line does.
    static const char whitespace[] = " \t\r\n";
    char line[4096]; // Buffer for one line of input

    while (fgets(line, sizeof(line), stdin) != NULL) {
        // strspn() returns the length of the leading whitespace run; landing
        // on the terminator means the line has no other characters. This also
        // covers the bare "\n" case, so no separate check is needed.
        if (line[strspn(line, whitespace)] == '\0') {
            break;
        }

        // Reset state for each line
        token_count = 0;
        pos = 0;
        outputs_count = 0;
        symbol_count = 0; // Variables do not survive from one line to the next

        // Tokenize in place: tokens[] points into line[]
        for (char *token = strtok(line, whitespace); token != NULL;
             token = strtok(NULL, whitespace)) {
            tokens[token_count++] = token;
        }

        // error() longjmps back here with a non-zero value on a syntax error
        if (setjmp(jumpBuffer) == 0) {
            // Parse until all tokens are consumed
            while (pos < token_count) {
                parseStatement();
            }
            // Print outputs if any
            if (outputs_count > 0) {
                for (int i = 0; i < outputs_count; i++) {
                    printf("%d", outputs[i]);
                    if (i != outputs_count - 1) {
                        printf(" ");
                    }
                }
                printf("\n");
            }
        } else {
            // Syntax error detected
            printf("Syntax Error!\n");
        }
    }
    return 0; // Normal termination
}