Редизајниран лексер

2019-01-22 16:22:00 +01:00 · 2019-01-22 16:22:00 +01:00 · bf365a0013
commit bf365a0013
parent 405739c217
2 changed files with 101 additions and 101 deletions
--- a/3
+++ b/3
@ -7,6 +7,7 @@ VERSION = 0.5
 PREFIX = /usr/local
 # флегови за C компајлер и линкер
 CPPFLAGS = -D_POSIX_C_SOURCE=200809L
 CFLAGS = -g -std=c99 -pedantic -Wall -O0
 # CFLAGS  = -std=c99 -pedantic -Wall -O1
 LDFLAGS = -lm -lc
@ -19,7 +20,7 @@ OBJ = $(SRC:.c=.o)
 all: cirilisp
 .c.o:
-	$(CC) -c $(CFLAGS) $<
+	$(CC) -c $(CPPFLAGS) $(CFLAGS) $<
 $(OBJ): util.h read.h eval.h print.h symtable.h internals.h init.h
--- a/read.c
+++ b/read.c
@ -1,4 +1,3 @@
 #define _POSIX_C_SOURCE 200809L
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
@ -132,123 +131,123 @@ void append(token **head, token *tail)
 	*current = tail;
 }
 regex_t regNumberFrac, regNumberReal, regLParenthesis, regRParenthesis,
 	regSpace, regSymbol, regQuote;
 token *lex1Token(char *input, int *i)
 {
 	token *result = malloc(sizeof(token));
 	result->next = NULL;
-	regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
+void deleteTokenList(token *root)
 {
 	if (root->lexeme != NULL)
 	{
 		free(root->lexeme);
 	}
-	regcomp(&regNumberFrac, "^[-+]?[[:digit:]]+(/[[:digit:]]+)?",
+	if (root->next != NULL)
-			REG_EXTENDED);
+	{
-	regcomp(&regNumberReal, "^[-+]?[[:digit:]]*,[[:digit:]]+",
+		deleteTokenList(root->next);
-			REG_EXTENDED);
+	}
 	regcomp(&regSymbol, "^[-+/*_\\\\=<>!&?[:alnum:]]+", REG_EXTENDED);
 	regcomp(&regQuote, "^'", REG_EXTENDED);
 	regcomp(&regLParenthesis, "^\\(", REG_EXTENDED);
 	regcomp(&regRParenthesis, "^\\)", REG_EXTENDED);
-	const int nmatches = 1;
+	free(root);
 	regmatch_t a[nmatches];
 	regexec(&regSpace, input + *i, nmatches, a, 0);
 	*i += a[0].rm_eo;
 /* помера индекс да би се игнорисали почетни "вајт-спејс" карактери */
 	if (!regexec(&regNumberReal, input + *i, nmatches, a, 0))
 	{
 		result->type = numberRealToken;
 	}
 	else if (!regexec(&regSymbol, input + *i, nmatches, a, 0))
 	{
 		int tmp = a[0].rm_eo;
 		if (!regexec(&regNumberFrac, input + *i, nmatches, a, 0) &&
 				tmp == a[0].rm_eo)
 /* симбол може садржати цифре на било којој позицији али не може сам бити број
* не постоји погодан начин да се ово путем regex-a запише, стога овај if
 * исказ */
 		{
 			result->type = numberFracToken;
 		}
 		else
 		{
 			regexec(&regSymbol, input + *i, nmatches, a, 0);
 /* претходни regexec позив је променио вредност a[0].rm_eo, овиме се она враћа
 * на дужину нађеног симбола */
 			result->type = symbolToken;
 		}
 	}
 	else if (!regexec(&regNumberFrac, input + *i, nmatches, a, 0))
 	{
 			result->type = numberFracToken;
 	}
 	else if (!regexec(&regQuote, input + *i, nmatches, a, 0))
 	{
 		result->type = quoteToken;
 	}
 	else if (!regexec(&regLParenthesis, input + *i, nmatches, a, 0))
 	{
 		result->type = lParenthesisToken;
 	}
 	else if (!regexec(&regRParenthesis, input + *i, nmatches, a, 0))
 	{
 		result->type = rParenthesisToken;
 	}
 	else
 	{
 		result->type = undefinedToken;
 		result->lexeme = NULL;
 		goto skipStringCopy;
 	}
 	result->lexeme = malloc((a[0].rm_eo + 1) * sizeof(char));
 	strncpy(result->lexeme, input + *i, a[0].rm_eo);
 	result->lexeme[a[0].rm_eo] = '\0';
 	*i += a[0].rm_eo;
 	regexec(&regSpace, input + *i, nmatches, a, 0);
 	*i += a[0].rm_eo;
 /* игнорисање крајњих вајт-спејс карактера */
 	skipStringCopy:
 	regfree(&regSpace);
 	regfree(&regNumberFrac);
 	regfree(&regNumberReal);
 	regfree(&regSymbol);
 	regfree(&regLParenthesis);
 	regfree(&regRParenthesis);
 	return result;
 }
 token *lexLine(char *input)
 {
-	int i = 0, n;
+	regex_t regSpace, regTokenGeneral, regNumberFrac, regNumberReal,
-	n = strlen(input);
+		regLParenthesis, regRParenthesis, regSymbol, regQuote;
 	regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
 	regcomp(&regTokenGeneral,"^(\\(|\\)|'|[-,.+/*_\\\\=<>!&?[:alnum:]]+)",
 			REG_EXTENDED);
 	const int nmatches = 1;
 	regmatch_t a[nmatches];
 	token *root = NULL, **new;
 	new = &root;
 	int i = 0, n;
 	n = strlen(input);
 	regexec(&regSpace, input + i, nmatches, a, 0);
 	i += a[0].rm_eo;
 /* помера индекс да би се игнорисали почетни "вајт-спејс" карактери */
 	while (i < n)
 	{
-		*new = lex1Token(input, &i);
+		*new = malloc(sizeof(token));
-		if ((*new)->type == undefinedToken)
+		(*new)->next = NULL;
 		if (!regexec(&regTokenGeneral, input + i, nmatches, a, 0))
 		{
-/* уколико се у реду нађе токен који је лексички погрешан, штампа се место тог
+			(*new)->lexeme = malloc((a[0].rm_eo + 1) *
- * токена у реду и бришу се сви токени нађени у реду, функција враћа NULL*/
+					sizeof(char));
-			fprintf(stderr, "Невалидан токен на месту %d\n", i);
+			strncpy((*new)->lexeme, input + i, a[0].rm_eo);
-			new = &root;
+			(*new)->lexeme[a[0].rm_eo] = '\0';
-			while (*new != NULL)
+			i += a[0].rm_eo;
 			{
 				free(root->lexeme);
 				new = &((*new)->next);
 				free(root);
 				root = *new;
 		}
 		else
 		{
 /* уколико се у реду нађе карактер који се не може прихватити, штампа се место
* тог карактера у реду и бришу се сви токени тог реда, функција враћа NULL */
 			fprintf(stderr, "Невалидан карактер на месту %d\n", i);
 			(*new)->lexeme = NULL;
 			deleteTokenList(root);
 			return NULL;
 		}
 		regexec(&regSpace, input + i, nmatches, a, 0);
 		i += a[0].rm_eo;
 		new = &((*new)->next);
 	}
 /* у овој петљи су нађени сви токени у реду и њихови лексеми су копирани у
 * листу, међутим још није одређен њихов тип, нити чак то да су валидни */
 	regcomp(&regNumberFrac, "^[-+]?[[:digit:]]+(/[[:digit:]]+)?$",
 			REG_EXTENDED);
 	regcomp(&regNumberReal, "^[-+]?[[:digit:]]*,[[:digit:]]+$",
 			REG_EXTENDED);
 	regcomp(&regSymbol, "^[-+/*_\\\\=<>!&?[:alnum:]]+$", REG_EXTENDED);
 	regcomp(&regQuote, "^'$", REG_EXTENDED);
 	regcomp(&regLParenthesis, "^\\($", REG_EXTENDED);
 	regcomp(&regRParenthesis, "^\\)$", REG_EXTENDED);
 	new = &root;
 	while ((*new) != NULL)
 	{
 		if (!regexec(&regNumberFrac, (*new)->lexeme, nmatches, a, 0))
 		{
 			(*new)->type = numberFracToken;
 		}
 		else if (!regexec(&regNumberReal, (*new)->lexeme, nmatches, a,
 			0))
 		{
 			(*new)->type = numberRealToken;
 		}
 		else if (!regexec(&regSymbol, (*new)->lexeme, nmatches, a, 0))
 		{
 			(*new)->type = symbolToken;
 		}
 		else if (!regexec(&regQuote, (*new)->lexeme, nmatches, a, 0))
 		{
 			(*new)->type = quoteToken;
 		}
 		else if (!regexec(&regLParenthesis, (*new)->lexeme, nmatches,
 			a, 0))
 		{
 			(*new)->type = lParenthesisToken;
 		}
 		else if (!regexec(&regRParenthesis, (*new)->lexeme, nmatches,
 			a, 0))
 		{
 			(*new)->type = rParenthesisToken;
 		}
 		else
 		{
 /* уколико се неки токен не може класификовати, штампа се лексем тог токена,
 * бришу се сви нађени токени у реду, и враћа се NULL */
 			fprintf(stderr, "Невалидан токен:\"%s\"\n",
 					(*new)->lexeme);
 			deleteTokenList(root);
 			return NULL;
 		}
 		new = &((*new)->next);
 	}
 	return root;
 }