Уведен лексер, још увек нестабилан и минималан

2019-01-06 12:30:07 +01:00 · 2019-01-06 12:30:07 +01:00 · 8d89aa622a
commit 8d89aa622a
parent 7cff23f9ca
5 changed files with 136 additions and 17 deletions
--- a/4
+++ b/4
@ -13,7 +13,7 @@ LDFLAGS = -lm -lc
 CC = cc
-SRC = cirilisp.c util.c
+SRC = cirilisp.c util.c lexer.c
 OBJ = $(SRC:.c=.o)
 all: cirilisp
@ -21,7 +21,7 @@ all: cirilisp
 .c.o:
 	$(CC) -c $(CFLAGS) $<
-$(OBJ): util.h
+$(OBJ): util.h lexer.h
 cirilisp: $(OBJ)
 	$(CC) -o $@ $(OBJ) $(LDFLAGS)
--- a/cirilisp.c
+++ b/cirilisp.c
@ -3,32 +3,38 @@
 #include <stdlib.h>
 #include "util.h"
 #include "lexer.h"
 int main(int argc, char **argv)
 {
-	// Омогућава библиотекама коришћеним у интерпретеру да протумаче
+/* Омогућава библиотекама коришћеним у интерпретеру да протумаче српску ћирилицу */
-	// српску ћирилицу
+	if (setlocale(LC_ALL, "sr_RS.utf8") == NULL)
-	// struct lconv *loc;
+	{
-	setlocale(LC_ALL, "sr_RS.utf8");
+		fprintf(stderr, "locale couldn't be set to \"sr_RS.utf8\", check if you've enabled it on your system\n");
-	// loc = localeconv();
+		exit(0);
-
+	}
 	// Бојлерплејт обавештења о окружењу
 	puts("Прост РЕПЛ:");
 	puts("Притисните ctrl+c да бисте изашли\n");
 	while (1)
 	{
-		char *input = readline("k> ");
+		char *input = readline("Л> ");
 		if (input == NULL)
 		{
 			putchar('\n');
 			printf("Крај улазног тока.\n");
 			// Превод
 			exit(0);
 		}
-		printf("НАПИСАЛИ СТЕ: %s\n", input);
+		token *tokenList, *current;
 		current = tokenList = lexLine(input);
 		int i = 0;
 		while (current != NULL)
 		{
 			printf("Токен бр. %d: \"%s\", тип:%s\n", i,
 current->lexeme, current->type == number ? "number" : (current->type == symbol 
 ? "symbol" : "parenthesis"));
 		}
 		free(input);
 	}
 	return 0;
 }
--- a/lexer.c
+++ b/lexer.c
@ -0,0 +1,95 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <regex.h>
 #include "lexer.h"
 token *lex1token(char *input, int *i);
 /* Returns a pointer to a single token structure describing one token; reads
 the input string starting at the i-th character and updates i so that it then
 indexes the next token or the end of the string. */
 token *lexLine(char *input)
 {
 	int i = 0, n;
 	n = strlen(input);
 	token *root = NULL, **new;
 	new = &root;
 	while (i < n)
 	{
 		*new = lex1token(input, &i);
 		new = &((*new)->next);
 		if ((*new)->type == undefined)
 		{
 /* уколико се у реду нађе токен који је лексички погрешан, штампа се место тог
 токена у реду и брише се цела листа, функција враћа NULL*/
 			fprintf(stderr, "Невалидан токен на месту %d\n", i);
 			new = &root;
 			while (*new != NULL)
 			{
 				free(root->lexeme);
 				new = &((*new)->next);
 				free(root);
 				root = *new;
 			}
 			return NULL;
 		}
 	}
 	return root;
 }
 regex_t regNumber, regSymbol, regParenthesis, regSpace;

 /* Compiles the four lexer patterns into the file-level regex_t objects the
 first time it is called; later calls do nothing. The patterns are constant, so
 a compilation failure is treated as fatal. */
 static void compileLexerPatterns(void)
 {
 	static int compiled = 0;

 	if (compiled)
 	{
 		return;
 	}
 	/* POSIX character classes are only valid inside a bracket expression,
 	hence [[:space:]] and [[:digit:]] rather than [:space:] and [:digit:] */
 	if (regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED) != 0 ||
 	    regcomp(&regNumber, "^[-+]?[[:digit:]]+", REG_EXTENDED) != 0 ||
 	    regcomp(&regSymbol, "^[-+/*]", REG_EXTENDED) != 0 ||
 	    regcomp(&regParenthesis, "^[()]", REG_EXTENDED) != 0)
 	{
 		fprintf(stderr, "lexer: failed to compile token patterns\n");
 		exit(EXIT_FAILURE);
 	}
 	compiled = 1;
 }

 /*
  * Lexes exactly one token from input, starting at offset *i.
  *
  * Leading whitespace is skipped, then the text is matched against the number,
  * symbol (for now only -, +, * and /) and parenthesis patterns, in that order.
  * Numbers are tried first so that a sign directly followed by digits is read
  * as one signed number instead of a symbol followed by a number.
  *
  * On a match the returned token holds a freshly allocated, NUL-terminated copy
  * of the matched text in lexeme, and *i is moved past the token and any
  * whitespace after it. When nothing matches, the token has type undefined and
  * a NULL lexeme, and *i is left at the first character that could not be
  * lexed.
  *
  * The returned node is heap-allocated with next set to NULL; the caller owns
  * it. The process exits with EXIT_FAILURE if memory cannot be allocated.
  */
 token *lex1token(char *input, int *i)
 {
 	regmatch_t match[1];
 	token *result = malloc(sizeof *result);

 	if (result == NULL)
 	{
 		perror("lex1token");
 		exit(EXIT_FAILURE);
 	}
 	result->type = undefined;
 	result->lexeme = NULL;
 	result->next = NULL;

 	compileLexerPatterns();

 	/* move the index forward so leading whitespace is ignored */
 	if (regexec(&regSpace, input + *i, 1, match, 0) == 0)
 	{
 		*i += (int) match[0].rm_eo;
 	}

 	if (regexec(&regNumber, input + *i, 1, match, 0) == 0)
 	{
 		result->type = number;
 	}
 	else if (regexec(&regSymbol, input + *i, 1, match, 0) == 0)
 	{
 		result->type = symbol;
 	}
 	else if (regexec(&regParenthesis, input + *i, 1, match, 0) == 0)
 	{
 		result->type = parenthesis;
 	}
 	else
 	{
 		/* nothing matched: hand back the undefined token without a lexeme */
 		return result;
 	}

 	/* every pattern is anchored with ^, so rm_eo is the token length */
 	size_t length = (size_t) match[0].rm_eo;
 	result->lexeme = malloc(length + 1);
 	if (result->lexeme == NULL)
 	{
 		perror("lex1token");
 		exit(EXIT_FAILURE);
 	}
 	memcpy(result->lexeme, input + *i, length);
 	result->lexeme[length] = '\0';
 	*i += (int) length;

 	/* skip trailing whitespace so *i lands on the next token or the end */
 	if (regexec(&regSpace, input + *i, 1, match, 0) == 0)
 	{
 		*i += (int) match[0].rm_eo;
 	}
 	return result;
 }
--- a/lexer.h
+++ b/lexer.h
@ -0,0 +1,20 @@
 #pragma once
 /* Lexical category that the lexer assigns to a token. */
 typedef enum
 {
 	undefined,	/* the text matched none of the lexer's patterns */
 	number,		/* the text matched the number pattern */
 	symbol,		/* the text matched the symbol (operator) pattern */
 	parenthesis	/* the text matched the parenthesis pattern */
 } tokenType ;
 /* One lexical token, stored as a node of a singly linked list. */
 typedef struct _Token
 {
 	tokenType type;		/* category of this token */
 	char *lexeme;		/* heap-allocated, NUL-terminated copy of the matched
 				   text; the lexer does not set it for undefined tokens */
 	struct _Token *next;	/* following token in the line, NULL at the end */
 } token;
 /* lexLine receives one line read from standard input and returns its lexical
 elements as a linked list of tokens. NULL is returned when the line contains
 an invalid token, and also for an empty line, which produces no tokens. */
 token *lexLine(char *input);
--- a/util.h
+++ b/util.h
@ -1,5 +1,3 @@
 #pragma once
 char *readline(char *prompt);
 void add_history(char *unused);