Потпуна реорганизација програма

2019-01-08 22:19:29 +01:00 · 2019-01-08 22:19:29 +01:00 · da13db05d3
commit da13db05d3
parent f066c2f221
10 changed files with 328 additions and 207 deletions
--- a/10
+++ b/10
@ -1,19 +1,19 @@
 # cirilisp - компајлер за ћирилични дијалекат лиспа
 # ћирилисп верзија
-VERSION = 0.1
+VERSION = 0.2
 # локација за инсталацију
 PREFIX = /usr/local
 # флегови за C компајлер и линкер
-# CFLAGS   = -g -std=c99 -pedantic -Wall -O0
+CFLAGS   = -g -std=c99 -pedantic -Wall -O0
-CFLAGS  = -std=c99 -pedantic -Wall -O1
+# CFLAGS  = -std=c99 -pedantic -Wall -O1
 LDFLAGS = -lm -lc
 CC = cc
-SRC = cirilisp.c readline.c lexer.c
+SRC = cirilisp.c read.c print.c
 OBJ = $(SRC:.c=.o)
 all: cirilisp
@ -21,7 +21,7 @@ all: cirilisp
 .c.o:
 	$(CC) -c $(CFLAGS) $<
-$(OBJ): readline.h lexer.h
+$(OBJ): util.h read.h print.h
 cirilisp: $(OBJ)
 	$(CC) -o $@ $(OBJ) $(LDFLAGS)
--- a/cirilisp.c
+++ b/cirilisp.c
@ -2,8 +2,9 @@
 #include <stdio.h>
 #include <stdlib.h>
-#include "readline.h"
+#include "util.h"
-#include "lexer.h"
+#include "read.h"
 #include "print.h"
 int main(int argc, char **argv)
 {
@ -15,29 +16,11 @@ int main(int argc, char **argv)
 		exit(0);
 	}
-	while (1)
+	for (;;)
 	{
-		char *input = readline("Л> ");
+		print(read("ШКЉ> "));
-		if (input == NULL)
+		/* append(&tokenList, lexLine(readline()));
-		{
+		printTokenList(tokenList); */
 			putchar('\n');
 			printf("Крај улазног тока.\n");
 			// Превод
 			exit(0);
 		}
 		token *tokenList, *current;
 		current = tokenList = lexLine(input);
 		int i = 0;
 		while (current != NULL)
 		{
 			printf("Токен бр. %d: \"%s\", тип:%s\n", i, current->lexeme, current->type == numberToken ? "number" : (current->type == symbolToken ? "symbol" : "parenthesis"));
 			current = current->next;
 			++i;
 		}
 		freeLexedLine(tokenList);
 		free(input);
 	}
 }
--- a/lexer.c
+++ b/lexer.c
@ -1,120 +0,0 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <regex.h>
 #include "lexer.h"
 token *lex1Token(char *input, int *i);
 /* враћа показивач на једну token структуру, која означава један одређен токен,
 чита улазни стринг од i-тог карактера, и мења i тако да оно затим индексира
 следећи токен или крај стринга*/
 /*
  * Splits one input line into a linked list of tokens.
  *
  * input: NUL-terminated line to tokenize.
  * Returns the head of a newly allocated token list, or NULL when the line
  * yields no tokens or contains text that matches no token pattern.  In the
  * latter case the position of the offending text is reported on stderr and
  * every token already produced for the line is released.
  */
 token *lexLine(char *input)
 {
 	int i = 0, n;
 	n = strlen(input);
 	token *root = NULL, **tail;
 	tail = &root;
 	while (i < n)
 	{
 		*tail = lex1Token(input, &i);
 		if ((*tail)->type == undefinedToken)
 		{
 /* lex1Token stops right after the leading whitespace when nothing matches, so
 reaching the end of the line here only means the rest was blank, not an error */
 			if (input[i] != '\0')
 			{
 				fprintf(stderr, "Невалидан токен на месту %d\n", i);
 			}
 			while (root != NULL)
 			{
 				/* read the link before the node is released */
 				token *next = root->next;
 				free(root->lexeme);
 				free(root);
 				root = next;
 			}
 			return NULL;
 		}
 		tail = &((*tail)->next);
 	}
 	return root;
 }
 regex_t regNumber, regSymbol, regLParenthesis, regRParenthesis, regSpace;
 token *lex1Token(char *input, int *i)
 {
 	token *result = malloc(sizeof(token));
 	result->next = NULL;
 	regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
 	regcomp(&regNumber, "^[-+]?[[:digit:]]+", REG_EXTENDED);
 	regcomp(&regSymbol, "^[-+/*]", REG_EXTENDED);
 /* за сада подржава само симболе -, +, * и / */
 	regcomp(&regLParenthesis, "^(", REG_EXTENDED);
 	regcomp(&regRParenthesis, "^)", REG_EXTENDED);
 	const int nmatches = 1;
 	regmatch_t a[nmatches];
 	regexec(&regSpace, input + *i, nmatches, a, 0);
 	*i += a[0].rm_eo;
 /* помера индекс да би се игнорисали почетни "вајт-спејс" карактери */
 	if (!regexec(&regNumber, input + *i, nmatches, a, 0))
 	{
 		result->type = numberToken;
 	}
 	else if (!regexec(&regSymbol, input + *i, nmatches, a, 0))
 	{
 		result->type = symbolToken;
 	}
 	else if (!regexec(&regLParenthesis, input + *i, nmatches, a, 0))
 	{
 		result->type = LparenthesisToken;
 	}
 	else if (!regexec(&regRParenthesis, input + *i, nmatches, a, 0))
 	{
 		result->type = RparenthesisToken;
 	}
 	else
 	{
 		result->type = undefinedToken;
 		result->lexeme = NULL;
 		goto skipStringCopy;
 	}
 	result->lexeme = malloc((a[0].rm_eo + 1) * sizeof(char));
 	strncpy(result->lexeme, input + *i, a[0].rm_eo);
 	result->lexeme[a[0].rm_eo] = '\0';
 	*i += a[0].rm_eo;
 	regexec(&regSpace, input + *i, nmatches, a, 0);
 	*i += a[0].rm_eo;
 /* игнорисање крајњих вајт-спејс карактера */
 	skipStringCopy:
 	regfree(&regSpace);
 	regfree(&regNumber);
 	regfree(&regSymbol);
 	regfree(&regLParenthesis);
 	regfree(&regRParenthesis);
 	return result;
 }
 /*
  * Releases every node of a token list together with its lexeme.
  * Passing NULL is allowed and does nothing.
  */
 void freeLexedLine(token *list)
 {
 	while (list != NULL)
 	{
 		token *following = list->next;
 		/* free accepts NULL, so tokens without a lexeme need no special case */
 		free(list->lexeme);
 		free(list);
 		list = following;
 	}
 }
--- a/lexer.h
+++ b/lexer.h
@ -1,23 +0,0 @@
 #pragma once
 /* Kinds of token the lexer can produce; values count up from 0 in this order. */
 typedef enum
 {
 	undefinedToken = 0,	/* text that matched none of the token patterns */
 	numberToken,		/* optionally signed run of decimal digits */
 	symbolToken,		/* one of the characters - + / * */
 	lParenthesisToken,	/* opening parenthesis */
 	rParenthesisToken	/* closing parenthesis */
 } tokenType;
 /*
  * One lexed token, stored as a node of a singly linked list.
  * The struct tag deliberately avoids a leading underscore followed by a
  * capital letter, a spelling C reserves for the implementation; code refers
  * to the type through the token typedef.
  */
 typedef struct tokenNode
 {
 	tokenType type;		/* lexical category of the token */
 	char *lexeme;		/* heap-allocated token text, NULL for undefinedToken */
 	struct tokenNode *next;	/* following token, NULL at the end of the list */
 } token;
 token *lexLine(char *input);
 /* takes one line read from standard input as its argument and returns the
 lexical elements of that line as a linked list */
 void freeLexedLine(token *list);
 /* releases the memory occupied by a token list built from user input */
--- a/print.c
+++ b/print.c
@ -0,0 +1,36 @@
 #include <stdio.h>
 #include "util.h"
 void printValue(object input);
 /*
  * Writes input to standard output on its own line, prefixed with
  * "; Value: " and followed by an empty line.
  */
 void print(object input)
 {
 	fputs("\n; Value: ", stdout);
 	printValue(input);
 	fputs("\n\n", stdout);
 }
 /*
  * Writes the printed form of input to standard output: "nil" for the nil
  * object, the decimal value for a number, the stored text for a symbol, and
  * "(car . cdr)" for a cons cell, with both halves printed recursively.
  * An object of any other type produces no output.
  */
 void printValue(object input)
 {
 	if (input.type == consObject)
 	{
 		cons *cell = (cons *) input.address;
 		fputs("(", stdout);
 		printValue(cell->car);
 		fputs(" . ", stdout);
 		printValue(cell->cdr);
 		fputs(")", stdout);
 		return;
 	}
 	if (input.type == symbolObject)
 	{
 		fputs((char *) input.address, stdout);
 		return;
 	}
 	if (input.type == numberObject)
 	{
 		long long number = *((long long *) input.address);
 		printf("%lld", number);
 		return;
 	}
 	if (input.type == nilObject)
 	{
 		fputs("nil", stdout);
 	}
 }
--- a/print.h
+++ b/print.h
@ -0,0 +1,3 @@
 #pragma once
 void print(object input);
--- a/read.c
+++ b/read.c
@ -0,0 +1,273 @@
 #define _POSIX_C_SOURCE 200809L
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <regex.h>
 #include "util.h"
 #include "read.h"
 #include "print.h"
 /* Kinds of token the lexer can produce; values count up from 0 in this order. */
 typedef enum
 {
 	undefinedToken = 0,	/* text that matched none of the token patterns */
 	numberToken,		/* optionally signed run of decimal digits */
 	symbolToken,		/* one of the characters - + / * */
 	lParenthesisToken,	/* opening parenthesis */
 	rParenthesisToken	/* closing parenthesis */
 } tokenType;
 /*
  * One lexed token, stored as a node of a singly linked list.
  * The struct tag deliberately avoids a leading underscore followed by a
  * capital letter, a spelling C reserves for the implementation; code refers
  * to the type through the token typedef.
  */
 typedef struct tokenNode
 {
 	tokenType type;		/* lexical category of the token */
 	char *lexeme;		/* heap-allocated token text, NULL for undefinedToken */
 	struct tokenNode *next;	/* following token, NULL at the end of the list */
 } token;
 int completeSExpr(token **tokenList);
 char *readline();
 void append(token **head, token *appendix);
 token *lexLine(char *input);
 object parseObject(token **inputList);
 token *tokenList = NULL;
 /*
  * Prints prompt, then keeps reading and lexing lines from standard input until
  * the pending token list holds a complete S-expression, and returns the object
  * parsed from the front of that list.  Tokens left over after parsing stay in
  * tokenList for the next call.  When the input ends, a farewell message is
  * printed and the process exits with status 0.
  */
 object read(char *prompt)
 {
 	printf("%s", prompt);
 	/* make the prompt visible before blocking on input */
 	fflush(stdout);
 	while (!completeSExpr(&tokenList))
 	{
 		char *input = readline();
 		if (input == NULL) /* end of input was reached */
 		{
 			printf("\nКрај улазног стрима.\n");
 			exit(0);
 		}
 		append(&tokenList, lexLine(input));
 		/* the lexer copies every lexeme, so the line itself can be released */
 		free(input);
 	}
 	return parseObject(&tokenList);
 }
 /*
  * Reports whether the token list holds at least one complete top-level
  * expression: either a token outside any parentheses or a parenthesised group
  * whose closing parenthesis has been seen.
  *
  * A right parenthesis found at nesting depth 0 has nothing to close; such
  * tokens are unlinked from the list and freed together with their lexeme.
  *
  * tokenList: address of the list head; the head changes when a leading stray
  *            parenthesis is removed.
  * Returns 1 when a complete expression is present, 0 otherwise.
  */
 int completeSExpr(token **tokenList)
 {
 	int result = 0, depth = 0;
 	/* link always addresses the pointer that leads to the current token, so a
 	token can be removed without searching the list again from its head */
 	token **link = tokenList;
 	while (*link != NULL)
 	{
 		token *current = *link;
 		if (current->type == rParenthesisToken && depth == 0)
 		{
 			*link = current->next;
 			free(current->lexeme);
 			free(current);
 			continue;
 		}
 		if (current->type == lParenthesisToken)
 		{
 			++depth;
 		}
 		else if (current->type == rParenthesisToken)
 		{
 			/* closing the outermost group completes an expression */
 			if (depth == 1)
 			{
 				result = 1;
 			}
 			--depth;
 		}
 		else if (depth == 0)
 		{
 			result = 1;
 		}
 		link = &current->next;
 	}
 	return result;
 }
 ssize_t bytesRead;
 size_t nbytes = 2048;
 char *buffer = NULL;
 /*
  * Reads one line from standard input.
  *
  * Returns a newly allocated copy of the line without its terminating newline;
  * the caller frees it.  Returns NULL when no more input is available or when
  * memory for the copy cannot be obtained.  A final line that is not terminated
  * by a newline is returned whole.
  */
 char *readline()
 {
 	if (buffer == NULL)
 	{
 		/* a failed allocation is harmless: getline allocates when given NULL */
 		buffer = malloc(nbytes + 1);
 	}
 	bytesRead = getline(&buffer, &nbytes, stdin);
 	if (bytesRead == -1)
 	{
 		return NULL;
 	}
 	size_t length = (size_t) bytesRead;
 	/* drop the newline only when the line really ends with one */
 	if (length > 0 && buffer[length - 1] == '\n')
 	{
 		--length;
 	}
 	char *cpy = malloc(length + 1);
 	if (cpy == NULL)
 	{
 		return NULL;
 	}
 	memcpy(cpy, buffer, length);
 	cpy[length] = '\0';
 	return cpy;
 }
 /*
  * Attaches the list tail to the end of the list whose head pointer is *head.
  * When that list is empty, *head itself is set to tail.
  */
 void append(token **head, token *tail)
 {
 	if (*head == NULL)
 	{
 		*head = tail;
 		return;
 	}
 	token *last = *head;
 	while (last->next != NULL)
 	{
 		last = last->next;
 	}
 	last->next = tail;
 }
 regex_t regNumber, regSymbol, regLParenthesis, regRParenthesis, regSpace;
 /* Compiles the token patterns on the first call; every later call is a no-op. */
 static void compileTokenPatterns(void)
 {
 	static int compiled = 0;
 	if (compiled)
 	{
 		return;
 	}
 	regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
 	regcomp(&regNumber, "^[-+]?[[:digit:]]+", REG_EXTENDED);
 	/* for now only the symbols -, +, * and / are supported */
 	regcomp(&regSymbol, "^[-+/*]", REG_EXTENDED);
 	regcomp(&regLParenthesis, "^\\(", REG_EXTENDED);
 	regcomp(&regRParenthesis, "^\\)", REG_EXTENDED);
 	compiled = 1;
 }
 /*
  * Reads a single token from input, starting at index *i.
  *
  * Leading whitespace is skipped first.  The text that follows is classified as
  * a number, a symbol, or a left or right parenthesis (a number is tried before
  * a symbol, so a sign followed by digits is one number) and copied into a
  * newly allocated lexeme; *i is then moved past the token and any whitespace
  * after it.  When nothing matches, the token has type undefinedToken and a
  * NULL lexeme, and *i is left at the text that could not be matched.
  *
  * Returns a newly allocated token whose next pointer is NULL.  The process
  * exits with status 1 when memory cannot be allocated.
  */
 token *lex1Token(char *input, int *i)
 {
 	regmatch_t a[1];
 	token *result = malloc(sizeof *result);
 	if (result == NULL)
 	{
 		perror("malloc");
 		exit(1);
 	}
 	compileTokenPatterns();
 	result->next = NULL;
 	result->lexeme = NULL;
 	/* move the index past leading whitespace */
 	regexec(&regSpace, input + *i, 1, a, 0);
 	*i += a[0].rm_eo;
 	if (!regexec(&regNumber, input + *i, 1, a, 0))
 	{
 		result->type = numberToken;
 	}
 	else if (!regexec(&regSymbol, input + *i, 1, a, 0))
 	{
 		result->type = symbolToken;
 	}
 	else if (!regexec(&regLParenthesis, input + *i, 1, a, 0))
 	{
 		result->type = lParenthesisToken;
 	}
 	else if (!regexec(&regRParenthesis, input + *i, 1, a, 0))
 	{
 		result->type = rParenthesisToken;
 	}
 	else
 	{
 		result->type = undefinedToken;
 		return result;
 	}
 	/* every pattern is anchored, so rm_eo is the length of the matched text */
 	size_t length = (size_t) a[0].rm_eo;
 	result->lexeme = malloc(length + 1);
 	if (result->lexeme == NULL)
 	{
 		perror("malloc");
 		exit(1);
 	}
 	memcpy(result->lexeme, input + *i, length);
 	result->lexeme[length] = '\0';
 	*i += a[0].rm_eo;
 	/* move the index past trailing whitespace */
 	regexec(&regSpace, input + *i, 1, a, 0);
 	*i += a[0].rm_eo;
 	return result;
 }
 /*
  * Splits one input line into a linked list of tokens.
  *
  * input: NUL-terminated line to tokenize.
  * Returns the head of a newly allocated token list, or NULL when the line
  * yields no tokens or contains text that matches no token pattern.  In the
  * latter case the position of the offending text is reported on stderr and
  * every token already produced for the line is released.
  */
 token *lexLine(char *input)
 {
 	int i = 0, n;
 	n = strlen(input);
 	token *root = NULL, **tail;
 	tail = &root;
 	while (i < n)
 	{
 		*tail = lex1Token(input, &i);
 		if ((*tail)->type == undefinedToken)
 		{
 /* lex1Token stops right after the leading whitespace when nothing matches, so
 reaching the end of the line here only means the rest was blank, not an error */
 			if (input[i] != '\0')
 			{
 				fprintf(stderr, "Невалидан токен на месту %d\n", i);
 			}
 			while (root != NULL)
 			{
 				/* read the link before the node is released */
 				token *next = root->next;
 				free(root->lexeme);
 				free(root);
 				root = next;
 			}
 			return NULL;
 		}
 		tail = &((*tail)->next);
 	}
 	return root;
 }
 /*
  * Removes the tokens of one expression from the front of *inputList and builds
  * the object they describe.
  *
  * A number token becomes a numberObject pointing at a heap-allocated long long
  * converted with atoll.  A symbol token becomes a symbolObject holding a heap
  * copy of the lexeme.  A left parenthesis starts a chain of cons cells whose
  * car fields are parsed recursively and whose last cdr is the nil object; the
  * matching right parenthesis is consumed as well.  Any other token, or an
  * empty list, yields the nil object.
  *
  * Every consumed token is freed together with its lexeme.
  */
 object parseObject(token **inputList)
 {
 	object result;
 	/* start from nil so that no path returns an uninitialised object */
 	result.type = nilObject;
 	result.address = NULL;
 	if (*inputList == NULL)
 	{
 		return result;
 	}
 	token input = **inputList;
 	free(*inputList);
 	*inputList = input.next;
 	if (input.type == numberToken)
 	{
 		result.type = numberObject;
 		result.address = malloc(sizeof(long long int));
 		*((long long *) result.address) = atoll(input.lexeme);
 	}
 	else if (input.type == symbolToken)
 	{
 		result.type = symbolObject;
 		result.address = malloc((strlen(input.lexeme) + 1)
 					 * sizeof(char));
 		strcpy((char *) result.address, input.lexeme);
 	}
 	else if (input.type == lParenthesisToken)
 	{
 		object *listCurrent = &result;
 		while (*inputList != NULL
 			&& (*inputList)->type != rParenthesisToken)
 		{
 			listCurrent->type = consObject;
 			listCurrent->address = malloc(sizeof(cons));
 			((cons *) listCurrent->address)->car =
 				parseObject(inputList);
 			listCurrent = &(((cons *) listCurrent->address)->cdr);
 		}
 		listCurrent->type = nilObject;
 		listCurrent->address = NULL;
 		if (*inputList != NULL)
 		{
 			/* consume the closing parenthesis */
 			token *closing = *inputList;
 			*inputList = closing->next;
 			free(closing->lexeme);
 			free(closing);
 		}
 	}
 	/* the token node is already gone; its text is no longer needed either */
 	free(input.lexeme);
 	return result;
 }
--- a/read.h
+++ b/read.h
@ -0,0 +1,3 @@
 #pragma once
 object read(char *prompt);
--- a/readline.c
+++ b/readline.c
@ -1,31 +0,0 @@
 #define _POSIX_C_SOURCE 200809L
 #include <stdio.h>
 #include <string.h>
 #include <stdlib.h>
 #include "util.h"
 ssize_t _bytesRead;
 size_t _nbytes = 2048;
 char *_buffer = NULL;
 /*
  * Writes prompt to standard output and reads one line from standard input.
  *
  * Returns a newly allocated copy of the line without its terminating newline;
  * the caller frees it.  Returns NULL when no more input is available or when
  * memory for the copy cannot be obtained.  A final line that is not terminated
  * by a newline is returned whole.
  */
 char *readline(char *prompt)
 {
 	if (_buffer == NULL)
 	{
 		/* a failed allocation is harmless: getline allocates when given NULL */
 		_buffer = malloc(_nbytes + 1);
 	}
 	fputs(prompt, stdout);
 	_bytesRead = getline(&_buffer, &_nbytes, stdin);
 	if (_bytesRead == -1)
 	{
 		return NULL;
 	}
 	size_t length = (size_t) _bytesRead;
 	/* drop the newline only when the line really ends with one */
 	if (length > 0 && _buffer[length - 1] == '\n')
 	{
 		--length;
 	}
 	char *cpy = malloc(length + 1);
 	if (cpy == NULL)
 	{
 		return NULL;
 	}
 	memcpy(cpy, _buffer, length);
 	cpy[length] = '\0';
 	return cpy;
 }
--- a/readline.h
+++ b/readline.h
@ -1,3 +0,0 @@
 #pragma once
 char *readline(char *prompt);
		`@ -1,3 +0,0 @@`
			`#pragma once`

			`char *readline(char *prompt);`