Потпуна реорганизација програма

2019-01-08 22:19:29 +01:00 · 2019-01-08 22:19:29 +01:00 · da13db05d3
commit da13db05d3
parent f066c2f221
10 changed files with 328 additions and 207 deletions
--- a/10
+++ b/10
@ -1,19 +1,19 @@
 # cirilisp - компајлер за ћирилични дијалекат лиспа

 # ћирилисп верзија
-VERSION = 0.1
+VERSION = 0.2

 # локација за инсталацију
 PREFIX = /usr/local

 # флегови за C компајлер и линкер
-# CFLAGS   = -g -std=c99 -pedantic -Wall -O0
-CFLAGS  = -std=c99 -pedantic -Wall -O1
+CFLAGS   = -g -std=c99 -pedantic -Wall -O0
+# CFLAGS  = -std=c99 -pedantic -Wall -O1
 LDFLAGS = -lm -lc

 CC = cc

-SRC = cirilisp.c readline.c lexer.c
+SRC = cirilisp.c read.c print.c
 OBJ = $(SRC:.c=.o)

 all: cirilisp
@ -21,7 +21,7 @@ all: cirilisp
 .c.o:
 	$(CC) -c $(CFLAGS) $<

-$(OBJ): readline.h lexer.h
+$(OBJ): util.h read.h print.h

 cirilisp: $(OBJ)
 	$(CC) -o $@ $(OBJ) $(LDFLAGS)
--- a/cirilisp.c
+++ b/cirilisp.c
@ -2,8 +2,9 @@
 #include <stdio.h>
 #include <stdlib.h>

-#include "readline.h"
-#include "lexer.h"
+#include "util.h"
+#include "read.h"
+#include "print.h"

 int main(int argc, char **argv)
 {
@ -15,29 +16,11 @@ int main(int argc, char **argv)
 		exit(0);
 	}

-	while (1)
+	for (;;)
 	{
-		char *input = readline("Л> ");
-		if (input == NULL)
-		{
-			putchar('\n');
-			printf("Крај улазног тока.\n");
-			// Превод
-			exit(0);
-		}
-
-		token *tokenList, *current;
-
-		current = tokenList = lexLine(input);
-		int i = 0;
-		while (current != NULL)
-		{
-			printf("Токен бр. %d: \"%s\", тип:%s\n", i, current->lexeme, current->type == numberToken ? "number" : (current->type == symbolToken ? "symbol" : "parenthesis"));
-			current = current->next;
-			++i;
-		}
-
-		freeLexedLine(tokenList);
-		free(input);
+		print(read("ШКЉ> "));
+		/* append(&tokenList, lexLine(readline()));
+		printTokenList(tokenList); */
 	}
 }
+
--- a/lexer.c
+++ b/lexer.c
@ -1,120 +0,0 @@
-#include <stdio.h>
-#include <stdlib.h>
-#include <string.h>
-#include <regex.h>
-
-#include "lexer.h"
-
-token *lex1Token(char *input, int *i);
-/* враћа показивач на једну token структуру, која означава један одређен токен,
-чита улазни стринг од i-тог карактера, и мења i тако да оно затим индексира
-следећи токен или крај стринга*/
-
-token *lexLine(char *input)
-{
-	int i = 0, n;
-	n = strlen(input);
-	token *root = NULL, **new;
-	new = &root;
-	while (i < n)
-	{
-		*new = lex1Token(input, &i);
-		if ((*new)->type == undefinedToken)
-		{
-/* уколико се у реду нађе токен који је лексички погрешан, штампа се место тог
-токена у реду и брише се цела листа, функција враћа NULL*/
-			fprintf(stderr, "Невалидан токен на месту %d\n", i);
-			new = &root;
-			while (*new != NULL)
-			{
-				free(root->lexeme);
-				new = &((*new)->next);
-				free(root);
-				root = *new;
-			}
-			return NULL;
-		}
-		new = &((*new)->next);
-	}
-	return root;
-}
-
-regex_t regNumber, regSymbol, regLParenthesis, regRParenthesis, regSpace;
-
-token *lex1Token(char *input, int *i)
-{
-	token *result = malloc(sizeof(token));
-	result->next = NULL;
-
-	regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
-	regcomp(&regNumber, "^[-+]?[[:digit:]]+", REG_EXTENDED);
-	regcomp(&regSymbol, "^[-+/*]", REG_EXTENDED);
-/* за сада подржава само симболе -, +, * и / */
-	regcomp(&regLParenthesis, "^(", REG_EXTENDED);
-	regcomp(&regRParenthesis, "^)", REG_EXTENDED);
-
-	const int nmatches = 1;
-	regmatch_t a[nmatches];
-
-	regexec(&regSpace, input + *i, nmatches, a, 0);
-	*i += a[0].rm_eo;
-/* помера индекс да би се игнорисали почетни "вајт-спејс" карактери */
-
-	if (!regexec(&regNumber, input + *i, nmatches, a, 0))
-	{
-		result->type = numberToken;
-	}
-	else if (!regexec(&regSymbol, input + *i, nmatches, a, 0))
-	{
-		result->type = symbolToken;
-	}
-	else if (!regexec(&regLParenthesis, input + *i, nmatches, a, 0))
-	{
-		result->type = LparenthesisToken;
-	}
-	else if (!regexec(&regRParenthesis, input + *i, nmatches, a, 0))
-	{
-		result->type = RparenthesisToken;
-	}
-	else
-	{
-		result->type = undefinedToken;
-		result->lexeme = NULL;
-		goto skipStringCopy;
-	}
-	result->lexeme = malloc((a[0].rm_eo + 1) * sizeof(char));
-	strncpy(result->lexeme, input + *i, a[0].rm_eo);
-	result->lexeme[a[0].rm_eo] = '\0';
-	*i += a[0].rm_eo;
-
-	regexec(&regSpace, input + *i, nmatches, a, 0);
-	*i += a[0].rm_eo;
-/* игнорисање крајњих вајт-спејс карактера */
-
-	skipStringCopy:
-	regfree(&regSpace);
-	regfree(&regNumber);
-	regfree(&regSymbol);
-	regfree(&regLParenthesis);
-	regfree(&regRParenthesis);
-
-	return result;
-}
-
-void freeLexedLine(token *list)
-{
-	if (list == NULL)
-	{
-		return;
-	}
-	else
-	{
-		freeLexedLine(list->next);
-		if (list->lexeme != NULL)
-		{
-			free(list->lexeme);
-		}
-		free(list);
-		return;
-	}
-}
--- a/lexer.h
+++ b/lexer.h
@ -1,23 +0,0 @@
-#pragma once
-
-typedef enum
-{
-	undefinedToken,
-	numberToken,
-	symbolToken,
-	lParenthesisToken,
-	rParenthesisToken
-} tokenType;
-
-typedef struct _Token
-{
-	tokenType type;
-	char *lexeme;
-	struct _Token *next;
-} token;
-
-token *lexLine(char *input);
-/* као аргумент добија ред са стандардног улаза, а као излаз
-враћа лексичке елементе у повезаној листи */
-void freeLexedLine(token *list);
-/* ослобађа меморију коју је заузела листа токена узета из корисничког улаза */
--- a/print.c
+++ b/print.c
@ -0,0 +1,36 @@
+#include <stdio.h>
+
+#include "util.h"
+
+void printValue(object input);
+
+/* Prints an object the way the REPL reports a result: a blank line, the
+"; Value: " label, the object's printed form, and a trailing blank line. */
+void print(object input)
+{
+	fputs("\n; Value: ", stdout);
+	printValue(input);
+	fputs("\n\n", stdout);
+}
+
+/* Writes the printed form of an object to standard output: "nil" for the nil
+object, decimal digits for a number, the stored text for a symbol, and
+"(car . cdr)" dotted-pair notation (applied recursively) for a cons cell.
+Objects of any other type produce no output. */
+void printValue(object input)
+{
+	switch (input.type)
+	{
+	case nilObject:
+		printf("nil");
+		break;
+	case numberObject:
+	{
+		long long *number = (long long *) input.address;
+
+		printf("%lld", *number);
+		break;
+	}
+	case symbolObject:
+		printf("%s", (char *) input.address);
+		break;
+	case consObject:
+	{
+		cons *pair = (cons *) input.address;
+
+		printf("(");
+		printValue(pair->car);
+		printf(" . ");
+		printValue(pair->cdr);
+		printf(")");
+		break;
+	}
+	default:
+		break;
+	}
+}
--- a/print.h
+++ b/print.h
@ -0,0 +1,3 @@
+#pragma once
+
+void print(object input); /* prints "; Value: " and the object; `object` must be declared before this header is included */
--- a/read.c
+++ b/read.c
@ -0,0 +1,273 @@
+#define _POSIX_C_SOURCE 200809L
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <regex.h>
+
+#include "util.h"
+#include "read.h"
+#include "print.h"
+
+typedef enum /* lexical category that lex1Token assigns to a token */
+{
+	undefinedToken, /* the input matched none of the lexer patterns */
+	numberToken, /* optionally signed run of decimal digits */
+	symbolToken, /* a single -, +, / or * character */
+	lParenthesisToken, /* "(" */
+	rParenthesisToken /* ")" */
+} tokenType;
+
+typedef struct _Token /* one node of the singly linked token list */
+{
+	tokenType type; /* lexical category of the token */
+	char *lexeme; /* heap-allocated copy of the matched text; NULL for undefinedToken */
+	struct _Token *next; /* following token, or NULL at the end of the list */
+} token;
+
+int completeSExpr(token **tokenList); /* 1 if the list holds a complete top-level expression, else 0 */
+char *readline(); /* next line of stdin as a heap-allocated string; NULL when getline fails */
+void append(token **head, token *appendix); /* links appendix after the last token of *head */
+token *lexLine(char *input); /* turns one input line into a token list; NULL if empty or invalid */
+object parseObject(token **inputList); /* removes the first expression from the list and builds its object */
+
+token *tokenList = NULL; /* tokens already lexed but not yet consumed by parseObject */
+
+/* Prints the prompt, then reads and lexes input lines until the pending token
+list holds at least one complete top-level expression, and returns the first
+expression parsed into an object. Tokens that follow that expression stay in
+tokenList for the next call. When readline returns NULL the end-of-stream
+message is printed and the program exits with status 0. */
+object read(char *prompt)
+{
+	printf("%s", prompt);
+	/* the prompt has no trailing newline, so flush it before waiting for input */
+	fflush(stdout);
+
+	while (!completeSExpr(&tokenList))
+	{
+		char *input = readline();
+		if (input == NULL) /* no more input is available */
+		{
+			printf("\nКрај улазног стрима.\n");
+			exit(0);
+		}
+		append(&tokenList, lexLine(input));
+		/* every lexeme is a separate copy, so the line itself is no longer needed */
+		free(input);
+	}
+
+	return parseObject(&tokenList);
+}
+
+/* Reports whether the token list contains at least one complete top-level
+expression: either an atom outside any parentheses or a parenthesized group
+whose closing parenthesis has been seen. Returns 1 if so, 0 otherwise.
+
+A closing parenthesis that appears outside any open group can never belong to
+an expression, so it is unlinked from the list and released (node and lexeme)
+as the list is scanned. */
+int completeSExpr(token **tokenList)
+{
+	int result = 0, depth = 0;
+	/* address of the pointer that refers to the current node, so a node can be
+	unlinked without searching for its predecessor */
+	token **link = tokenList;
+
+	while (*link != NULL)
+	{
+		token *current = *link;
+
+		if (current->type == rParenthesisToken && depth == 0)
+		{
+			/* stray ")": drop it and look at whatever took its place */
+			*link = current->next;
+			free(current->lexeme);
+			free(current);
+			continue;
+		}
+
+		if (current->type == lParenthesisToken)
+		{
+			++depth;
+		}
+		else if (current->type == rParenthesisToken)
+		{
+			--depth;
+			if (depth == 0)
+			{
+				/* the outermost group has just been closed */
+				result = 1;
+			}
+		}
+		else if (depth == 0)
+		{
+			/* an atom at top level is an expression by itself */
+			result = 1;
+		}
+
+		link = &current->next;
+	}
+	return result;
+}
+
+ssize_t bytesRead;
+size_t nbytes = 2048;
+char *buffer = NULL;
+
+/* Reads one line from standard input and returns it as a newly allocated
+string that the caller must free. A trailing newline, if there is one, is
+not included in the result; a final line that ends without a newline is
+returned unchanged. Returns NULL when getline reports end of input or a read
+error. If the copy cannot be allocated, an error is reported with perror and
+the program exits with EXIT_FAILURE. */
+char *readline()
+{
+	if (buffer == NULL)
+	{
+		/* if this fails buffer stays NULL and getline allocates it instead */
+		buffer = (char *) malloc(nbytes + 1);
+	}
+	bytesRead = getline(&buffer, &nbytes, stdin);
+	if (bytesRead == -1)
+	{
+		return NULL;
+	}
+
+	size_t length = strlen(buffer);
+	if (length > 0 && buffer[length - 1] == '\n')
+	{
+		/* strip only a real newline; the last line of input may lack one */
+		--length;
+	}
+
+	char *cpy = malloc(length + 1);
+	if (cpy == NULL)
+	{
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+	memcpy(cpy, buffer, length);
+	cpy[length] = '\0';
+
+	return cpy;
+}
+
+/* Attaches the list that starts at tail to the end of the list whose first
+node is *head. If *head is NULL the list simply becomes tail. */
+void append(token **head, token *tail)
+{
+	token *last;
+
+	if (*head == NULL)
+	{
+		*head = tail;
+		return;
+	}
+
+	for (last = *head; last->next != NULL; last = last->next)
+	{
+		/* advance to the final node */
+	}
+	last->next = tail;
+}
+
+regex_t regNumber, regSymbol, regLParenthesis, regRParenthesis, regSpace;
+
+/* Compiles the lexer patterns the first time it is called; every later call
+returns immediately. The compiled patterns are kept for the lifetime of the
+process. If a pattern cannot be compiled, the regerror message is written to
+stderr and the program exits with EXIT_FAILURE. */
+static void compileLexerPatterns(void)
+{
+	static int compiled = 0;
+	const struct
+	{
+		regex_t *regex;
+		const char *pattern;
+	} table[] = {
+		{ &regSpace, "^[[:space:]]*" },
+		{ &regNumber, "^[-+]?[[:digit:]]+" },
+		/* for now the only supported symbols are -, +, * and / */
+		{ &regSymbol, "^[-+/*]" },
+		{ &regLParenthesis, "^\\(" },
+		{ &regRParenthesis, "^\\)" }
+	};
+	size_t k;
+
+	if (compiled)
+	{
+		return;
+	}
+	for (k = 0; k < sizeof table / sizeof table[0]; ++k)
+	{
+		int status = regcomp(table[k].regex, table[k].pattern,
+				     REG_EXTENDED);
+		if (status != 0)
+		{
+			char message[128];
+
+			regerror(status, table[k].regex, message,
+				 sizeof message);
+			fprintf(stderr, "regcomp: %s\n", message);
+			exit(EXIT_FAILURE);
+		}
+	}
+	compiled = 1;
+}
+
+/* Lexes a single token from input, starting at index *i.
+
+Leading white space is skipped first. If the text at that point is a number,
+a symbol or a parenthesis, the returned token carries that type and a
+heap-allocated copy of the matched text, and *i is moved past the token and
+past any white space that follows it. Otherwise the returned token has type
+undefinedToken and a NULL lexeme, and *i is left just after the leading white
+space.
+
+The caller owns the returned token and its lexeme. If memory cannot be
+allocated, an error is reported with perror and the program exits with
+EXIT_FAILURE. */
+token *lex1Token(char *input, int *i)
+{
+	regmatch_t match[1];
+	token *result;
+	size_t length;
+
+	compileLexerPatterns();
+
+	result = malloc(sizeof *result);
+	if (result == NULL)
+	{
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+	result->next = NULL;
+	result->lexeme = NULL;
+
+	/* regSpace also matches the empty string, so match[0] is always filled in */
+	regexec(&regSpace, input + *i, 1, match, 0);
+	*i += match[0].rm_eo;
+
+	if (!regexec(&regNumber, input + *i, 1, match, 0))
+	{
+		result->type = numberToken;
+	}
+	else if (!regexec(&regSymbol, input + *i, 1, match, 0))
+	{
+		result->type = symbolToken;
+	}
+	else if (!regexec(&regLParenthesis, input + *i, 1, match, 0))
+	{
+		result->type = lParenthesisToken;
+	}
+	else if (!regexec(&regRParenthesis, input + *i, 1, match, 0))
+	{
+		result->type = rParenthesisToken;
+	}
+	else
+	{
+		result->type = undefinedToken;
+		return result;
+	}
+
+	/* every pattern is anchored with ^, so rm_eo is the length of the token */
+	length = (size_t) match[0].rm_eo;
+	result->lexeme = malloc(length + 1);
+	if (result->lexeme == NULL)
+	{
+		perror("malloc");
+		exit(EXIT_FAILURE);
+	}
+	memcpy(result->lexeme, input + *i, length);
+	result->lexeme[length] = '\0';
+	*i += match[0].rm_eo;
+
+	/* skip the white space that follows the token */
+	regexec(&regSpace, input + *i, 1, match, 0);
+	*i += match[0].rm_eo;
+
+	return result;
+}
+
+/* Lexes a whole input line into a linked list of tokens and returns its first
+node. The caller owns the list. Returns NULL when the line contains no tokens
+(it is empty or holds only white space).
+
+If a lexically invalid token is found, its position in the line is printed to
+stderr, every token already lexed from this line is freed, and NULL is
+returned. */
+token *lexLine(char *input)
+{
+	int i = 0, n;
+	n = strlen(input);
+	token *root = NULL, **new;
+	new = &root;
+
+	/* lex1Token consumes the white space after each token, so only white
+	space at the start of the line has to be skipped here; without this a
+	blank line would be reported as an invalid token */
+	i += (int) strspn(input, " \t\n\v\f\r");
+
+	while (i < n)
+	{
+		*new = lex1Token(input, &i);
+		if ((*new)->type == undefinedToken)
+		{
+			fprintf(stderr, "Невалидан токен на месту %d\n", i);
+			while (root != NULL)
+			{
+				/* read the link before the node is released */
+				token *following = root->next;
+
+				free(root->lexeme);
+				free(root);
+				root = following;
+			}
+			return NULL;
+		}
+		new = &((*new)->next);
+	}
+	return root;
+}
+
+/* Removes the first expression from the token list and returns it as an
+object. *inputList is advanced past every token that was consumed, and the
+consumed tokens are freed.
+
+- a number token becomes a numberObject that points to a heap-allocated
+  long long;
+- a symbol token becomes a symbolObject that takes ownership of the token's
+  lexeme string;
+- "(" starts a list: each element up to the matching ")" is parsed
+  recursively into the car of a new cons cell, the cells are chained through
+  their cdr fields, and the chain ends in a nil object, so "()" yields nil.
+
+An empty list, an unexpected token, or a list that ends before its ")" yields
+nil for the missing part instead of reading past the end of the token list.
+If memory cannot be allocated, an error is reported with perror and the
+program exits with EXIT_FAILURE. */
+object parseObject(token **inputList)
+{
+	object result;
+
+	result.type = nilObject;
+	result.address = NULL;
+
+	if (*inputList == NULL)
+	{
+		return result;
+	}
+
+	token input = **inputList;
+	free(*inputList);
+	*inputList = input.next;
+
+	if (input.type == numberToken)
+	{
+		result.type = numberObject;
+		result.address = malloc(sizeof(long long));
+		if (result.address == NULL)
+		{
+			perror("malloc");
+			exit(EXIT_FAILURE);
+		}
+		*((long long *) result.address) =
+			strtoll(input.lexeme, NULL, 10);
+		free(input.lexeme);
+		return result;
+	}
+	else if (input.type == symbolToken)
+	{
+		/* the lexeme is already a private heap copy of the symbol text,
+		so hand it to the object instead of duplicating it */
+		result.type = symbolObject;
+		result.address = input.lexeme;
+		return result;
+	}
+
+	/* parentheses and invalid tokens carry no data of their own */
+	free(input.lexeme);
+
+	if (input.type == lParenthesisToken)
+	{
+		object *listCurrent = &result;
+
+		while (*inputList != NULL
+		       && (*inputList)->type != rParenthesisToken)
+		{
+			listCurrent->type = consObject;
+			listCurrent->address = malloc(sizeof(cons));
+			if (listCurrent->address == NULL)
+			{
+				perror("malloc");
+				exit(EXIT_FAILURE);
+			}
+
+			((cons *) listCurrent->address)->car =
+				parseObject(inputList);
+
+			listCurrent = &(((cons *) listCurrent->address)->cdr);
+		}
+
+		listCurrent->type = nilObject;
+		listCurrent->address = NULL;
+
+		if (*inputList != NULL)
+		{
+			/* consume the closing parenthesis */
+			token *closing = *inputList;
+
+			*inputList = closing->next;
+			free(closing->lexeme);
+			free(closing);
+		}
+	}
+
+	return result;
+}
--- a/read.h
+++ b/read.h
@ -0,0 +1,3 @@
+#pragma once
+
+object read(char *prompt); /* prints prompt and returns the next parsed expression; `object` must be declared before this header is included */
--- a/readline.c
+++ b/readline.c
@ -1,31 +0,0 @@
-#define _POSIX_C_SOURCE 200809L
-#include <stdio.h>
-#include <string.h>
-#include <stdlib.h>
-
-#include "util.h"
-
-ssize_t _bytesRead;
-size_t _nbytes = 2048;
-char *_buffer = NULL;
-
-char *readline(char *prompt)
-{
-	if (_buffer == NULL)
-	{
-		_buffer = (char *) malloc(_nbytes + 1);
-	}
-	fputs(prompt, stdout);
-	_bytesRead = getline(&_buffer, &_nbytes, stdin);
-	if (_bytesRead == -1)
-	{
-		return NULL;
-	}
-	
-	char *cpy = malloc(strlen(_buffer)+1);
-	strcpy(cpy, _buffer);
-	cpy[strlen(cpy)-1] = '\0';
-	// Уклања завршни њу-лајн или ЕОФ у стрингу и копира га на ново место
-
-	return cpy;
-}
--- a/readline.h
+++ b/readline.h
@ -1,3 +0,0 @@
-#pragma once
-
-char *readline(char *prompt);