/*
 * cirilisp/read.c
 *
 * Reader: reads lines from standard input, splits them into tokens and
 * parses the tokens into objects.
 *
 * Note: this file intentionally contains non-ASCII (Cyrillic) characters in
 * its string literals; they are not an encoding error.
 */

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include "util.h"
#include "read.h"
#include "print.h"
/* Lexical category assigned to each token produced by the lexer. */
typedef enum
{
    undefinedToken = 0,   /* input that matched none of the token patterns */
    numberToken,          /* optionally signed run of decimal digits */
    symbolToken,          /* one of the operator symbols */
    lParenthesisToken,    /* "(" */
    rParenthesisToken     /* ")" */
} tokenType;
/* One lexed token; tokens are chained into a singly linked list. */
struct _Token
{
    tokenType type;        /* lexical category of this token */
    char *lexeme;          /* heap-allocated text of the token, or NULL */
    struct _Token *next;   /* following token in the list, or NULL at the end */
};
typedef struct _Token token;
/* Forward declarations of the helpers defined further down in this file. */
int completeSExpr(token **tokenList);
char *readline(void);   /* (void): a real prototype, not an unspecified parameter list */
void append(token **head, token *tail);
token *lexLine(char *input);
object parseObject(token **inputList);

/*
 * Tokens that have been lexed but not yet parsed.  read() appends freshly
 * lexed lines to this list and hands it to parseObject(), which consumes
 * tokens from its front.
 */
token *tokenList = NULL;
/*
 * Prints `prompt`, then keeps reading and lexing input lines until the
 * pending token list starts with a complete S-expression, and returns that
 * expression parsed into an object.
 *
 * On end of input a farewell message is printed and the process exits with
 * status 0.
 */
object read(char *prompt)
{
    printf("%s", prompt);
    /* The prompt has no trailing newline; flush so it is visible before we
       block waiting for input. */
    fflush(stdout);

    while (!completeSExpr(&tokenList))
    {
        char *input = readline();
        if (input == NULL) /* end of file was signalled on the input */
        {
            printf("\nКрај улазног стрима.\n");
            exit(0);
        }
        append(&tokenList, lexLine(input));
        /* The lexer copies every lexeme out of the line, so the line itself
           is no longer needed; readline() transferred ownership to us. */
        free(input);
    }
    return parseObject(&tokenList);
}
/*
 * Reports whether the token list contains at least one complete top-level
 * S-expression: either an atom outside any parentheses, or a parenthesised
 * group whose closing parenthesis has been seen.
 *
 * Side effect: a closing parenthesis that appears at nesting depth 0 has no
 * matching opener; such tokens are unlinked from the list and freed.
 *
 * Returns 1 when a complete expression is present, 0 otherwise.
 */
int completeSExpr(token **tokenList)
{
    int complete = 0;
    int depth = 0;
    /* `link` always points at the pointer that refers to the current token,
       so a token can be unlinked without rescanning from the head. */
    token **link = tokenList;

    while (*link != NULL)
    {
        token *current = *link;

        if (current->type == rParenthesisToken && depth == 0)
        {
            /* Unmatched ")": drop it, releasing the lexeme together with
               the node so nothing is leaked. */
            *link = current->next;
            free(current->lexeme);
            free(current);
            continue;
        }

        if (current->type == lParenthesisToken)
        {
            ++depth;
        }
        else if (current->type == rParenthesisToken)
        {
            --depth;
            if (depth == 0)
            {
                complete = 1; /* closed a top-level list */
            }
        }
        else if (depth == 0)
        {
            complete = 1; /* a bare atom at top level */
        }

        link = &current->next;
    }
    return complete;
}
/* State shared across readline() calls so the getline() buffer is reused. */
ssize_t bytesRead;      /* result of the most recent getline() call */
size_t nbytes = 2048;   /* capacity that getline() is told `buffer` has */
char *buffer = NULL;    /* reusable line buffer, grown by getline() as needed */

/*
 * Reads one line from standard input.
 *
 * Returns a newly allocated, NUL-terminated copy of the line with its
 * trailing newline (if there is one) removed; the caller owns the copy and
 * must free() it.  Returns NULL when getline() reports end of file or an
 * error.  Exits the process if the copy cannot be allocated.
 */
char *readline(void)
{
    if (buffer == NULL)
    {
        buffer = malloc(nbytes + 1);
        if (buffer == NULL)
        {
            /* Let getline() attempt the allocation itself. */
            nbytes = 0;
        }
    }

    bytesRead = getline(&buffer, &nbytes, stdin);
    if (bytesRead == -1)
    {
        return NULL;
    }

    /* Strip the newline only when it is really there: the final line of the
       input may end at EOF without one, and must not lose a character. */
    size_t length = (size_t) bytesRead;
    if (length > 0 && buffer[length - 1] == '\n')
    {
        --length;
    }

    char *cpy = malloc(length + 1);
    if (cpy == NULL)
    {
        perror("malloc");
        exit(EXIT_FAILURE);
    }
    memcpy(cpy, buffer, length);
    cpy[length] = '\0';
    return cpy;
}
/*
 * Attaches the list `tail` to the end of the list `*head`.  When `*head` is
 * empty, `tail` simply becomes the list.
 */
void append(token **head, token *tail)
{
    if (*head == NULL)
    {
        *head = tail;
        return;
    }

    token *last = *head;
    while (last->next != NULL)
    {
        last = last->next;
    }
    last->next = tail;
}
regex_t regNumber, regSymbol, regLParenthesis, regRParenthesis, regSpace;
token *lex1Token(char *input, int *i)
{
token *result = malloc(sizeof(token));
result->next = NULL;
regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
regcomp(&regNumber, "^[-+]?[[:digit:]]+", REG_EXTENDED);
regcomp(&regSymbol, "^[-+/*]", REG_EXTENDED);
/* за сада подржава само симболе -, +, * и / */
regcomp(&regLParenthesis, "^\\(", REG_EXTENDED);
regcomp(&regRParenthesis, "^\\)", REG_EXTENDED);
const int nmatches = 1;
regmatch_t a[nmatches];
regexec(&regSpace, input + *i, nmatches, a, 0);
*i += a[0].rm_eo;
/* помера индекс да би се игнорисали почетни "вајт-спејс" карактери */
if (!regexec(&regNumber, input + *i, nmatches, a, 0))
{
result->type = numberToken;
}
else if (!regexec(&regSymbol, input + *i, nmatches, a, 0))
{
result->type = symbolToken;
}
else if (!regexec(&regLParenthesis, input + *i, nmatches, a, 0))
{
result->type = lParenthesisToken;
}
else if (!regexec(&regRParenthesis, input + *i, nmatches, a, 0))
{
result->type = rParenthesisToken;
}
else
{
result->type = undefinedToken;
result->lexeme = NULL;
goto skipStringCopy;
}
result->lexeme = malloc((a[0].rm_eo + 1) * sizeof(char));
strncpy(result->lexeme, input + *i, a[0].rm_eo);
result->lexeme[a[0].rm_eo] = '\0';
*i += a[0].rm_eo;
regexec(&regSpace, input + *i, nmatches, a, 0);
*i += a[0].rm_eo;
/* игнорисање крајњих вајт-спејс карактера */
skipStringCopy:
regfree(&regSpace);
regfree(&regNumber);
regfree(&regSymbol);
regfree(&regLParenthesis);
regfree(&regRParenthesis);
return result;
}
/*
 * Lexes a whole input line into a linked list of tokens.
 *
 * Returns the head of the list, or NULL when the line holds no tokens.  If
 * any token on the line is lexically invalid, its position is reported on
 * stderr, every token already produced for this line is freed, and NULL is
 * returned.
 */
token *lexLine(char *input)
{
    static const char whitespace[] = " \t\n\v\f\r";
    int i = 0;
    int n = (int) strlen(input);
    token *root = NULL;
    token **tail = &root;

    while (i < n)
    {
        /* A line made up solely of whitespace contains no token at all;
           stop here rather than reporting it as an invalid token. */
        i += (int) strspn(input + i, whitespace);
        if (i >= n)
        {
            break;
        }

        *tail = lex1Token(input, &i);
        if ((*tail)->type == undefinedToken)
        {
            fprintf(stderr, "Невалидан токен на месту %d\n", i);
            /* Release the partial list; fetch `next` before freeing a node
               so freed memory is never read. */
            while (root != NULL)
            {
                token *next = root->next;
                free(root->lexeme);
                free(root);
                root = next;
            }
            return NULL;
        }
        tail = &(*tail)->next;
    }
    return root;
}
/*
 * Parses one object from the front of the token list, consuming (unlinking
 * and freeing) every token it uses.
 *
 *  - a number token becomes a numberObject holding a heap-allocated
 *    long long;
 *  - a symbol token becomes a symbolObject holding a heap-allocated copy of
 *    the symbol text;
 *  - "(" starts a list: elements are parsed recursively into a chain of
 *    cons cells terminated by a nilObject, and the closing ")" is consumed.
 *
 * An empty token list, or a token of any other type, yields a nilObject
 * with a NULL address.  Exits the process if memory cannot be allocated.
 */
object parseObject(token **inputList)
{
    object result;
    result.type = nilObject;
    result.address = NULL;

    if (inputList == NULL || *inputList == NULL)
    {
        return result;
    }

    /* Detach the first token; keep its lexeme alive until we are done. */
    token *head = *inputList;
    tokenType kind = head->type;
    char *lexeme = head->lexeme;
    *inputList = head->next;
    free(head);

    if (kind == numberToken)
    {
        result.type = numberObject;
        result.address = malloc(sizeof(long long));
        if (result.address == NULL)
        {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        /* strtoll saturates on overflow, where atoll would be undefined. */
        *((long long *) result.address) = strtoll(lexeme, NULL, 10);
    }
    else if (kind == symbolToken)
    {
        size_t size = strlen(lexeme) + 1;
        result.type = symbolObject;
        result.address = malloc(size);
        if (result.address == NULL)
        {
            perror("malloc");
            exit(EXIT_FAILURE);
        }
        memcpy(result.address, lexeme, size);
    }
    else if (kind == lParenthesisToken)
    {
        /* `listCurrent` is the slot where the next cons cell (or the final
           nil) has to be written. */
        object *listCurrent = &result;
        while (*inputList != NULL
               && (*inputList)->type != rParenthesisToken)
        {
            listCurrent->type = consObject;
            listCurrent->address = malloc(sizeof(cons));
            if (listCurrent->address == NULL)
            {
                perror("malloc");
                exit(EXIT_FAILURE);
            }
            cons *cell = (cons *) listCurrent->address;
            cell->car = parseObject(inputList);
            listCurrent = &cell->cdr;
        }
        listCurrent->type = nilObject;
        listCurrent->address = NULL;

        /* Consume the closing parenthesis, if the input was not cut short. */
        if (*inputList != NULL)
        {
            token *closing = *inputList;
            *inputList = closing->next;
            free(closing->lexeme);
            free(closing);
        }
    }

    /* The token's text has been copied or converted; release it. */
    free(lexeme);
    return result;
}