cirilisp/read.c

273 lines
5.6 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
#include "util.h"
#include "read.h"
#include "print.h"
typedef enum
{
undefinedToken,
numberToken,
symbolToken,
lParenthesisToken,
rParenthesisToken
} tokenType;
typedef struct _Token
{
tokenType type;
char *lexeme;
struct _Token *next;
} token;
int completeExpression(token **tokenQueue);
char *readline();
void append(token **head, token *appendix);
token *lexLine(char *input);
object parseExpression(token **inputList);
token *tokenQueue = NULL;
object read(char *prompt)
{
printf("%s", prompt);
while (!completeExpression(&tokenQueue))
{
char *input = readline();
if (input == NULL) /* унесен је EOF */
{
printf("\nКрај улазног стрима.\n");
exit(0);
}
append(&tokenQueue, lexLine(input));
}
return parseExpression(&tokenQueue);
}
int completeExpression(token **tokenQueue)
{
int result = 0, indentLevel = 0;
token *current = *tokenQueue;
while (current != NULL)
{
if (current->type == lParenthesisToken)
{
++indentLevel;
}
else if (current->type == rParenthesisToken)
{
if (indentLevel == 0)
{
token **deleteParen = tokenQueue;
while (*deleteParen != current)
{
deleteParen = &(*deleteParen)->next;
}
*deleteParen = current->next;
free(current);
current = *deleteParen;
continue;
}
else
{
if (indentLevel == 1)
{
result = 1;
}
--indentLevel;
}
}
else
{
if (indentLevel == 0)
{
result = 1;
}
}
current = current->next;
}
return result;
}
ssize_t bytesRead;
size_t nbytes = 2048;
char *buffer = NULL;
char *readline()
{
if (buffer == NULL)
{
buffer = (char *) malloc(nbytes + 1);
}
bytesRead = getline(&buffer, &nbytes, stdin);
if (bytesRead == -1)
{
return NULL;
}
buffer[strlen(buffer)-1] = '\0';
/* Уклања завршни њу-лајн или ЕОФ у стрингу
* и копира стринг на ново место */
return buffer;
}
void append(token **head, token *tail)
{
token **current = head;
while (*current != NULL)
{
current = &(*current)->next;
}
*current = tail;
}
regex_t regNumber, regSymbol, regLParenthesis, regRParenthesis, regSpace;
token *lex1Token(char *input, int *i)
{
token *result = malloc(sizeof(token));
result->next = NULL;
regcomp(&regSpace, "^[[:space:]]*", REG_EXTENDED);
regcomp(&regNumber, "^[-+]?[[:digit:]]+", REG_EXTENDED);
regcomp(&regSymbol, "^[-+/*[:alpha:]][-+/*[:alnum:]]*", REG_EXTENDED);
regcomp(&regLParenthesis, "^\\(", REG_EXTENDED);
regcomp(&regRParenthesis, "^\\)", REG_EXTENDED);
const int nmatches = 1;
regmatch_t a[nmatches];
regexec(&regSpace, input + *i, nmatches, a, 0);
*i += a[0].rm_eo;
/* помера индекс да би се игнорисали почетни "вајт-спејс" карактери */
if (!regexec(&regNumber, input + *i, nmatches, a, 0))
{
result->type = numberToken;
}
else if (!regexec(&regSymbol, input + *i, nmatches, a, 0))
{
result->type = symbolToken;
}
else if (!regexec(&regLParenthesis, input + *i, nmatches, a, 0))
{
result->type = lParenthesisToken;
}
else if (!regexec(&regRParenthesis, input + *i, nmatches, a, 0))
{
result->type = rParenthesisToken;
}
else
{
result->type = undefinedToken;
result->lexeme = NULL;
goto skipStringCopy;
}
result->lexeme = malloc((a[0].rm_eo + 1) * sizeof(char));
strncpy(result->lexeme, input + *i, a[0].rm_eo);
result->lexeme[a[0].rm_eo] = '\0';
*i += a[0].rm_eo;
regexec(&regSpace, input + *i, nmatches, a, 0);
*i += a[0].rm_eo;
/* игнорисање крајњих вајт-спејс карактера */
skipStringCopy:
regfree(&regSpace);
regfree(&regNumber);
regfree(&regSymbol);
regfree(&regLParenthesis);
regfree(&regRParenthesis);
return result;
}
token *lexLine(char *input)
{
int i = 0, n;
n = strlen(input);
token *root = NULL, **new;
new = &root;
while (i < n)
{
*new = lex1Token(input, &i);
if ((*new)->type == undefinedToken)
{
/* уколико се у реду нађе токен који је лексички погрешан, штампа се место тог
* токена у реду и бришу се сви токени нађени у реду, функција враћа NULL*/
fprintf(stderr, "Невалидан токен на месту %d\n", i);
new = &root;
while (*new != NULL)
{
free(root->lexeme);
new = &((*new)->next);
free(root);
root = *new;
}
return NULL;
}
new = &((*new)->next);
}
return root;
}
object parseExpression(token **inputList)
{
object result;
token input = **inputList;
free(*inputList);
*inputList = input.next;
/* скида први преостали токен са листе унесених, да би се могао
* прерадити */
if (input.type == numberToken)
{
TYPE(result) = numberObject;
NUM(result) = atoll(input.lexeme);
return result;
}
else if (input.type == symbolToken)
{
TYPE(result) = symbolObject;
SYM(result) = malloc((strlen(input.lexeme) + 1)
* sizeof(char));
/* Алокација стринга */
strcpy(SYM(result), input.lexeme);
return result;
}
else if (input.type == lParenthesisToken)
{
object *listCurrent = &result;
while ((*inputList)->type != rParenthesisToken)
{
TYPE(*listCurrent) = consObject;
CONS(*listCurrent) = malloc(sizeof(cons));
/* Алокација конс ћелије */
CAR(*listCurrent) = parseExpression(inputList);
listCurrent = &CDR(*listCurrent);
}
TYPE(*listCurrent) = nilObject;
input = **inputList;
free(*inputList);
*inputList = input.next;
}
return result;
}