cirilisp/read.c

508 lines
8.9 KiB
C
Raw Normal View History

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include <regex.h>
#include "util.h"
#include "read.h"
/* Forward declarations of helpers that are defined further down in this file. */
/* Character-class predicates used by the reader. */
int isSerbAlpha(wchar_t c);
int isConstituent(wchar_t c);
int isMacroC(wchar_t c);
int isEscape(wchar_t c);
/* Wide-character input wrappers that track end of input in eofStatus. */
wint_t scanwc(FILE *stream);
wint_t unscanwc(wint_t c, FILE *stream);
/* Readers that read() dispatches to: plain tokens and macro characters. */
object getToken();
object macroFunction(wchar_t m);
/*
 * Reads one object from standard input.
 *
 * prompt is printed once before any input is consumed.  Leading whitespace is
 * skipped, then the first significant character selects the reader:
 *   - end of input yields an object of type EOFObject;
 *   - a macro character is handed to macroFunction();
 *   - an escape or constituent character is pushed back and getToken() reads
 *     the whole token;
 *   - anything else is reported through SIGERR(invalidCharacterError).
 *
 * When the selected reader produces an object of type unspecifiedObject (for
 * example because a comment was read) nothing real has been read yet, so the
 * function tries again.  The retry is a loop rather than a recursive call so
 * that a long run of comments cannot exhaust the stack.
 */
object read(char *prompt)
{
	object result;
	wint_t c;

	printf("%s", prompt);
	/* The prompt has no trailing newline; flush so that it is visible before
	 * the call blocks waiting for input. */
	fflush(stdout);

	for (;;)
	{
		/* Skip whitespace in front of the object. */
		do
		{
			c = scanwc(stdin);
		} while (c != WEOF && iswspace(c));

		if (c == WEOF)
		{
			TYPE(result) = EOFObject;
		}
		else if (isMacroC(c))
		{
			result = macroFunction(c);
		}
		else if (isEscape(c) || isConstituent(c))
		{
			unscanwc(c, stdin);
			result = getToken();
		}
		else
		{
			SIGERR(invalidCharacterError);
		}

		if (TYPE(result) != unspecifiedObject)
		{
			return result;
		}
		/* Not a real object (e.g. a comment was consumed): read again. */
	}
}
/*
 * Tells whether c is one of the Cyrillic letters accepted by the reader.
 *
 * The accepted set is described as a few isolated letters plus contiguous
 * ranges of wide-character values; the ranges are deliberately split between
 * И and К (and between и and к), so whatever lies between those letters in the
 * wide-character set is rejected.
 *
 * Returns 1 when c belongs to the set and 0 otherwise.
 */
int isSerbAlpha(wchar_t c)
{
	/* Letters that stand alone, outside the ranges below. */
	if (c == L'Ђ' || c == L'ђ' || c == L'џ')
	{
		return 1;
	}

	/* Upper-case ranges. */
	if ((c >= L'Ј' && c <= L'Ћ') ||
	    (c >= L'Џ' && c <= L'И') ||
	    (c >= L'К' && c <= L'Ш'))
	{
		return 1;
	}

	/* Lower-case ranges. */
	if ((c >= L'а' && c <= L'и') ||
	    (c >= L'к' && c <= L'ш') ||
	    (c >= L'ј' && c <= L'ћ'))
	{
		return 1;
	}

	return 0;
}
/*
 * Tells whether c may appear inside an ordinary (unescaped) token.
 *
 * Constituents are the letters accepted by isSerbAlpha(), the digits, and a
 * fixed set of punctuation: ! $ & * + ^ \ _ ~ , together with the ranges
 * '-' through '/' and '<' through '@'.
 *
 * Returns 1 for a constituent character and 0 otherwise.
 */
int isConstituent(wchar_t c)
{
	if (isSerbAlpha(c) || iswdigit(c))
	{
		return 1;
	}

	/* The two punctuation ranges. */
	if (c >= L'-' && c <= L'/')
	{
		return 1;
	}
	if (c >= L'<' && c <= L'@')
	{
		return 1;
	}

	/* Remaining single punctuation characters. */
	switch (c)
	{
	case L'!':
	case L'$':
	case L'&':
	case L'*':
	case L'+':
	case L'^':
	case L'\\':
	case L'_':
	case L'~':
	case L',':
		return 1;
	default:
		return 0;
	}
}
/*
 * Tells whether c is a macro character, i.e. one that read() hands to
 * macroFunction(): double quote, hash, single quote, either parenthesis,
 * semicolon or backquote.
 *
 * Returns 1 for a macro character and 0 otherwise.
 */
int isMacroC(wchar_t c)
{
	switch (c)
	{
	case L'"':
	case L'#':
	case L'\'':
	case L'(':
	case L')':
	case L';':
	case L'`':
		return 1;
	default:
		return 0;
	}
}
/*
 * Tells whether c is the escape character '|' that delimits an escaped token.
 *
 * Returns 1 for '|' and 0 for every other character.
 */
int isEscape(wchar_t c)
{
	if (c == L'|')
	{
		return 1;
	}
	return 0;
}
/* Capacity of globalBuffer, counted in wchar_t elements (not bytes). */
int bufferSize = 1024;
/* Scratch buffer shared by the reader functions; allocated on first use. */
wchar_t *globalBuffer = NULL;

/*
 * Returns the shared scratch buffer, allocating bufferSize elements the first
 * time it is called.
 *
 * The returned pointer is only valid until the next call to increaseBuffer(),
 * which may move the buffer.  The process is terminated if the allocation
 * fails, so the result is never NULL.
 */
wchar_t *getBuffer(void)
{
	if (globalBuffer == NULL)
	{
		globalBuffer = malloc((size_t) bufferSize * sizeof *globalBuffer);
		if (globalBuffer == NULL)
		{
			fputs("read: out of memory\n", stderr);
			exit(EXIT_FAILURE);
		}
	}
	return globalBuffer;
}

/*
 * Grows the shared scratch buffer by 1024 elements, keeping its contents.
 *
 * Returns the (possibly relocated) buffer; callers must continue with the
 * returned pointer instead of the one they held before the call.  Both
 * globalBuffer and bufferSize are updated.  The process is terminated if the
 * reallocation fails, so the result is never NULL.
 */
wchar_t *increaseBuffer(void)
{
	int newSize = bufferSize + 1024;
	/* realloc takes a size in bytes, hence the multiplication by the element
	 * size; passing the bare element count would shrink the buffer. */
	wchar_t *grown = realloc(globalBuffer, (size_t) newSize * sizeof *grown);

	if (grown == NULL)
	{
		fputs("read: out of memory\n", stderr);
		exit(EXIT_FAILURE);
	}
	globalBuffer = grown;
	bufferSize = newSize;
	return globalBuffer;
}
/*
 * Latched end-of-input flag: set once WEOF has been seen and never cleared in
 * this file.  It is a single global, not a per-stream flag.
 */
int eofStatus = 0;

/*
 * Reads the next wide character from stream.
 *
 * Returns the character read, or WEOF.  Once getwc() has returned WEOF (end
 * of file or a read error) the condition is remembered in eofStatus and every
 * later call returns WEOF immediately without touching the stream again.
 */
wint_t scanwc(FILE *stream)
{
	if (eofStatus)
	{
		return WEOF;
	}

	wint_t c = getwc(stream);
	if (c == WEOF)
	{
		eofStatus = 1;
	}
	return c;
}
/*
 * Pushes c back onto stream so that the next scanwc() returns it again.
 *
 * WEOF cannot be pushed back onto a stream, so "un-reading" it is recorded by
 * setting eofStatus instead; the next scanwc() then reports WEOF again.
 *
 * Returns WEOF when c is WEOF, otherwise whatever ungetwc() returns.
 */
wint_t unscanwc(wint_t c, FILE *stream)
{
	if (c != WEOF)
	{
		return ungetwc(c, stream);
	}

	eofStatus = 1;
	return WEOF;
}
/*
 * Returns the number of decimal digits at the start of the NUL-terminated
 * string s (0 when s does not begin with a digit).
 */
int lengthDigitArray(char *s)
{
	int i = 0;

	/* <ctype.h> functions require a value representable as unsigned char;
	 * the cast keeps bytes with the high bit set (e.g. UTF-8) well defined. */
	while (isdigit((unsigned char) s[i]))
	{
		++i;
	}
	return i;
}
object getToken()
{
object result;
wchar_t *buffer = getBuffer();
wint_t c;
int i = 0;
c = scanwc(stdin);
buffer[0] = towlower(c);
if (isEscape(c))
{
while ((c = scanwc(stdin)) != WEOF && !isEscape(c))
{
if (i + 2 >= bufferSize)
{
increaseBuffer();
}
buffer[++i] = c;
}
buffer[++i] = c;
buffer[++i] = L'\0';
if (c == WEOF)
{
SIGERR(unexpectedEOFError);
}
}
else
{
while (isConstituent(c = scanwc(stdin)))
{
if (i + 1 >= bufferSize)
{
increaseBuffer();
}
buffer[++i] = towlower(c);
}
unscanwc(c, stdin);
buffer[++i] = L'\0';
}
int n = wcstombs(NULL, buffer, 0) + 1;
char *s = malloc(n * sizeof(char));
wcstombs(s, buffer, n);
regex_t regNumberFrac, regNumberReal;
regcomp(&regNumberFrac, "^[-+]?[[:digit:]]+(/[[:digit:]]+)?$",
REG_EXTENDED);
regcomp(&regNumberReal, "^[-+]?[[:digit:]]*,[[:digit:]]+$",
REG_EXTENDED);
const int nmatches = 1;
regmatch_t a[nmatches];
if (!regexec(&regNumberFrac, s, nmatches, a, 0))
{
TYPE(result) = numberObject;
NUM_TYPE(result) = fractionNum;
NUM_NUMER(result) = atoll(s);
char *tmp;
NUM_DENOM(result) = (tmp = strchr(s, '/')) == NULL ?
1 : atoll(tmp + 1);
result = shortenFractionNum(result);
}
else if (!regexec(&regNumberReal, s, nmatches, a, 0))
{
TYPE(result) = numberObject;
NUM_TYPE(result) = realNum;
NUM_REAL(result) = strtold(s, NULL);
}
else
{
TYPE(result) = symbolObject;
SYM(result) = malloc((strlen(s) + 1) * sizeof(char));
strcpy(SYM(result), s);
}
regfree(&regNumberFrac);
regfree(&regNumberReal);
return result;
}
/*
 * Maps the character that follows a backslash inside a string literal to the
 * character it stands for: 'n' becomes a newline and 't' a tab.  Every other
 * character - including backslash and double quote - stands for itself.
 */
wchar_t escapedWChar(wchar_t c)
{
	if (c == L'n')
	{
		return L'\n';
	}
	if (c == L't')
	{
		return L'\t';
	}
	/* '\\', '"' and any unrecognised escape are returned unchanged. */
	return c;
}
/*
 * Handles the character c that follows a '#' in the input.
 *
 *   #\...   character literal.  If the character after the backslash is not a
 *           constituent it is the literal itself.  Otherwise everything up to
 *           the next whitespace (or end of input) is read as a name: a single
 *           character stands for itself, and the names "размак", "новиред"
 *           and "табулар" give space, newline and tab.  Any other name is
 *           reported with SIGERR(invalidHashSequenceError).
 *   #И #и   boolean object with value 1.
 *   #Л #л   boolean object with value 0.
 *   #| |#   block comment; yields an object of type unspecifiedObject.  An
 *           unterminated comment simply ends at end of input.
 *
 * End of input right after '#' is reported with SIGERR(unexpectedEOFError) and
 * any other character with SIGERR(invalidHashSequenceError).
 *
 * NOTE(review): SIGERR comes from util.h; every use in this file relies on it
 * leaving the enclosing function with an error object - confirm.
 */
object dispatchedChar(wint_t c)
{
	object result;

	switch (c)
	{
	case L'\\':
	{
		wchar_t *buffer = getBuffer();
		int i = 0;

		TYPE(result) = charObject;
		c = scanwc(stdin);
		if (c == WEOF)
		{
			SIGERR(unexpectedEOFError);
		}
		if (!isConstituent(c))
		{
			CHR(result) = c;
			break;
		}

		/* A constituent starts a character name: collect it up to the next
		 * whitespace character, which is consumed. */
		buffer[i++] = c;
		while ((c = scanwc(stdin)) != WEOF && !iswspace(c))
		{
			if (i + 1 >= bufferSize)
			{
				/* Growing may move the buffer; continue with the
				 * pointer that increaseBuffer() hands back. */
				buffer = increaseBuffer();
			}
			buffer[i++] = c;
		}
		buffer[i] = L'\0';

		if (wcslen(buffer) == 1)
		{
			CHR(result) = buffer[0];
		}
		else if (!wcscmp(buffer, L"размак"))
		{
			CHR(result) = L' ';
		}
		else if (!wcscmp(buffer, L"новиред"))
		{
			CHR(result) = L'\n';
		}
		else if (!wcscmp(buffer, L"табулар"))
		{
			CHR(result) = L'\t';
		}
		else
		{
			SIGERR(invalidHashSequenceError);
		}
		break;
	}
	case L'И':
	case L'и':
		TYPE(result) = boolObject;
		BOOL(result) = 1;
		break;
	case L'Л':
	case L'л':
		TYPE(result) = boolObject;
		BOOL(result) = 0;
		break;
	case L'|':
	{
		/* Skip to the closing "|#".  The previous character is remembered
		 * so that a terminator preceded by another bar ("||#") is still
		 * recognised. */
		wint_t previous = L'\0';

		while ((c = scanwc(stdin)) != WEOF)
		{
			if (previous == L'|' && c == L'#')
			{
				break;
			}
			previous = c;
		}
		TYPE(result) = unspecifiedObject;
		break;
	}
	case WEOF:
	{
		SIGERR(unexpectedEOFError);
	}
	/* No break here, as in the original: SIGERR is expected to have left
	 * the function already. */
	default:
	{
		SIGERR(invalidHashSequenceError);
		break;
	}
	}
	return result;
}
/*
 * Reads the construct introduced by the macro character m, which the caller
 * has already consumed from standard input.
 *
 *   (   list.  Objects are read with read("") until one of them is the error
 *       object carrying unmatchedParenError, which is what reading a ')'
 *       produces.  End of input inside the list is reported with
 *       SIGERR(unexpectedEOFError); if any element is an error object the
 *       first such error is re-signalled.  A single "." symbol that is the
 *       second-to-last element (and not the first) makes a dotted list whose
 *       final tail is the last element; any other use of "." is reported with
 *       SIGERR(improperDotNotation).
 *   )   SIGERR(unmatchedParenError).
 *   ' ` the next object wrapped as the two-element list (навод <object>).
 *   ;   comment up to end of line; yields an unspecifiedObject.
 *   "   string up to the closing quote; a backslash escapes the following
 *       character via escapedWChar().  End of input inside the string is
 *       reported with SIGERR(unexpectedEOFError).
 *   #   handed to dispatchedChar() together with the next character.
 *
 * Any other value of m is reported with SIGERR(invalidCharacterError).
 *
 * NOTE(review): SIGERR comes from util.h; every use in this file relies on it
 * leaving the enclosing function with an error object - confirm.
 */
object macroFunction(wchar_t m)
{
	object result;
	wint_t c;

	switch (m)
	{
	case L'(':
	{
		object *listCurrent = &result;

		/* Build the list cell by cell; listCurrent always points at the
		 * slot that receives the next cell or the terminating nil. */
		for (;;)
		{
			object currentObject = read("");

			if (TYPE(currentObject) == errorObject &&
			    ERR(currentObject) == unmatchedParenError)
			{
				/* The closing parenthesis ends the list. */
				TYPE(*listCurrent) = nilObject;
				break;
			}
			else if (TYPE(currentObject) == EOFObject)
			{
				TYPE(*listCurrent) = nilObject;
				deleteObject(result);
				SIGERR(unexpectedEOFError);
				break;
			}
			else
			{
				TYPE(*listCurrent) = consObject;
				CONS(*listCurrent) = malloc(sizeof(cons));
				CAR(*listCurrent) = copyObject(currentObject);
				listCurrent = &CDR(*listCurrent);
			}
			deleteObject(currentObject);
		}

		/* An error inside any element becomes the result of the whole
		 * list; the first one found wins. */
		for (listCurrent = &result; TYPE(*listCurrent) != nilObject;
		    listCurrent = &CDR(*listCurrent))
		{
			if (TYPE(CAR(*listCurrent)) == errorObject)
			{
				object error = copyObject(CAR(*listCurrent));
				deleteObject(result);
				SIGERR(ERR(error));
			}
		}

		/* Locate "." symbols and count the elements. */
		int properDotComb = 1, dotPlace = -1, length = 0;
		object *dot = NULL;

		for (listCurrent = &result; TYPE(*listCurrent) != nilObject;
		    ++length, listCurrent = &CDR(*listCurrent))
		{
			if (TYPE(CAR(*listCurrent)) == symbolObject &&
			    !strcmp(SYM(CAR(*listCurrent)), "."))
			{
				if (dotPlace != -1)
				{
					/* A second dot is never valid. */
					properDotComb = 0;
					break;
				}
				dotPlace = length;
				dot = listCurrent;
			}
		}
		/* The dot must be second-to-last and must not come first. */
		if (dotPlace != -1 &&
		    (dotPlace != length - 2 || dotPlace == 0))
		{
			properDotComb = 0;
		}
		if (!properDotComb)
		{
			/* Release the list that was built before reporting. */
			deleteObject(result);
			SIGERR(improperDotNotation);
		}
		if (dotPlace != -1)
		{
			/* Replace the "(. x)" tail by x itself. */
			object tmp = copyObject(CAR(CDR(*dot)));
			deleteObject(*dot);
			*dot = tmp;
		}
		return result;
	}
	case L')':
	{
		SIGERR(unmatchedParenError);
		break;
	}
	case L'\'':
	case L'`':
	{
		object expression = read("");

		if (TYPE(expression) == errorObject)
		{
			SIGERR(ERR(expression));
		}
		else if (TYPE(expression) == EOFObject)
		{
			SIGERR(unexpectedEOFError);
		}

		/* Build (навод <expression>); the new list takes over the
		 * expression object. */
		TYPE(result) = consObject;
		CONS(result) = malloc(sizeof(cons));
		TYPE(CAR(result)) = symbolObject;
		SYM(CAR(result)) = malloc((strlen("навод") + 1) *
		    sizeof(char));
		strcpy(SYM(CAR(result)), "навод");
		TYPE(CDR(result)) = consObject;
		CONS(CDR(result)) = malloc(sizeof(cons));
		CAR(CDR(result)) = expression;
		TYPE(CDR(CDR(result))) = nilObject;
		break;
	}
	case L';':
		TYPE(result) = unspecifiedObject;
		/* Discard the rest of the line. */
		while ((c = scanwc(stdin)) != L'\n' && c != WEOF)
			;
		break;
	case L'"':
	{
		wchar_t *buffer = getBuffer();
		int i = 0;

		while ((c = scanwc(stdin)) != L'"' && c != WEOF)
		{
			if (i + 2 >= bufferSize)
			{
				/* Growing may move the buffer; continue with the
				 * pointer that increaseBuffer() hands back. */
				buffer = increaseBuffer();
			}
			if (c == L'\\')
			{
				c = scanwc(stdin);
				buffer[i++] = escapedWChar(c);
			}
			else
			{
				buffer[i++] = c;
			}
		}
		if (c == WEOF)
		{
			SIGERR(unexpectedEOFError);
		}
		buffer[i] = L'\0';

		/* Store the string in multibyte form; the object owns it. */
		size_t needed = wcstombs(NULL, buffer, 0);
		if (needed == (size_t) -1)
		{
			/* Some character has no representation in the current
			 * locale. */
			SIGERR(invalidCharacterError);
		}
		char *s = malloc(needed + 1);
		wcstombs(s, buffer, needed + 1);
		TYPE(result) = stringObject;
		STR(result) = s;
		break;
	}
	case L'#':
		result = dispatchedChar(scanwc(stdin));
		break;
	default:
	{
		/* Not a macro character: never return an uninitialised object. */
		SIGERR(invalidCharacterError);
		break;
	}
	}
	return result;
}