/*
 * cirilisp/read.c
 *
 * Note: this file contains Cyrillic (non-ASCII) characters in character
 * literals and user-facing strings. Tools may flag them as "ambiguous
 * Unicode" because some look like Latin letters; here they appear to be
 * intentional, since the reader's alphabet is the Serbian Cyrillic one.
 */

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include <regex.h>
#include "util.h"
#include "read.h"
/* Character-class predicates used by the reader (defined below). */
int isSerbAlpha(wchar_t c);
int isConstituent(wchar_t c);
int isMacroC(wchar_t c);
int isEscape(wchar_t c);

/* Reads one wide character from stream; terminates the program on WEOF. */
wchar_t scanwc(FILE *stream);

/*
 * Pushes a wide character back onto stream (counterpart of scanwc).
 * The expansion deliberately has no trailing semicolon and parenthesizes its
 * arguments, so the macro behaves like an ordinary function-call expression;
 * every call site supplies its own ';'.
 */
#define unscanwc(c, stream) ungetwc((c), (stream))

/* Token and reader-macro readers (defined below). */
object getToken();
object macroFunction(wchar_t m);
/*
 * Prints prompt, skips leading whitespace on stdin and reads one object.
 *
 * A reader-macro character is handed to macroFunction(); an escape or
 * constituent character is pushed back and the token is read by getToken().
 * Any other character is reported through SIGERR(invalidCharacterError)
 * (SIGERR is a project macro; it presumably returns an error object from
 * this function -- confirm against its definition).
 *
 * If what was read is not a real object (for example a comment was read,
 * yielding an unspecifiedObject), reading is retried with an empty prompt.
 */
object read(char *prompt)
{
	object result;
	wint_t first;

	printf("%s", prompt);

	/* consume whitespace until the first significant character */
	do
	{
		first = scanwc(stdin);
	}
	while (iswspace(first));

	if (isMacroC(first))
	{
		result = macroFunction(first);
	}
	else if (!isEscape(first) && !isConstituent(first))
	{
		SIGERR(invalidCharacterError);
	}
	else
	{
		unscanwc(first, stdin);
		result = getToken();
	}

	if (TYPE(result) != unspecifiedObject)
	{
		return result;
	}

	/* nothing usable was read (e.g. a comment): try again, silently */
	return read("");
}
/*
 * Returns 1 if c lies in one of the Cyrillic code-point ranges accepted by
 * the reader as letters, 0 otherwise.  The ranges are inclusive and are
 * compared by wchar_t value.
 */
int isSerbAlpha(wchar_t c)
{
	static const struct
	{
		wchar_t first;
		wchar_t last;
	} letterRanges[] = {
		{ L'Ђ', L'Ђ' },
		{ L'Ј', L'Ћ' },
		{ L'Џ', L'И' },
		{ L'К', L'Ш' },
		{ L'а', L'и' },
		{ L'к', L'ш' },
		{ L'ђ', L'ђ' },
		{ L'ј', L'ћ' },
		{ L'џ', L'џ' },
	};
	size_t k;

	for (k = 0; k < sizeof letterRanges / sizeof letterRanges[0]; ++k)
	{
		if (c >= letterRanges[k].first && c <= letterRanges[k].last)
		{
			return 1;
		}
	}
	return 0;
}
/*
 * Returns 1 if c may appear inside a token, 0 otherwise: a letter accepted
 * by isSerbAlpha(), a digit (iswdigit), or one of the punctuation
 * characters listed below.
 */
int isConstituent(wchar_t c)
{
	switch (c)
	{
	case L'!':
	case L'$':
	case L'&':
	case L'*':
	case L'+':
	case L',':
	case L'-':
	case L'.':
	case L'/':
	case L'<':
	case L'=':
	case L'>':
	case L'?':
	case L'@':
	case L'\\':
	case L'^':
	case L'_':
	case L'~':
		return 1;
	default:
		return isSerbAlpha(c) || iswdigit(c);
	}
}
/*
 * Returns 1 if c is one of the reader-macro characters
 * (" # ' ( ) ; `), 0 otherwise.
 */
int isMacroC(wchar_t c)
{
	switch (c)
	{
	case L'"':
	case L'#':
	case L'\'':
	case L'(':
	case L')':
	case L';':
	case L'`':
		return 1;
	default:
		return 0;
	}
}
/* Returns 1 if c is the token escape delimiter '|', 0 otherwise. */
int isEscape(wchar_t c)
{
	if (c == L'|')
	{
		return 1;
	}
	return 0;
}
/* Capacity of globalBuffer, counted in wchar_t elements (not bytes). */
int bufferSize = 1024;

/* Scratch buffer shared by the reader; allocated lazily by getBuffer(). */
wchar_t *globalBuffer = NULL;

/*
 * Returns the shared scratch buffer, allocating it on first use with room
 * for bufferSize wide characters.  Never returns NULL: if the allocation
 * fails the error is reported and the program exits.
 */
wchar_t *getBuffer()
{
	if (globalBuffer == NULL)
	{
		globalBuffer = malloc(bufferSize * sizeof *globalBuffer);
		if (globalBuffer == NULL)
		{
			perror("malloc");
			exit(EXIT_FAILURE);
		}
	}
	return globalBuffer;
}

/*
 * Grows the shared scratch buffer by 1024 wide characters and returns its
 * (possibly moved) address.  Existing contents are preserved.
 *
 * Callers that hold a pointer obtained earlier from getBuffer() must replace
 * it with the returned value, because realloc may move the block.
 * Never returns NULL: if the reallocation fails the error is reported and
 * the program exits.
 */
wchar_t *increaseBuffer()
{
	int newSize = bufferSize + 1024;
	/* realloc takes a size in bytes, so scale by the element size */
	wchar_t *grown = realloc(globalBuffer, newSize * sizeof *grown);

	if (grown == NULL)
	{
		perror("realloc");
		exit(EXIT_FAILURE);
	}
	/* publish the new block so later getBuffer() calls see it */
	globalBuffer = grown;
	bufferSize = newSize;
	return globalBuffer;
}
/*
 * Reads and returns the next wide character from stream.
 * When fgetwc reports WEOF, a farewell message is printed to stdout and the
 * program exits with status 0, so this function only returns on success.
 */
wchar_t scanwc(FILE *stream)
{
	wint_t next = fgetwc(stream);

	if (next != WEOF)
	{
		return next;
	}

	printf("\nКрај улазног стрима.\nВоЗдра и дођите нам опет!\n");
	exit(0);
}
/*
 * Returns the number of leading decimal digits in the NUL-terminated
 * string s (0 if s does not start with a digit).
 */
int lengthDigitArray(char *s)
{
	int i = 0;

	/*
	 * isdigit is only defined for values representable as unsigned char
	 * (or EOF); the cast keeps bytes >= 0x80, such as UTF-8 lead bytes,
	 * from being passed as negative values when char is signed.
	 */
	while (isdigit((unsigned char) s[i]))
	{
		++i;
	}
	return i;
}
/*
 * Returns 1 if s has the form DIGITS or DIGITS/DIGITS (each run non-empty,
 * nothing before or after), 0 otherwise.  Signs are not accepted.
 */
int isFracNumToken(char *s)
{
	int numerLen = lengthDigitArray(s);

	if (numerLen == 0)
	{
		return 0;
	}
	if (s[numerLen] == '\0')
	{
		/* plain integer */
		return 1;
	}
	if (s[numerLen] != '/')
	{
		return 0;
	}

	/* a denominator must follow the slash and run to the end of s */
	char *denom = s + numerLen + 1;
	int denomLen = lengthDigitArray(denom);

	return denomLen != 0 && denom[denomLen] == '\0';
}
/*
 * Returns 1 if s has the form DIGITS or DIGITS,DIGITS (comma as the decimal
 * separator; each run non-empty, nothing before or after), 0 otherwise.
 * Signs are not accepted.
 */
int isRealNumToken(char *s)
{
	int wholeLen = lengthDigitArray(s);

	if (wholeLen == 0)
	{
		return 0;
	}
	if (s[wholeLen] == '\0')
	{
		/* digits only, no fractional part */
		return 1;
	}
	if (s[wholeLen] != ',')
	{
		return 0;
	}

	/* a fractional part must follow the comma and run to the end of s */
	char *fraction = s + wholeLen + 1;
	int fractionLen = lengthDigitArray(fraction);

	return fractionLen != 0 && fraction[fractionLen] == '\0';
}
object getToken()
{
object result;
wchar_t *buffer = getBuffer();
wint_t c;
int i = 0;
buffer[0] = c = scanwc(stdin);
if (isEscape(c))
{
while (!isEscape(c = scanwc(stdin)))
{
if (i + 2 >= bufferSize)
{
increaseBuffer();
}
buffer[++i] = c;
}
buffer[++i] = c;
buffer[++i] = L'\0';
}
else
{
while (isConstituent(c = scanwc(stdin)))
{
if (i + 1 >= bufferSize)
{
increaseBuffer();
}
buffer[++i] = towlower(c);
}
unscanwc(c, stdin);
buffer[++i] = L'\0';
}
int n = wcstombs(NULL, buffer, 0) + 1;
char *s = malloc(n * sizeof(char));
wcstombs(s, buffer, n);
regex_t regNumberFrac, regNumberReal;
regcomp(&regNumberFrac, "^[-+]?[[:digit:]]+(/[[:digit:]]+)?$",
REG_EXTENDED);
regcomp(&regNumberReal, "^[-+]?[[:digit:]]*,[[:digit:]]+$",
REG_EXTENDED);
const int nmatches = 1;
regmatch_t a[nmatches];
if (!regexec(&regNumberFrac, s, nmatches, a, 0))
{
TYPE(result) = numberObject;
NUM_TYPE(result) = fractionNum;
NUM_NUMER(result) = atoll(s);
char *tmp;
NUM_DENOM(result) = (tmp = strchr(s, '/')) == NULL ?
1 : atoll(tmp + 1);
result = shortenFractionNum(result);
}
else if (!regexec(&regNumberReal, s, nmatches, a, 0))
{
TYPE(result) = numberObject;
NUM_TYPE(result) = realNum;
NUM_REAL(result) = strtold(s, NULL);
}
else
{
TYPE(result) = symbolObject;
SYM(result) = malloc((strlen(s) + 1) * sizeof(char));
strcpy(SYM(result), s);
}
regfree(&regNumberFrac);
regfree(&regNumberReal);
return result;
}
/*
 * Maps the character that follows a backslash inside a string literal to
 * the character it denotes: 'n' -> newline, 't' -> tab.  Every other
 * character (including '\\' and '"') stands for itself.
 */
wchar_t escapedWChar(wchar_t c)
{
	if (c == L'n')
	{
		return L'\n';
	}
	if (c == L't')
	{
		return L'\t';
	}
	return c;
}
object dispatchedChar(wchar_t c)
{
object result;
switch (c)
{
case L'\\':
TYPE(result) = charObject;
wchar_t *buffer = getBuffer();
int i = 0, n;
c = scanwc(stdin);
if (!isConstituent(c))
{
CHR(result) = c;
}
else
{
unscanwc(c, stdin);
while (!iswspace(c = scanwc(stdin)))
{
if (i + 1 >= bufferSize)
{
increaseBuffer();
}
buffer[i++] = c;
}
buffer[i] = L'\0';
n = wcslen(buffer);
if (n == 1)
{
CHR(result) = buffer[0];
}
else if (!wcscmp(buffer, L"размак"))
{
CHR(result) = L' ';
}
else if (!wcscmp(buffer, L"новиред"))
{
CHR(result) = L'\n';
}
else if (!wcscmp(buffer, L"табулар"))
{
CHR(result) = L'\t';
}
else
{
SIGERR(invalidHashSequenceError);
}
}
break;
case L'И':
case L'и':
TYPE(result) = boolObject;
BOOL(result) = 1;
break;
case L'Л':
case L'л':
TYPE(result) = boolObject;
BOOL(result) = 0;
break;
case L'|':
for (;;)
{
if ((c = scanwc(stdin)) == L'|' &&
(c = scanwc(stdin)) == L'#')
{
break;
}
}
TYPE(result) = unspecifiedObject;
break;
default:
SIGERR(invalidHashSequenceError);
break;
}
return result;
}
/*
 * Runs the reader macro for the macro character m, which has already been
 * consumed from stdin.
 *
 *  (      reads objects until the matching ')' and builds a list; a single
 *         "." before the last element produces a dotted pair/list.
 *         If any element is an error, the list is released and that error
 *         is signalled; a misplaced dot signals improperDotNotation.
 *  )      signals unmatchedParenError (this is how the list reader above
 *         recognises the end of a list).
 *  ' `    reads the next object x and returns the list (навод x); an error
 *         object read in place of x is returned unchanged.
 *  ;      line comment: skips through the end of the line and yields an
 *         unspecifiedObject.
 *  "      string literal with backslash escapes (see escapedWChar); the
 *         resulting stringObject owns its heap-allocated text.
 *  #      delegates to dispatchedChar() with the following character.
 *
 * Errors are raised through the project macro SIGERR, which presumably
 * returns an error object from this function (confirm against its
 * definition).
 */
object macroFunction(wchar_t m)
{
	object result;
	object *listCurrent;
	object expression;
	wchar_t *buffer;

	switch (m)
	{
	case L'(':
	{
		/* collect elements until read() reports the closing paren */
		listCurrent = &result;
		for (;;)
		{
			object currentObject = read("");
			if (TYPE(currentObject) == errorObject &&
			    ERR(currentObject) == unmatchedParenError)
			{
				TYPE(*listCurrent) = nilObject;
				break;
			}
			else
			{
				TYPE(*listCurrent) = consObject;
				CONS(*listCurrent) = malloc(sizeof(cons));
				CAR(*listCurrent) = copyObject(currentObject);
				listCurrent = &CDR(*listCurrent);
			}
			deleteObject(currentObject);
		}

		/* an error among the elements becomes the result of the read */
		int noErrors = 1;
		listCurrent = &result;
		while (TYPE(*listCurrent) != nilObject)
		{
			if (TYPE(CAR(*listCurrent)) == errorObject)
			{
				noErrors = 0;
				break;
			}
			listCurrent = &CDR(*listCurrent);
		}
		if (!noErrors)
		{
			object error = copyObject(CAR(*listCurrent));
			deleteObject(result);
			SIGERR(ERR(error));
		}

		/* locate a "." symbol; only one, second to last, is allowed */
		int properDotComb = 1, dotPlace = -1, length;
		object *dot = NULL;
		for (length = 0, listCurrent = &result;
		     TYPE(*listCurrent) != nilObject;
		     ++length, listCurrent = &CDR(*listCurrent))
		{
			if (TYPE(CAR(*listCurrent)) == symbolObject &&
			    !strcmp(SYM(CAR(*listCurrent)), "."))
			{
				if (dotPlace == -1)
				{
					dotPlace = length;
					dot = listCurrent;
				}
				else
				{
					properDotComb = 0;
					break;
				}
			}
		}
		if (dotPlace != -1)
		{
			if (dotPlace != length - 2 || dotPlace == 0)
			{
				properDotComb = 0;
			}
		}
		if (!properDotComb)
		{
			/* release the list, as on the element-error path above */
			deleteObject(result);
			SIGERR(improperDotNotation);
		}
		if (dotPlace != -1 && properDotComb)
		{
			/* replace "(. x)" tail with x itself */
			object tmp = copyObject(CAR(CDR(*dot)));
			deleteObject(*dot);
			*dot = tmp;
		}
		return result;
	}
	case L')':
		SIGERR(unmatchedParenError);
		break;
	case L'\'':
	case L'`':
		expression = read("");
		if (TYPE(expression) == errorObject)
		{
			return expression;
		}
		TYPE(result) = consObject;
		CONS(result) = malloc(sizeof(cons));
		TYPE(CAR(result)) = symbolObject;
		SYM(CAR(result)) = malloc((strlen("навод") + 1) *
		    sizeof(char));
		strcpy(SYM(CAR(result)), "навод");
		TYPE(CDR(result)) = consObject;
		CONS(CDR(result)) = malloc(sizeof(cons));
		CAR(CDR(result)) = expression;
		TYPE(CDR(CDR(result))) = nilObject;
		break;
	case L';':
		TYPE(result) = unspecifiedObject;
		while (scanwc(stdin) != L'\n')
			;
		break;
	case L'"':
	{
		buffer = getBuffer();
		wchar_t c;
		int i = 0;

		while ((c = scanwc(stdin)) != L'"')
		{
			if (i + 2 >= bufferSize)
			{
				wchar_t *grown = increaseBuffer();

				if (grown == NULL)
				{
					perror("realloc");
					exit(EXIT_FAILURE);
				}
				/* the block may have moved: use the new address */
				globalBuffer = buffer = grown;
			}
			if (c == L'\\')
			{
				c = scanwc(stdin);
				buffer[i++] = escapedWChar(c);
			}
			else
			{
				buffer[i++] = c;
			}
		}
		buffer[i] = L'\0';

		size_t byteLength = wcstombs(NULL, buffer, 0);
		if (byteLength == (size_t) -1)
		{
			/* some character has no multibyte form in this locale */
			SIGERR(invalidCharacterError);
		}
		char *s = malloc(byteLength + 1);
		if (s == NULL)
		{
			perror("malloc");
			exit(EXIT_FAILURE);
		}
		wcstombs(s, buffer, byteLength + 1);
		TYPE(result) = stringObject;
		STR(result) = s;
		break;
	}
	case L'#':
		result = dispatchedChar(scanwc(stdin));
		break;
	default:
		/* not a macro character: never return an uninitialized object */
		SIGERR(invalidCharacterError);
		break;
	}
	return result;
}