cirilisp/read.c

553 lines
9.4 KiB
C
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include "util.h"
#include "read.h"
int isSerbAlpha(wchar_t c);
int isConstituent(wchar_t c);
int isMacroC(wchar_t c);
int isEscape(wchar_t c);
wint_t scanwc(FILE *stream);
wint_t unscanwc(wint_t c, FILE *stream);
object getToken();
object macroFunction(wchar_t m, FILE *stream);
object Read(char *prompt, FILE *stream)
{
printf("%s", prompt);
wint_t c;
object result;
while (iswspace(c = scanwc(stream)))
;
if (c == WEOF)
{
TYPE(result) = EOFObject;
}
else if (isMacroC(c))
{
result = macroFunction(c, stream);
}
else if (isEscape(c) || isConstituent(c))
{
unscanwc(c, stream);
result = getToken(stream);
}
else
{
SIGERR(invalidCharacterError);
}
if (TYPE(result) == unspecifiedObject)
{
return Read("", stream);
/* уколико улаз функције није прави објекат (на пример уколико је учитан
* коментар) покушавамо прочитати опет */
}
else
{
return result;
}
}
int isSerbAlpha(wchar_t c)
{
return ((c) == L'Ђ') || (((c) >= L'Ј') && ((c) <= L'Ћ')) ||
(((c) >= L'Џ') && ((c) <= L'И')) || (((c) >= L'К') &&
((c) <= L'Ш')) || (((c) >= L'а') && ((c) <= L'и')) ||
(((c) >= L'к') && ((c) <= L'ш')) || ((c) == L'ђ') ||
((c >= L'ј') && (c <= L'ћ')) || (c == L'џ');
}
int isConstituent(wchar_t c)
{
return isSerbAlpha(c) || iswdigit(c) || ((c) == L'!') || ((c) == L'$')
|| ((c) == L'&') || ((c) == L'*') || ((c) == L'+') ||
(((c) >= L'-') && ((c) <= L'/')) || (((c) >= L'<') &&
((c) <= L'@')) || ((c) == L'^') || ((c) == L'\\') ||
((c) == L'_') || ((c) == L'~') || ((c) == L',');
}
int isMacroC(wchar_t c)
{
return ((c) == L'"') || ((c) == L'#') || ((c) == L'\'') ||
((c) == L'(') || ((c) == L')') || ((c) == L';') ||
((c) == L'`');
}
int isEscape(wchar_t c)
{
return (c) == L'|';
}
int bufferSize = 1024;
wchar_t *globalBuffer = NULL;
wchar_t *getBuffer()
{
if (globalBuffer == NULL)
{
globalBuffer = malloc(bufferSize * sizeof(wchar_t));
}
return globalBuffer;
}
wchar_t *increaseBuffer()
{
bufferSize += 1024;
return realloc(globalBuffer, bufferSize);
}
int eofStatus = 0;
wint_t scanwc(FILE *stream)
{
if (eofStatus)
{
return WEOF;
}
else
{
wint_t c = getwc(stream);
if (c == WEOF)
{
eofStatus = 1;
}
return c;
}
}
wint_t unscanwc(wint_t c, FILE *stream)
{
if (c == WEOF)
{
eofStatus = 1;
return WEOF;
}
else
{
return ungetwc(c, stream);
}
}
int validFracNum(char *s)
{
char *endptr;
strtoll(s, &endptr, 10);
if (*endptr == '\0' && endptr != s)
{
return 1;
}
else if (*endptr == '/' && endptr != s)
{
char *denom = endptr + 1;
strtoll(denom, &endptr, 10);
if (*endptr == '\0' && endptr != denom)
{
return 1;
}
else
{
return 0;
}
}
else
{
return 0;
}
}
int validRealNum(char *s)
{
char *endptr;
strtold(s, &endptr);
if (*endptr == '\0')
{
return 1;
}
else
{
return 0;
}
}
object getToken(FILE *stream)
{
object result;
wchar_t *buffer = getBuffer();
wint_t c;
int i = 0;
c = scanwc(stream);
buffer[0] = towlower(c);
if (isEscape(c))
{
while ((c = scanwc(stream)) != WEOF && !isEscape(c))
{
if (i + 2 >= bufferSize)
{
increaseBuffer();
}
buffer[++i] = c;
}
buffer[++i] = c;
buffer[++i] = L'\0';
if (c == WEOF)
{
SIGERR(unexpectedEOFError);
}
}
else
{
while (isConstituent(c = scanwc(stream)))
{
if (i + 1 >= bufferSize)
{
increaseBuffer();
}
buffer[++i] = towlower(c);
}
unscanwc(c, stream);
buffer[++i] = L'\0';
}
int n = wcstombs(NULL, buffer, 0) + 1;
char *s = malloc(n * sizeof(char));
wcstombs(s, buffer, n);
char *endptr;
if (validFracNum(s))
{
TYPE(result) = numberObject;
NUM_TYPE(result) = fractionNum;
NUM_NUMER(result) = strtoll(s, &endptr, 10);
NUM_DENOM(result) = *endptr == '/' ?
strtoll(endptr + 1, &endptr, 10) : 1;
result = shortenFractionNum(result);
}
else if (validRealNum(s))
{
TYPE(result) = numberObject;
NUM_TYPE(result) = realNum;
NUM_REAL(result) = strtold(s, NULL);
}
else
{
TYPE(result) = symbolObject;
SYM(result) = malloc((strlen(s) + 1) * sizeof(char));
strcpy(SYM(result), s);
}
free(s);
return result;
}
wchar_t escapedWChar(wchar_t c)
{
switch (c)
{
case L'n':
return L'\n';
break;
case L't':
return L'\t';
break;
case L'\\':
return L'\\';
break;
case L'"':
return L'"';
break;
default:
return c;
break;
}
}
object dispatchedChar(wint_t c, FILE *stream)
{
object result;
switch (c)
{
case L'\\':
TYPE(result) = charObject;
wchar_t *buffer = getBuffer();
int i = 0, n;
c = scanwc(stream);
if (c == WEOF)
{
SIGERR(unexpectedEOFError);
}
if (!isConstituent(c))
{
CHR(result) = c;
}
else
{
unscanwc(c, stream);
while ((c = scanwc(stream)) != WEOF &&
isConstituent(c))
{
if (i + 1 >= bufferSize)
{
increaseBuffer();
}
buffer[i++] = c;
}
unscanwc(c, stream);
buffer[i] = L'\0';
n = wcslen(buffer);
if (n == 1)
{
CHR(result) = buffer[0];
}
else if (!wcscmp(buffer, L"размак"))
{
CHR(result) = L' ';
}
else if (!wcscmp(buffer, L"новиред"))
{
CHR(result) = L'\n';
}
else if (!wcscmp(buffer, L"табулар"))
{
CHR(result) = L'\t';
}
else if (!wcscmp(buffer, L"нул"))
{
CHR(result) = L'\0';
}
else
{
SIGERR(invalidHashSequenceError);
}
}
break;
case L'И':
case L'и':
TYPE(result) = boolObject;
BOOL(result) = 1;
break;
case L'Л':
case L'л':
TYPE(result) = boolObject;
BOOL(result) = 0;
break;
case L'|':
for (;;)
{
if (((c = scanwc(stream)) == L'|' &&
(c = scanwc(stream)) == L'#') || c == WEOF)
{
break;
}
}
TYPE(result) = unspecifiedObject;
break;
case L'!':
for (;;)
{
if ((c = scanwc(stream)) == L'\n' || c == WEOF)
{
break;
}
}
TYPE(result) = unspecifiedObject;
break;
case WEOF:
SIGERR(unexpectedEOFError);
break;
default:
SIGERR(invalidHashSequenceError);
break;
}
return result;
}
object macroFunction(wchar_t m, FILE *stream)
{
object result;
object *listCurrent;
object expression;
wchar_t *buffer;
wint_t c;
switch (m)
{
case L'(':
listCurrent = &result;
for (;;)
{
object currentObject = Read("", stream);
if (TYPE(currentObject) == errorObject &&
!strcmp(ERR(currentObject),
commonErrs[unmatchedParenError]))
{
deleteObject(currentObject);
TYPE(*listCurrent) = nilObject;
break;
}
else if (TYPE(currentObject) == EOFObject)
{
TYPE(*listCurrent) = nilObject;
deleteObject(result);
SIGERR(unexpectedEOFError);
break;
}
else
{
TYPE(*listCurrent) = consObject;
CONS(*listCurrent) = malloc(sizeof(cons));
CAR(*listCurrent) = copyObject(currentObject);
listCurrent = &CDR(*listCurrent);
}
deleteObject(currentObject);
}
int noErrors = 1;
listCurrent = &result;
while (TYPE(*listCurrent) != nilObject)
{
if (TYPE(CAR(*listCurrent)) == errorObject)
{
noErrors = 0;
break;
}
listCurrent = &CDR(*listCurrent);
}
if (!noErrors)
{
object error = copyObject(CAR(*listCurrent));
deleteObject(result);
CPYERR(ERR(error));
}
int properDotComb = 1, dotPlace = -1, length;
object *dot;
for (length = 0, listCurrent = &result; TYPE(*listCurrent) !=
nilObject; ++length, listCurrent = &CDR(*listCurrent))
{
if (TYPE(CAR(*listCurrent)) == symbolObject &&
!strcmp(SYM(CAR(*listCurrent)), "."))
{
if (dotPlace == -1)
{
dotPlace = length;
dot = listCurrent;
}
else
{
properDotComb = 0;
break;
}
}
}
if (dotPlace != -1)
{
if (dotPlace != length - 2 || dotPlace == 0)
{
properDotComb = 0;
}
}
if (!properDotComb)
{
SIGERR(improperDotNotation);
}
if (dotPlace != -1 && properDotComb)
{
object tmp = copyObject(CAR(CDR(*dot)));
deleteObject(*dot);
*dot = tmp;
}
return result;
break;
case L')':
SIGERR(unmatchedParenError);
break;
case L'\'':
case L'`':
expression = Read("", stream);
if (TYPE(expression) == errorObject)
{
CPYERR(ERR(expression));
}
else if (TYPE(expression) == EOFObject)
{
SIGERR(unexpectedEOFError);
}
TYPE(result) = consObject;
CONS(result) = malloc(sizeof(cons));
TYPE(CAR(result)) = symbolObject;
SYM(CAR(result)) = malloc((strlen("навод") + 1) *
sizeof(char));
strcpy(SYM(CAR(result)), "навод");
TYPE(CDR(result)) = consObject;
CONS(CDR(result)) = malloc(sizeof(cons));
CAR(CDR(result)) = expression;
TYPE(CDR(CDR(result))) = nilObject;
break;
case L';':
TYPE(result) = unspecifiedObject;
while ((c = scanwc(stream)) != L'\n' && c != WEOF)
;
break;
case L'"':
buffer = getBuffer();
int i = 0;
while ((c = scanwc(stream)) != L'"' && c != WEOF)
{
if (i + 2 >= bufferSize)
{
increaseBuffer();
}
if (c == L'\\')
{
c = scanwc(stream);
if (c != L'\n')
{
buffer[i++] = escapedWChar(c);
}
}
else
{
buffer[i++] = c;
}
}
if (c == WEOF)
{
SIGERR(unexpectedEOFError);
}
buffer[i] = L'\0';
int n = wcstombs(NULL, buffer, 0) + 1;
char *s = malloc(n * sizeof(char));
wcstombs(s, buffer, n);
TYPE(result) = stringObject;
STR(result) = s;
break;
case L'#':
result = dispatchedChar(scanwc(stream), stream);
break;
}
return result;
}