/* cirilisp/read.c */

#include <stdio.h>
#include <stdlib.h>
#include <wchar.h>
#include <wctype.h>
#include <ctype.h>
#include <string.h>
#include <regex.h>

#include "util.h"
#include "read.h"

int isSerbAlpha(wchar_t c);
int isConstituent(wchar_t c);
int isMacroC(wchar_t c);
int isEscape(wchar_t c);

wchar_t scanwc(FILE *stream);
/* Pushes the wide character c back onto stream.  Thin wrapper around
 * ungetwc() that evaluates to ungetwc()'s return value.  The expansion has
 * no trailing semicolon and parenthesizes its arguments, so the macro can be
 * used like an ordinary function call (including in unbraced if/else bodies
 * and inside expressions). */
#define unscanwc(c, stream) ungetwc((c), (stream))
object getToken();
object macroFunction(wchar_t m);

/* Prints prompt and reads one object from standard input.
 *
 * Leading whitespace is skipped.  A macro character (see isMacroC) is handed
 * to macroFunction(); an escape or constituent character is pushed back and
 * the whole token is read by getToken().  Any other character is reported
 * through SIGERR(invalidCharacterError).
 *
 * If what was read is not a real object (its type is unspecifiedObject, for
 * example because a comment was consumed), reading is simply attempted
 * again; the prompt is printed only once. */
object read(char *prompt)
{
	wint_t c;
	object result;

	printf("%s", prompt);

	for (;;)
	{
		/* skip whitespace in front of the next object */
		do
		{
			c = scanwc(stdin);
		} while (iswspace(c));

		if (isMacroC(c))
		{
			result = macroFunction(c);
		}
		else if (isEscape(c) || isConstituent(c))
		{
			unscanwc(c, stdin);
			result = getToken();
		}
		else
		{
			SIGERR(invalidCharacterError);
		}

		if (TYPE(result) != unspecifiedObject)
		{
			return result;
		}
		/* nothing usable was read (e.g. a comment): go around again */
	}
}

/* Returns 1 if c lies in one of the inclusive wide-character ranges listed
 * below (upper- and lower-case Cyrillic letters used for Serbian), and 0
 * otherwise.  The test compares wchar_t values numerically, so it relies on
 * the ordering of the execution wide character set. */
int isSerbAlpha(wchar_t c)
{
	static const struct
	{
		wchar_t first;
		wchar_t last;
	} ranges[] = {
		{ L'Ђ', L'Ђ' },
		{ L'Ј', L'Ћ' },
		{ L'Џ', L'И' },
		{ L'К', L'Ш' },
		{ L'а', L'и' },
		{ L'к', L'ш' },
		{ L'ђ', L'ђ' },
		{ L'ј', L'ћ' },
		{ L'џ', L'џ' },
	};
	size_t k;

	for (k = 0; k < sizeof ranges / sizeof ranges[0]; ++k)
	{
		if (c >= ranges[k].first && c <= ranges[k].last)
		{
			return 1;
		}
	}

	return 0;
}

/* Returns 1 if c may appear inside an unescaped token and 0 otherwise.
 * Constituents are the letters accepted by isSerbAlpha(), the digits
 * accepted by iswdigit(), and the punctuation characters listed in the
 * switch below. */
int isConstituent(wchar_t c)
{
	if (isSerbAlpha(c) || iswdigit(c))
	{
		return 1;
	}

	switch (c)
	{
	case L'!':
	case L'$':
	case L'&':
	case L'*':
	case L'+':
	case L',':
	case L'-':
	case L'.':
	case L'/':
	case L'<':
	case L'=':
	case L'>':
	case L'?':
	case L'@':
	case L'\\':
	case L'^':
	case L'_':
	case L'~':
		return 1;
	default:
		return 0;
	}
}

/* Returns 1 if c is one of the reader macro characters
 * (double quote, '#', single quote, '(', ')', ';' or backquote) and 0
 * otherwise.  These are the characters read() passes to macroFunction(). */
int isMacroC(wchar_t c)
{
	switch (c)
	{
	case L'"':
	case L'#':
	case L'\'':
	case L'(':
	case L')':
	case L';':
	case L'`':
		return 1;
	default:
		return 0;
	}
}

/* Returns 1 if c is the token escape delimiter '|' and 0 otherwise. */
int isEscape(wchar_t c)
{
	const wchar_t escapeDelimiter = L'|';

	return c == escapeDelimiter;
}

/* Capacity of the shared reader buffer, counted in wchar_t elements (not
 * bytes). */
int bufferSize = 1024;
/* Scratch buffer shared by the reader functions; allocated on first use by
 * getBuffer() and grown by increaseBuffer(). */
wchar_t *globalBuffer = NULL;

/* Returns the shared buffer, allocating room for bufferSize wide characters
 * the first time it is called.  Terminates the program if the allocation
 * fails, so the result is never NULL. */
wchar_t *getBuffer()
{
	if (globalBuffer == NULL)
	{
		globalBuffer = malloc(bufferSize * sizeof *globalBuffer);
		if (globalBuffer == NULL)
		{
			perror("malloc");
			exit(EXIT_FAILURE);
		}
	}

	return globalBuffer;
}

/* Grows the shared buffer by 1024 wide characters and returns its (possibly
 * moved) address.  Existing contents are preserved.
 *
 * realloc() may relocate the block, so callers holding a pointer obtained
 * from getBuffer() must replace it with the value returned here.  Both
 * globalBuffer and bufferSize are updated only once the reallocation has
 * succeeded; on failure the program is terminated. */
wchar_t *increaseBuffer()
{
	int newSize = bufferSize + 1024;
	/* the size passed to realloc is in bytes, hence the sizeof factor */
	wchar_t *grown = realloc(globalBuffer, newSize * sizeof *grown);

	if (grown == NULL)
	{
		perror("realloc");
		exit(EXIT_FAILURE);
	}

	globalBuffer = grown;
	bufferSize = newSize;

	return globalBuffer;
}

/* Reads and returns the next wide character from stream.
 *
 * When fgetwc() reports WEOF the function does not return: it prints a
 * farewell message to standard output and ends the program with exit
 * status 0. */
wchar_t scanwc(FILE *stream)
{
	wint_t c = fgetwc(stream);

	if (c != WEOF)
	{
		return c;
	}

	printf("\nКрај улазног стрима.\nВоЗдра и дођите нам опет!\n");
	exit(0);
}

/* Returns the number of consecutive decimal digits at the start of the
 * NUL-terminated string s (0 if s does not begin with a digit). */
int lengthDigitArray(char *s)
{
	int i;

	/* isdigit() is only defined for EOF and values representable as
	 * unsigned char; plain char may be negative (e.g. bytes of a UTF-8
	 * sequence), so convert before the call */
	for (i = 0; isdigit((unsigned char) s[i]); ++i)
		;

	return i;
}

/* Returns 1 if the NUL-terminated string s consists of one or more decimal
 * digits, optionally followed by '/' and one or more decimal digits, with
 * nothing else after them; returns 0 otherwise.  A leading sign is not
 * accepted. */
int isFracNumToken(char *s)
{
	int numerLen = lengthDigitArray(s);
	int denomLen;
	char *rest;

	if (numerLen == 0)
	{
		return 0;
	}

	rest = s + numerLen;
	if (*rest == '\0')
	{
		/* digits only */
		return 1;
	}
	if (*rest != '/')
	{
		return 0;
	}

	/* the digits after the slash must run to the end of the string */
	denomLen = lengthDigitArray(rest + 1);
	return denomLen != 0 && rest[1 + denomLen] == '\0';
}

/* Returns 1 if the NUL-terminated string s consists of one or more decimal
 * digits, optionally followed by a decimal comma ',' and one or more decimal
 * digits, with nothing else after them; returns 0 otherwise.  A leading sign
 * is not accepted. */
int isRealNumToken(char *s)
{
	int wholeLen = lengthDigitArray(s);
	int fracLen;
	char *rest;

	if (wholeLen == 0)
	{
		return 0;
	}

	rest = s + wholeLen;
	if (*rest == '\0')
	{
		/* digits only */
		return 1;
	}
	if (*rest != ',')
	{
		return 0;
	}

	/* the digits after the comma must run to the end of the string */
	fracLen = lengthDigitArray(rest + 1);
	return fracLen != 0 && rest[1 + fracLen] == '\0';
}

object getToken()
{
	object result;
	wchar_t *buffer = getBuffer();
	wint_t c;
	int i = 0;
	buffer[0] = c = scanwc(stdin);
	if (isEscape(c))
	{
		while (!isEscape(c = scanwc(stdin)))
		{
			if (i + 2 >= bufferSize)
			{
				increaseBuffer();
			}
			buffer[++i] = c;
		}

		buffer[++i] = c;
		buffer[++i] = L'\0';
	}
	else
	{
		while (isConstituent(c = scanwc(stdin)))
		{
			if (i + 1 >= bufferSize)
			{
				increaseBuffer();
			}
			buffer[++i] = towlower(c);
		}
		unscanwc(c, stdin);
		buffer[++i] = L'\0';
	}

	int n = wcstombs(NULL, buffer, 0) + 1;
	char *s = malloc(n * sizeof(char));
	wcstombs(s, buffer, n);

	regex_t regNumberFrac, regNumberReal;
	regcomp(&regNumberFrac, "^[-+]?[[:digit:]]+(/[[:digit:]]+)?$",
						REG_EXTENDED);
	regcomp(&regNumberReal, "^[-+]?[[:digit:]]*,[[:digit:]]+$",
						REG_EXTENDED);
	const int nmatches = 1;
	regmatch_t a[nmatches];

	if (!regexec(&regNumberFrac, s, nmatches, a, 0))
	{
		TYPE(result) = numberObject;
		NUM_TYPE(result) = fractionNum;
		NUM_NUMER(result) = atoll(s);
		char *tmp;
		NUM_DENOM(result) = (tmp = strchr(s, '/')) == NULL ?
			1 : atoll(tmp + 1);
		result = shortenFractionNum(result);
	}
	else if (!regexec(&regNumberReal, s, nmatches, a, 0))
	{
		TYPE(result) = numberObject;
		NUM_TYPE(result) = realNum;
		NUM_REAL(result) = strtold(s, NULL);
	}
	else
	{
		TYPE(result) = symbolObject;
		SYM(result) = malloc((strlen(s) + 1) * sizeof(char));
		strcpy(SYM(result), s);
	}
	regfree(&regNumberFrac);
	regfree(&regNumberReal);

	return result;
}

/* Maps the character that follows a backslash inside a string literal to the
 * character it stands for: 'n' becomes a newline and 't' a tab.  Every other
 * character - including the backslash and the double quote - stands for
 * itself and is returned unchanged. */
wchar_t escapedWChar(wchar_t c)
{
	if (c == L'n')
	{
		return L'\n';
	}
	if (c == L't')
	{
		return L'\t';
	}

	return c;
}

/* Handles the character c that follows the dispatch character '#'.
 *
 *  #\x, #\name  character object.  If the character after the backslash is
 *               not a constituent it is taken literally.  Otherwise a run
 *               of constituents is read (the character that ends the run is
 *               pushed back onto the stream): a single character denotes
 *               itself, and the names "размак", "новиред" and "табулар"
 *               denote space, newline and tab.  Any other name signals
 *               invalidHashSequenceError.
 *  #И / #и      boolean object with value 1
 *  #Л / #л      boolean object with value 0
 *  #| ... |#    block comment; yields an object of type unspecifiedObject
 *
 * Any other character signals invalidHashSequenceError. */
object dispatchedChar(wchar_t c)
{
	object result;

	switch (c)
	{
	case L'\\':
	{
		wchar_t *buffer = getBuffer();
		int i = 0, n;

		TYPE(result) = charObject;

		c = scanwc(stdin);
		if (!isConstituent(c))
		{
			CHR(result) = c;
		}
		else
		{
			unscanwc(c, stdin);
			/* stop at the first non-constituent rather than at
			 * whitespace, so that a delimiter directly after the
			 * character (as in "#\a)") is not swallowed */
			while (isConstituent(c = scanwc(stdin)))
			{
				if (i + 1 >= bufferSize)
				{
					/* the buffer may have moved */
					buffer = increaseBuffer();
				}
				buffer[i++] = c;
			}
			unscanwc(c, stdin);
			buffer[i] = L'\0';
			n = wcslen(buffer);
			if (n == 1)
			{
				CHR(result) = buffer[0];
			}
			else if (!wcscmp(buffer, L"размак"))
			{
				CHR(result) = L' ';
			}
			else if (!wcscmp(buffer, L"новиред"))
			{
				CHR(result) = L'\n';
			}
			else if (!wcscmp(buffer, L"табулар"))
			{
				CHR(result) = L'\t';
			}
			else
			{
				SIGERR(invalidHashSequenceError);
			}
		}
		break;
	}
	case L'И':
	case L'и':
		TYPE(result) = boolObject;
		BOOL(result) = 1;
		break;
	case L'Л':
	case L'л':
		TYPE(result) = boolObject;
		BOOL(result) = 0;
		break;
	case L'|':
	{
		/* remember the previous character so that a terminator
		 * preceded by an extra bar ("||#") is still recognised */
		wchar_t previous = L'\0';

		for (;;)
		{
			c = scanwc(stdin);
			if (previous == L'|' && c == L'#')
			{
				break;
			}
			previous = c;
		}
		TYPE(result) = unspecifiedObject;
		break;
	}
	default:
		SIGERR(invalidHashSequenceError);
		break;
	}

	return result;
}

/* Reads the construct introduced by the macro character m, which has
 * already been consumed from standard input.
 *
 *  (        list: objects are read until the matching ')'.  If any element
 *           is an error object, the list is deleted and the first such
 *           error is signalled.  A single "." symbol in the next-to-last
 *           position (and not in first position) produces a dotted pair;
 *           any other use of "." signals improperDotNotation.
 *  )        signals unmatchedParenError (this is how the list reader
 *           detects the end of a list)
 *  ' and `  the following object x is wrapped as the list (навод x); if
 *           reading x produced an error, that error is returned instead
 *  ;        comment up to the end of the line; yields an object of type
 *           unspecifiedObject
 *  "        string literal up to the closing double quote; a backslash
 *           escapes the next character (see escapedWChar).  Signals
 *           invalidCharacterError if the text cannot be converted to a
 *           multibyte string.
 *  #        dispatch on the next character (see dispatchedChar)
 *
 * Any other value of m signals invalidCharacterError; read() only calls
 * this function for characters accepted by isMacroC(). */
object macroFunction(wchar_t m)
{
	object result;
	object *listCurrent;
	object expression;
	wchar_t *buffer;

	switch (m)
	{
	case L'(':
	{
		/* build the list cell by cell until ')' is reported */
		listCurrent = &result;
		for (;;)
		{
			object currentObject = read("");
			if (TYPE(currentObject) == errorObject &&
				ERR(currentObject) == unmatchedParenError)
			{
				TYPE(*listCurrent) = nilObject;
				break;
			}
			else
			{
				TYPE(*listCurrent) = consObject;
				CONS(*listCurrent) = malloc(sizeof(cons));
				CAR(*listCurrent) = copyObject(currentObject);

				listCurrent = &CDR(*listCurrent);
			}
			deleteObject(currentObject);
		}

		/* an error inside the list makes the whole list an error */
		int noErrors = 1;
		listCurrent = &result;

		while (TYPE(*listCurrent) != nilObject)
		{
			if (TYPE(CAR(*listCurrent)) == errorObject)
			{
				noErrors = 0;
				break;
			}
			listCurrent = &CDR(*listCurrent);
		}
		if (!noErrors)
		{
			object error = copyObject(CAR(*listCurrent));
			deleteObject(result);
			SIGERR(ERR(error));
		}

		/* locate a "." symbol and check that it is used properly */
		int properDotComb = 1, dotPlace = -1, length;
		object *dot = NULL;
		for (length = 0, listCurrent = &result; TYPE(*listCurrent) !=
			nilObject; ++length, listCurrent = &CDR(*listCurrent))
		{
			if (TYPE(CAR(*listCurrent)) == symbolObject &&
				!strcmp(SYM(CAR(*listCurrent)), "."))
			{
				if (dotPlace == -1)
				{
					dotPlace = length;
					dot = listCurrent;
				}
				else
				{
					properDotComb = 0;
					break;
				}
			}
		}
		if (dotPlace != -1)
		{
			if (dotPlace != length - 2 || dotPlace == 0)
			{
				properDotComb = 0;
			}
		}
		if (!properDotComb)
		{
			/* release the list before reporting the error, as
			 * the element-error path above does */
			deleteObject(result);
			SIGERR(improperDotNotation);
		}
		if (dotPlace != -1 && properDotComb)
		{
			/* replace the tail starting at the dot with the
			 * object that follows the dot */
			object tmp = copyObject(CAR(CDR(*dot)));
			deleteObject(*dot);
			*dot = tmp;
		}

		return result;
	}
	case L')':
		SIGERR(unmatchedParenError);
		break;
	case L'\'':
	case L'`':
		expression = read("");
		if (TYPE(expression) == errorObject)
		{
			return expression;
		}

		TYPE(result) = consObject;
		CONS(result) = malloc(sizeof(cons));
		TYPE(CAR(result)) = symbolObject;
		SYM(CAR(result)) = malloc((strlen("навод") + 1) *
			sizeof(char));
		strcpy(SYM(CAR(result)), "навод");

		TYPE(CDR(result)) = consObject;
		CONS(CDR(result)) = malloc(sizeof(cons));
		CAR(CDR(result)) = expression;

		TYPE(CDR(CDR(result))) = nilObject;
		break;
	case L';':
		TYPE(result) = unspecifiedObject;
		while (scanwc(stdin) != L'\n')
			;
		break;
	case L'"':
	{
		wchar_t c;
		int i = 0;

		buffer = getBuffer();
		while ((c = scanwc(stdin)) != L'"')
		{
			if (i + 2 >= bufferSize)
			{
				/* the buffer may have moved */
				buffer = increaseBuffer();
			}
			if (c == L'\\')
			{
				c = scanwc(stdin);
				buffer[i++] = escapedWChar(c);
			}
			else
			{
				buffer[i++] = c;
			}
		}
		buffer[i] = L'\0';

		/* wcstombs() returns (size_t) -1 when a wide character has
		 * no multibyte representation in the current locale */
		size_t length = wcstombs(NULL, buffer, 0);
		if (length == (size_t) -1)
		{
			SIGERR(invalidCharacterError);
		}
		char *s = malloc(length + 1);
		wcstombs(s, buffer, length + 1);
		/* the string object takes ownership of s */
		TYPE(result) = stringObject;
		STR(result) = s;
		break;
	}
	case L'#':
		result = dispatchedChar(scanwc(stdin));
		break;
	default:
		/* not a macro character: never return an uninitialized
		 * object */
		SIGERR(invalidCharacterError);
		break;
	}

	return result;
}