commit 1ef9e255d0e6c2d83e0c01d5433713f5d77cd563
Author: Petar Kapriš
Date:   Tue Jul 16 18:05:56 2024 +0200

    Initial commit: completed simple lexer for pj1

diff --git a/pj1-go/go.mod b/pj1-go/go.mod
new file mode 100644
index 0000000..52a4ad2
--- /dev/null
+++ b/pj1-go/go.mod
@@ -0,0 +1,10 @@
+module git.bonsai.cool/kayprish/pj1/pj1-go
+
+go 1.18
+
+require golang.org/x/tools v0.1.13-0.20220917004541-4d18923f060e
+
+require (
+	golang.org/x/mod v0.12.0 // indirect
+	golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f // indirect
+)
diff --git a/pj1-go/go.sum b/pj1-go/go.sum
new file mode 100644
index 0000000..2069d68
--- /dev/null
+++ b/pj1-go/go.sum
@@ -0,0 +1,6 @@
+golang.org/x/mod v0.12.0 h1:rmsUpXtvNzj340zd98LZ4KntptpfRHwpFOHG188oHXc=
+golang.org/x/mod v0.12.0/go.mod h1:iBbtSCu2XBx23ZKBPSOrRkjjQPZFPuis4dIYUhu/chs=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f h1:v4INt8xihDGvnrfjMDVXGxw9wrfxYyCjk0KbXjhR55s=
+golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
+golang.org/x/tools v0.1.13-0.20220917004541-4d18923f060e h1:K/LreqAwv7hZaSPyj5LvaiQd2wROouJDabf2r+oBqUw=
+golang.org/x/tools v0.1.13-0.20220917004541-4d18923f060e/go.mod h1:VsjNM1dMo+Ofkp5d7y7fOdQZD8MTXSQ4w3EPk65AvKU=
diff --git a/pj1-go/lexer.go b/pj1-go/lexer.go
new file mode 100644
index 0000000..89ba5b7
--- /dev/null
+++ b/pj1-go/lexer.go
@@ -0,0 +1,415 @@
+package main
+
+import (
+	"fmt"
+	"math/big"
+	"strconv"
+	"strings"
+	"unicode/utf8"
+)
+
+// TokenType identifies the lexical category of a Token.
+type TokenType int
+
+const (
+	// Single-character tokens.
+	LEFT_PAREN TokenType = iota
+	RIGHT_PAREN
+	LEFT_BRACE
+	RIGHT_BRACE
+	COMMA
+	DOT
+	MINUS
+	PLUS
+	SEMICOLON
+	STAR
+
+	// One or two character tokens.
+	BANG
+	BANG_EQUAL
+	EQUAL
+	EQUAL_EQUAL
+	GREATER
+	GREATER_EQUAL
+	LESS
+	LESS_EQUAL
+	SLASH
+	SLASH_DOT
+	SLASH_UNDERSCORE
+
+	// Literals
+	IDENTIFIER
+	STRING
+	INTEGER
+	FLOAT
+
+	// Keywords.
+	AND
+	CLASS
+	ELSE
+	FALSE
+	FOR
+	FUN
+	IF
+	NIL
+	OR
+	PRINT
+	RETURN
+	SUPER
+	THIS
+	TRUE
+	VAR
+	WHILE
+
+	EOF
+)
+
+//go:generate go run golang.org/x/tools/cmd/stringer -type=TokenType
+
+// keywords maps every reserved word to its token type. identifier consults it
+// to tell keywords apart from ordinary identifiers.
+var keywords = map[string]TokenType{
+	"and":    AND,
+	"class":  CLASS,
+	"else":   ELSE,
+	"false":  FALSE,
+	"for":    FOR,
+	"fun":    FUN,
+	"if":     IF,
+	"nil":    NIL,
+	"or":     OR,
+	"print":  PRINT,
+	"return": RETURN,
+	"super":  SUPER,
+	"this":   THIS,
+	"true":   TRUE,
+	"var":    VAR,
+	"while":  WHILE,
+}
+
+// Token is one lexeme of the source together with its type, its literal value
+// (nil for tokens that carry none) and the line the lexer was on when the
+// token was completed.
+type Token struct {
+	ttype   TokenType
+	lexeme  string
+	literal interface{}
+	line    int
+}
+
+// NewToken builds a Token from its four components.
+func NewToken(ttype TokenType, lexeme string, literal interface{}, line int) Token {
+	t := Token{ttype, lexeme, literal, line}
+	return t
+}
+
+// String renders the token as "TYPE lexeme literal"; the line is not included.
+func (t Token) String() string {
+	return fmt.Sprintf("%v %v %v", t.ttype, t.lexeme, t.literal)
+}
+
+// Lexer turns source text into tokens. Positions are tracked both as byte
+// offsets (used for slicing source) and as rune counts, for the start of the
+// token being scanned and for the next unread character.
+type Lexer struct {
+	source string
+	tokens []Token
+
+	startByte   int
+	currentByte int
+	startRune   int
+	currentRune int
+	line        int
+}
+
+// NewLexer returns a Lexer positioned at the start of source, on line 1.
+func NewLexer(source string) Lexer {
+	l := Lexer{source, []Token{}, 0, 0, 0, 0, 1}
+	return l
+}
+
+// ScanTokens lexes the whole source into l.tokens and terminates the list
+// with an EOF token. Lexical errors are reported through error.
+func (l *Lexer) ScanTokens() {
+	for !l.atEnd() {
+		l.startByte = l.currentByte
+		l.startRune = l.currentRune
+		l.scanToken()
+	}
+
+	l.tokens = append(l.tokens, NewToken(EOF, "", nil, l.line))
+}
+
+// atEnd reports whether every byte of the source has been consumed.
+func (l *Lexer) atEnd() bool {
+	return l.currentByte >= len(l.source)
+}
+
+// scanToken consumes one character and, if it starts a token, the rest of
+// that token. Whitespace and "//" comments produce no token.
+func (l *Lexer) scanToken() {
+	c := l.advance()
+	switch c {
+	case '(':
+		l.addSimpleToken(LEFT_PAREN)
+	case ')':
+		l.addSimpleToken(RIGHT_PAREN)
+	case '{':
+		l.addSimpleToken(LEFT_BRACE)
+	case '}':
+		l.addSimpleToken(RIGHT_BRACE)
+	case ',':
+		l.addSimpleToken(COMMA)
+	case '.':
+		l.addSimpleToken(DOT)
+	case '-':
+		l.addSimpleToken(MINUS)
+	case '+':
+		l.addSimpleToken(PLUS)
+	case ';':
+		l.addSimpleToken(SEMICOLON)
+	case '*':
+		l.addSimpleToken(STAR)
+	case '!':
+		if l.match('=') {
+			l.addSimpleToken(BANG_EQUAL)
+		} else {
+			l.addSimpleToken(BANG)
+		}
+	case '=':
+		if l.match('=') {
+			l.addSimpleToken(EQUAL_EQUAL)
+		} else {
+			l.addSimpleToken(EQUAL)
+		}
+	case '<':
+		if l.match('=') {
+			l.addSimpleToken(LESS_EQUAL)
+		} else {
+			l.addSimpleToken(LESS)
+		}
+	case '>':
+		if l.match('=') {
+			l.addSimpleToken(GREATER_EQUAL)
+		} else {
+			l.addSimpleToken(GREATER)
+		}
+	case '/':
+		if l.match('/') {
+			// A comment goes until the end of the line
+			for l.peek() != '\n' && !l.atEnd() {
+				l.advance()
+			}
+		} else if l.match('.') {
+			l.addSimpleToken(SLASH_DOT)
+		} else if l.match('_') {
+			l.addSimpleToken(SLASH_UNDERSCORE)
+		} else {
+			l.addSimpleToken(SLASH)
+		}
+	case ' ', '\r', '\t':
+	case '\n':
+		l.line++
+	case '"':
+		l.str()
+
+	default:
+		if isDigit(c) {
+			l.number()
+		} else if isAlpha(c) {
+			l.identifier()
+		} else {
+			// TODO: if there are multiple bad characters
+			// coalesce similar errors into one
+			// %q shows the character itself; %v would print the
+			// rune's numeric code point.
+			error(l.line, fmt.Sprintf("Unexpected character, %q.", c))
+		}
+	}
+}
+
+// identifier scans the rest of an identifier whose first character has been
+// consumed and emits either the matching keyword token or IDENTIFIER.
+func (l *Lexer) identifier() {
+	for isAlphaNumeric(l.peek()) {
+		l.advance()
+	}
+
+	text := l.source[l.startByte:l.currentByte]
+	ttype, ok := keywords[text]
+
+	if !ok {
+		ttype = IDENTIFIER
+	}
+
+	l.addSimpleToken(ttype)
+}
+
+// number scans the rest of a numeric literal whose first digit has been
+// consumed. Digits alone give an INTEGER with a *big.Int literal; digits, a
+// '.', and at least one more digit give a FLOAT with a float64 literal.
+func (l *Lexer) number() {
+	isInt := true
+	for isDigit(l.peek()) {
+		l.advance()
+	}
+
+	if l.peek() == '.' && isDigit(l.peekNext()) {
+		l.advance()
+		isInt = false
+
+		for isDigit(l.peek()) {
+			l.advance()
+		}
+	}
+
+	// Only allow integer and float literals, other values can be
+	// made by combining these
+	text := l.source[l.startByte:l.currentByte]
+	if isInt {
+		// Store a *big.Int: math/big does not support shallow copies of
+		// Int values, and only the pointer type implements fmt.Formatter,
+		// so a plain big.Int would print as a raw struct. SetString cannot
+		// fail here because text consists solely of ASCII digits.
+		bigint, _ := new(big.Int).SetString(text, 10)
+		l.addToken(INTEGER, bigint)
+		return
+	}
+
+	float, err := strconv.ParseFloat(text, 64)
+	if err != nil {
+		// With this syntax the only possible failure is a range error.
+		error(l.line, fmt.Sprintf("Float literal %v is out of range.", text))
+		return
+	}
+	l.addToken(FLOAT, float)
+}
+
+// match consumes the next character and returns true only if it equals
+// expected; otherwise nothing is consumed.
+func (l *Lexer) match(expected rune) bool {
+	if l.atEnd() {
+		return false
+	}
+	ch, width := utf8.DecodeRuneInString(l.source[l.currentByte:])
+	if ch != expected {
+		return false
+	}
+	l.currentRune++
+	l.currentByte += width
+	return true
+}
+
+// peek returns the next unread character without consuming it, or '\x00' at
+// the end of the source.
+func (l *Lexer) peek() rune {
+	if l.atEnd() {
+		return '\x00'
+	}
+	ch, _ := utf8.DecodeRuneInString(l.source[l.currentByte:])
+	return ch
+}
+
+// peekNext returns the character after the next unread one without consuming
+// anything, or '\x00' if the source ends first.
+func (l *Lexer) peekNext() rune {
+	_, width1 := utf8.DecodeRuneInString(l.source[l.currentByte:])
+	// "width1 == 0" signifies we reached the end of the string with the first character
+	if width1 == 0 || l.currentByte+width1 >= len(l.source) {
+		return '\x00'
+	}
+	ch2, _ := utf8.DecodeRuneInString(l.source[l.currentByte+width1:])
+	return ch2
+}
+
+// isAlpha reports whether c is an ASCII letter or an underscore.
+func isAlpha(c rune) bool {
+	return (c >= 'a' && c <= 'z') ||
+		(c >= 'A' && c <= 'Z') ||
+		c == '_'
+}
+
+// isAlphaNumeric reports whether c may appear inside an identifier.
+func isAlphaNumeric(c rune) bool {
+	return isAlpha(c) || isDigit(c)
+}
+
+// isDigit reports whether c is an ASCII decimal digit.
+func isDigit(c rune) bool {
+	return c >= '0' && c <= '9'
+}
+
+// advance consumes the next character of the source and returns it.
+func (l *Lexer) advance() rune {
+	ch, width := utf8.DecodeRuneInString(l.source[l.currentByte:])
+	l.currentRune++
+	l.currentByte += width
+	return ch
+}
+
+// str scans a string literal whose opening quote has been consumed, decoding
+// backslash escapes, and emits a STRING token with the decoded text. An
+// invalid escape or a missing closing quote is reported and yields no token.
+func (l *Lexer) str() {
+	var sb strings.Builder
+	for l.peek() != '"' && !l.atEnd() {
+		c := l.peek()
+		if c == '\n' {
+			l.line++
+		} else if c == '\\' {
+			l.advance()
+			if l.atEnd() {
+				// Backslash was the last character: let the check
+				// below report the string as unterminated.
+				break
+			}
+			// TODO: add more escape sequences, including \xNN
+			switch l.peek() {
+			case 'n':
+				c = '\n'
+			case 'r':
+				c = '\r'
+			case 't':
+				c = '\t'
+			case '\\':
+				c = '\\'
+			case '"':
+				c = '"'
+			case '\'':
+				c = '\''
+			case 'e':
+				c = '\x1b'
+			default:
+				// %c prints the offending character, not its code point.
+				error(l.line, fmt.Sprintf("Invalid escape sequence \\%c.", l.peek()))
+				return
+			}
+		}
+		sb.WriteRune(c)
+		l.advance()
+	}
+
+	if l.atEnd() {
+		error(l.line, "Unterminated string.")
+		return
+	}
+
+	// Closing ".
+	l.advance()
+
+	value := sb.String()
+	l.addToken(STRING, value)
+}
+
+// addSimpleToken emits a token that carries no literal value.
+// Simple refers to "having no literal" (TODO: rename function)
+func (l *Lexer) addSimpleToken(ttype TokenType) {
+	l.addToken(ttype, nil)
+}
+
+// addToken appends a token of the given type whose lexeme is the source text
+// from the start of the current token up to the current position.
+func (l *Lexer) addToken(ttype TokenType, literal interface{}) {
+	text := l.source[l.startByte:l.currentByte]
+	l.tokens = append(l.tokens, NewToken(ttype, text, literal, l.line))
+}
diff --git a/pj1-go/pj1.go b/pj1-go/pj1.go
new file mode 100644
index 0000000..694f167
--- /dev/null
+++ b/pj1-go/pj1.go
@@ -0,0 +1,88 @@
+package main
+
+import (
+	"bufio"
+	"fmt"
+	"io/ioutil"
+	"os"
+)
+
+var (
+	// hadError is set by error and tells runFile to exit with a failure
+	// status; runPrompt clears it after every line.
+	hadError bool = false
+)
+
+// main runs the script named by the single optional argument, or starts an
+// interactive prompt when no argument is given.
+func main() {
+	if len(os.Args) > 2 {
+		fmt.Println("Usage: pj1-go [script]")
+		os.Exit(64)
+	} else if len(os.Args) == 2 {
+		// os.Args[0] is the program name; the script path is os.Args[1].
+		runFile(os.Args[1])
+	} else {
+		runPrompt()
+	}
+}
+
+// runFile reads the file at path and runs it. It exits with status 66 when
+// the file cannot be read and with status 65 when running reported an error.
+func runFile(path string) {
+	bytes, err := ioutil.ReadFile(path)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, err)
+		os.Exit(66)
+	}
+	run(string(bytes[:]))
+
+	if hadError {
+		os.Exit(65)
+	}
+}
+
+// runPrompt reads standard input line by line, echoing and running each
+// line, until input ends.
+func runPrompt() {
+	scanner := bufio.NewScanner(os.Stdin)
+	for {
+		fmt.Print("> ")
+		if !scanner.Scan() {
+			break
+		}
+		line := scanner.Text()
+		fmt.Println(line)
+		run(line)
+
+		// Reset so an error on one line does not carry over to the next.
+		hadError = false
+	}
+	if err := scanner.Err(); err != nil {
+		fmt.Fprintln(os.Stderr, "reading standard input:", err)
+	}
+}
+
+// run lexes source and prints every resulting token, one per line.
+func run(source string) {
+	lexer := NewLexer(source)
+	lexer.ScanTokens()
+	var tokens []Token = lexer.tokens
+
+	for _, token := range tokens {
+		fmt.Println(token)
+	}
+}
+
+// error reports msg for the given source line and records that an error
+// happened.
+// TODO: might have to rename
+func error(line int, msg string) {
+	report(line, "", msg)
+	hadError = true
+}
+
+// report writes a "[line N] Error<where>: msg" diagnostic to standard error.
+func report(line int, where string, msg string) {
+	fmt.Fprintln(os.Stderr, "[line "+fmt.Sprint(line)+"] Error"+where+": "+msg)
+}
diff --git a/pj1-go/tokentype_string.go b/pj1-go/tokentype_string.go
new file mode 100644
index 0000000..13258ef
--- /dev/null
+++ b/pj1-go/tokentype_string.go
@@ -0,0 +1,64 @@
+// Code generated by "stringer -type=TokenType"; DO NOT EDIT.
+
+package main
+
+import "strconv"
+
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[LEFT_PAREN-0]
+	_ = x[RIGHT_PAREN-1]
+	_ = x[LEFT_BRACE-2]
+	_ = x[RIGHT_BRACE-3]
+	_ = x[COMMA-4]
+	_ = x[DOT-5]
+	_ = x[MINUS-6]
+	_ = x[PLUS-7]
+	_ = x[SEMICOLON-8]
+	_ = x[STAR-9]
+	_ = x[BANG-10]
+	_ = x[BANG_EQUAL-11]
+	_ = x[EQUAL-12]
+	_ = x[EQUAL_EQUAL-13]
+	_ = x[GREATER-14]
+	_ = x[GREATER_EQUAL-15]
+	_ = x[LESS-16]
+	_ = x[LESS_EQUAL-17]
+	_ = x[SLASH-18]
+	_ = x[SLASH_DOT-19]
+	_ = x[SLASH_UNDERSCORE-20]
+	_ = x[IDENTIFIER-21]
+	_ = x[STRING-22]
+	_ = x[INTEGER-23]
+	_ = x[FLOAT-24]
+	_ = x[AND-25]
+	_ = x[CLASS-26]
+	_ = x[ELSE-27]
+	_ = x[FALSE-28]
+	_ = x[FOR-29]
+	_ = x[FUN-30]
+	_ = x[IF-31]
+	_ = x[NIL-32]
+	_ = x[OR-33]
+	_ = x[PRINT-34]
+	_ = x[RETURN-35]
+	_ = x[SUPER-36]
+	_ = x[THIS-37]
+	_ = x[TRUE-38]
+	_ = x[VAR-39]
+	_ = x[WHILE-40]
+	_ = x[EOF-41]
+}
+
+const _TokenType_name = "LEFT_PARENRIGHT_PARENLEFT_BRACERIGHT_BRACECOMMADOTMINUSPLUSSEMICOLONSTARBANGBANG_EQUALEQUALEQUAL_EQUALGREATERGREATER_EQUALLESSLESS_EQUALSLASHSLASH_DOTSLASH_UNDERSCOREIDENTIFIERSTRINGINTEGERFLOATANDCLASSELSEFALSEFORFUNIFNILORPRINTRETURNSUPERTHISTRUEVARWHILEEOF"
+
+var _TokenType_index = [...]uint16{0, 10, 21, 31, 42, 47, 50, 55, 59, 68, 72, 76, 86, 91, 102, 109, 122, 126, 136, 141, 150, 166, 176, 182, 189, 194, 197, 202, 206, 211, 214, 217, 219, 222, 224, 229, 235, 240, 244, 248, 251, 256, 259}
+
+func (i TokenType) String() string {
+	if i < 0 || i >= TokenType(len(_TokenType_index)-1) {
+		return "TokenType(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _TokenType_name[_TokenType_index[i]:_TokenType_index[i+1]]
+}
diff --git a/pj1-go/tools.go b/pj1-go/tools.go
new file mode 100644
index 0000000..3d7436f
--- /dev/null
+++ b/pj1-go/tools.go
@@ -0,0 +1,8 @@
+//go:build tools
+// +build tools
+
+package tools
+
+import (
+	_ "golang.org/x/tools/cmd/stringer"
+)