clone url: git://git.m455.casa/fa
esperbuild/espersrc/fennel-0.7.0/src/fennel/parser.fnl
1 | ;; This module is responsible for turning bytes of source code into an AST |
2 | ;; data structure. |
3 |
|
4 | (local utils (require :fennel.utils)) |
5 | (local friend (require :fennel.friend)) |
6 | (local unpack (or _G.unpack table.unpack)) |
7 |
|
8 | (fn granulate [getchunk] |
9 | "Convert a stream of chunks to a stream of bytes. |
10 | Also returns a second function to clear the buffer in the byte stream" |
11 | (var (c index done?) (values "" 1 false)) |
12 | (values (fn [parser-state] |
13 | (when (not done?) |
14 | (if (<= index (# c)) |
15 | (let [b (c:byte index)] |
16 | (set index (+ index 1)) |
17 | b) |
18 | (match (getchunk parser-state) |
19 | (char ? (not= char "")) (do (set c char) |
20 | (set index 2) |
21 | (c:byte)) |
22 | _ (set done? true))))) |
23 | (fn [] (set c "")))) |
24 |
|
25 | (fn string-stream [str] |
26 | "Convert a string into a stream of bytes." |
27 | (let [str (str:gsub "^#![^\n]*\n" "")] ; remove shebang |
28 | (var index 1) |
29 | (fn [] |
30 | (let [r (str:byte index)] |
31 | (set index (+ index 1)) |
32 | r)))) |
33 |
|
34 | ;; Table of delimiter bytes - (, ), [, ], {, } |
35 | ;; Opener keys have closer as the value; closers keys have true as their value. |
36 | (local delims {40 41 41 true |
37 | 91 93 93 true |
38 | 123 125 125 true}) |
39 |
|
40 | (fn whitespace? [b] |
41 | (or (= b 32) (and (>= b 9) (<= b 13)))) |
42 |
|
43 | (fn symbolchar? [b] |
44 | (and (> b 32) |
45 | (not (. delims b)) |
46 | (not= b 127) ; backspace |
47 | (not= b 34) ; backslash |
48 | (not= b 39) ; single quote |
49 | (not= b 126) ; tilde |
50 | (not= b 59) ; semicolon |
51 | (not= b 44) ; comma |
52 | (not= b 64) ; at |
53 | (not= b 96))) ; backtick |
54 |
|
55 | ;; prefix chars substituted while reading |
56 | (local prefixes {35 "hashfn" ; # |
57 | 39 "quote" ; ' |
58 | 44 "unquote" ; , |
59 | 96 "quote"}); ` |
60 |
|
61 | (fn parser [getbyte filename options] |
62 | "Parse one value given a function that returns sequential bytes. |
63 | Will throw an error as soon as possible without getting more bytes on bad input. |
64 | Returns if a value was read, and then the value read. Will return nil when input |
65 | stream is finished." |
66 | (var stack []) ; stack of unfinished values |
67 | ;; Provide one character buffer and keep track of current line and byte index |
68 | (var line 1) |
69 | (var byteindex 0) |
70 | (var lastb nil) |
71 |
|
72 | (fn ungetb [ub] |
73 | (when (= ub 10) |
74 | (set line (- line 1))) |
75 | (set byteindex (- byteindex 1)) |
76 | (set lastb ub)) |
77 |
|
78 | (fn getb [] |
79 | (var r nil) |
80 | (if lastb |
81 | (set (r lastb) (values lastb nil)) |
82 | (set r (getbyte {:stack-size (# stack)}))) |
83 | (set byteindex (+ byteindex 1)) |
84 | (when (= r 10) |
85 | (set line (+ line 1))) |
86 | r) |
87 |
|
88 | ;; If you add new calls to this function, please update fennel.friend as well |
89 | ;; to add suggestions for how to fix the new error! |
90 | (fn parse-error [msg byteindex-override] |
91 | (let [{: source : unfriendly} (or options utils.root.options {})] |
92 | (utils.root.reset) |
93 | (if unfriendly |
94 | (error (string.format "Parse error in %s:%s: %s" (or filename :unknown) |
95 | (or line "?") msg) 0) |
96 | (friend.parse-error msg (or filename "unknown") (or line "?") |
97 | (or byteindex-override byteindex) source)))) |
98 |
|
99 | (fn parse-stream [] |
100 | (var (whitespace-since-dispatch done? retval) true) |
101 | (fn dispatch [v] |
102 | "Dispatch when we complete a value" |
103 | (match (. stack (# stack)) |
104 | nil (set (retval done? whitespace-since-dispatch) (values v true false)) |
105 | {: prefix} (do (table.remove stack) |
106 | (dispatch (utils.list (utils.sym prefix) v))) |
107 | top (do (set whitespace-since-dispatch false) |
108 | (table.insert top v)))) |
109 |
|
110 | (fn badend [] |
111 | "Throw nice error when we expect more characters but reach end of stream." |
112 | (let [accum (utils.map stack "closer")] |
113 | (parse-error (string.format "expected closing delimiter%s %s" |
114 | (if (= (# stack) 1) "" "s") |
115 | (string.char (unpack accum)))))) |
116 |
|
117 | (fn skip-whitespace [b] |
118 | (if (and b (whitespace? b)) |
119 | (do (set whitespace-since-dispatch true) |
120 | (skip-whitespace (getb))) |
121 | (and (not b) (> (# stack) 0)) |
122 | (badend) |
123 | b)) |
124 |
|
125 | (fn skip-comment [b] |
126 | (if (and b (not= 10 b)) |
127 | (skip-comment (getb)) |
128 | b)) |
129 |
|
130 | (fn open-table [b] |
131 | (when (not whitespace-since-dispatch) |
132 | (parse-error (.. "expected whitespace before opening delimiter " |
133 | (string.char b)))) |
134 | (table.insert stack {:bytestart byteindex :closer (. delims b) |
135 | :filename filename :line line})) |
136 |
|
137 | (fn close-list [list] |
138 | (dispatch (setmetatable list (getmetatable (utils.list))))) |
139 |
|
140 | (fn close-sequence [tbl] |
141 | (let [val (utils.sequence (unpack tbl))] |
142 | ;; for table literals we can store file/line/offset source |
143 | ;; data in fields on the table itself, because the AST node |
144 | ;; *is* the table, and the fields would show up in the |
145 | ;; compiled output. keep them on the metatable instead. |
146 | (each [k v (pairs tbl)] |
147 | (tset (getmetatable val) k v)) |
148 | (dispatch val))) |
149 |
|
150 | (fn close-curly-table [tbl] |
151 | (let [val []] ; a {} table |
152 | (when (not= (% (# tbl) 2) 0) |
153 | (set byteindex (- byteindex 1)) |
154 | (parse-error "expected even number of values in table literal")) |
155 | (setmetatable val tbl) ; see note above about source data |
156 | (for [i 1 (# tbl) 2] |
157 | (when (and (= (tostring (. tbl i)) ":") |
158 | (utils.sym? (. tbl (+ i 1))) |
159 | (utils.sym? (. tbl i))) |
160 | (tset tbl i (tostring (. tbl (+ i 1))))) |
161 | (tset val (. tbl i) (. tbl (+ i 1)))) |
162 | (dispatch val))) |
163 |
|
164 | (fn close-table [b] |
165 | (let [top (table.remove stack)] |
166 | (when (= top nil) |
167 | (parse-error (.. "unexpected closing delimiter " (string.char b)))) |
168 | (when (not= top.closer b) |
169 | (parse-error (.. "mismatched closing delimiter " (string.char b) |
170 | ", expected " (string.char top.closer)))) |
171 | (set top.byteend byteindex) ; set closing byte index |
172 | (if (= b 41) (close-list top) |
173 | (= b 93) (close-sequence top) |
174 | (close-curly-table top)))) |
175 |
|
176 | (fn parse-string-loop [chars b state] |
177 | (table.insert chars b) |
178 | (let [state (match [state b] |
179 | [:base 92] :backslash |
180 | [:base 34] :done |
181 | _ :base)] |
182 | (if (and b (not= state :done)) |
183 | (parse-string-loop chars (getb) state) |
184 | b))) |
185 |
|
186 | (fn parse-string [] |
187 | (table.insert stack {:closer 34}) |
188 | (let [chars [34]] |
189 | (when (not (parse-string-loop chars (getb) :base)) |
190 | (badend)) |
191 | (table.remove stack) |
192 | (let [raw (string.char (unpack chars)) |
193 | formatted (raw:gsub "[\1-\31]" (fn [c] (.. "\\" (c:byte)))) |
194 | load-fn ((or _G.loadstring load) (.. "return " formatted))] |
195 | (dispatch (load-fn))))) |
196 |
|
197 | (fn parse-prefix [b] |
198 | "expand prefix byte into wrapping form eg. '`a' into '(quote a)'" |
199 | (table.insert stack {:prefix (. prefixes b)}) |
200 | (let [nextb (getb)] |
201 | (when (whitespace? nextb) |
202 | (when (not= b 35) |
203 | (parse-error "invalid whitespace after quoting prefix")) |
204 | (table.remove stack) |
205 | (dispatch (utils.sym "#"))) |
206 | (ungetb nextb))) |
207 |
|
208 | (fn parse-sym-loop [chars b] |
209 | (if (and b (symbolchar? b)) |
210 | (do (table.insert chars b) |
211 | (parse-sym-loop chars (getb))) |
212 | (do (when b (ungetb b)) |
213 | chars))) |
214 |
|
215 | (fn parse-number [rawstr] |
216 | (let [force-number (rawstr:match "^%d") |
217 | number-with-stripped-underscores (rawstr:gsub "_" "")] |
218 | (if force-number |
219 | (do (dispatch (or (tonumber number-with-stripped-underscores) |
220 | (parse-error (.. "could not read number \"" |
221 | rawstr "\"")))) |
222 | true) |
223 | (match (tonumber number-with-stripped-underscores) |
224 | x (do (dispatch x) true) |
225 | _ false)))) |
226 |
|
227 | (fn check-malformed-sym [rawstr] |
228 | ;; for backwards-compatibility, special-case allowance of ~= but |
229 | ;; all other uses of ~ are disallowed |
230 | (if (and (rawstr:match "^~") (not= rawstr "~=")) |
231 | (parse-error "illegal character: ~") |
232 | (rawstr:match "%.[0-9]") |
233 | (parse-error (.. "can't start multisym segment " |
234 | "with a digit: " rawstr) |
235 | (+ (+ (- byteindex (# rawstr)) |
236 | (rawstr:find "%.[0-9]")) 1)) |
237 | (and (rawstr:match "[%.:][%.:]") |
238 | (not= rawstr "..") (not= rawstr "$...")) |
239 | (parse-error (.. "malformed multisym: " rawstr) |
240 | (+ (- byteindex (# rawstr)) 1 |
241 | (rawstr:find "[%.:][%.:]"))) |
242 | (rawstr:match ":.+[%.:]") |
243 | (parse-error (.. "method must be last component " |
244 | "of multisym: " rawstr) |
245 | (+ (- byteindex (# rawstr)) |
246 | (rawstr:find ":.+[%.:]"))))) |
247 |
|
248 | (fn parse-sym [b] ; not just syms actually... |
249 | (let [bytestart byteindex |
250 | rawstr (string.char (unpack (parse-sym-loop [b] (getb))))] |
251 | (if (= rawstr "true") |
252 | (dispatch true) |
253 | (= rawstr "false") |
254 | (dispatch false) |
255 | (= rawstr "...") |
256 | (dispatch (utils.varg)) |
257 | (rawstr:match "^:.+$") |
258 | (dispatch (rawstr:sub 2)) |
259 | (parse-number rawstr) nil |
260 | (check-malformed-sym rawstr) nil |
261 | (dispatch (utils.sym rawstr nil {:byteend byteindex |
262 | :bytestart bytestart |
263 | :filename filename |
264 | :line line}))))) |
265 |
|
266 | (fn parse-loop [b] |
267 | (if (not b) nil |
268 | (= b 59) (skip-comment (getb)) |
269 | (= (type (. delims b)) :number) (open-table b) |
270 | (. delims b) (close-table b) |
271 | (= b 34) (parse-string b) |
272 | (. prefixes b) (parse-prefix b) |
273 | (or (symbolchar? b) (= b (string.byte "~"))) (parse-sym b) |
274 | (parse-error (.. "illegal character: " (string.char b)))) |
275 |
|
276 | (if (not b) nil ; EOF |
277 | done? (values true retval) |
278 | (parse-loop (skip-whitespace (getb))))) |
279 |
|
280 | (parse-loop (skip-whitespace (getb)))) |
281 | (values parse-stream (fn [] (set stack [])))) |
282 |
|
283 | {: granulate : parser : string-stream} |