clone url: git://git.m455.casa/m455.casa
src/parser.scm
1 | (module parser (clean |
2 | parse-lines) |
3 |
|
4 | (import scheme |
5 | utf8 |
6 | (chicken base) |
7 | (chicken irregex) |
8 | (chicken string) |
9 | static) |
10 |
|
11 | (define (clean str) |
12 | (string-translate* str '(("&" . "&") |
13 | ("<" . "<") |
14 | (">" . ">") |
15 | ("\"" . """) |
16 | ("\'" . "'")))) |
17 |
|
18 | ;; these formatters can't be nested or split into separate lines because of my |
19 | ;; shitty code LOL |
20 | (define (parse-characters line state) |
21 | (let* ((characters (if (string? line) |
22 | (string->list line) |
23 | line)) |
24 | (parse-rest-of-characters (lambda (state) |
25 | (parse-characters (cdr characters) state)))) |
26 | (if (null? characters) |
27 | "" |
28 | (let ((character (string (car characters)))) |
29 | (cond |
30 | ;; check for line-based states, except for codeblocks, so characters |
31 | ;; are preserves |
32 | ((or (equal? 'normal state) |
33 | (equal? 'paragraph state) |
34 | (equal? 'unordered-list state) |
35 | (equal? 'ordered-list state) |
36 | (equal? 'blockquote state)) |
37 | ;; try to catch inline formatters before the general character |
38 | ;; appending in the else statement below |
39 | (cond |
40 | ;; start of bold |
41 | ((equal? "*" character) |
42 | (string-append "<strong>" (parse-rest-of-characters 'bold))) |
43 | ;; start of inline code |
44 | ((equal? "`" character) |
45 | (string-append "<code>" (parse-rest-of-characters 'inline-code))) |
46 | ;; start of a link url |
47 | ((equal? "[" character) |
48 | (string-append "<a href=\"" (parse-rest-of-characters 'link))) |
49 | ;; if we're in any of the states above, and it's not one of the |
50 | ;; three characters above, then just append the character, and |
51 | ;; keep parsing |
52 | (else (string-append character (parse-rest-of-characters state))))) |
53 | ;; end of bold |
54 | ((and (equal? 'bold state) |
55 | (equal? "*" character)) |
56 | (string-append "</strong>" (parse-rest-of-characters 'normal))) |
57 | ;; end of inline code |
58 | ((and (equal? 'inline-code state) |
59 | (equal? "`" character)) |
60 | (string-append "</code>" (parse-rest-of-characters 'normal))) |
61 | ;; start of link url |
62 | ((equal? 'link state) |
63 | (cond |
64 | ;; middle of link url |
65 | ((not (equal? "|" character)) |
66 | (string-append character (parse-rest-of-characters 'link))) |
67 | ;; end of link url |
68 | ((equal? "|" character) |
69 | (string-append "\">" (parse-rest-of-characters 'link-title))))) |
70 | ;; start of link title |
71 | ((equal? 'link-title state) |
72 | (cond |
73 | ;; middle of link title |
74 | ((not (equal? "]" character)) |
75 | (string-append character (parse-rest-of-characters 'link-title))) |
76 | ;; end of link title |
77 | ((equal? "]" character) |
78 | (string-append "</a>" (parse-rest-of-characters 'normal))))) |
79 | ;; append a character if it doesn't meet any of the conditions above, |
80 | ;; so we actually have all of the characters on the line that we want. |
81 | (else (string-append character (parse-rest-of-characters state)))))))) |
82 |
|
83 | (define (parse-lines lines state) |
84 | (if (null? lines) |
85 | ;; if we reach the end of the file while still in a non-normal state state, then |
86 | ;; return the state's associated closing html element |
87 | (cond |
88 | ((equal? 'paragraph state) "</p>") |
89 | ((equal? 'unordered-list state) "</li>\n</ul>") |
90 | ((equal? 'ordered-list state) "</li>\n</ol>") |
91 | ((equal? 'blockquote state) "</blockquote>") |
92 | ((equal? 'codeblock state) "</pre>") |
93 | (else "")) |
94 | (let* ((line (car lines)) |
95 | (parse-rest-of-lines (lambda (state) |
96 | (parse-lines (cdr lines) state)))) |
97 | (cond |
98 | ((equal? 'normal state) |
99 | (cond |
100 | ;; horizontal rule |
101 | ((irregex-match? "^---[[:space:]]*$" line) |
102 | (string-append "<hr>\n" (parse-rest-of-lines 'normal))) |
103 | ;; title |
104 | ((irregex-match? "^title: .*$" line) |
105 | (string-append "<h1>" |
106 | (clean |
107 | (substring line (string-length TITLE-PREFIX))) |
108 | "</h1>\n\n" |
109 | (parse-rest-of-lines 'normal))) |
110 | ;; heading |
111 | ((irregex-match? "^# .*$" line) |
112 | (string-append "<h2>" |
113 | (clean |
114 | (substring line (string-length HEADING-PREFIX))) |
115 | "</h2>\n\n" |
116 | (parse-rest-of-lines 'normal))) |
117 | ;; start of a codeblock |
118 | ((irregex-match? "^```[[:space:]]*$" line) |
119 | (string-append "<pre>\n" (parse-rest-of-lines 'codeblock))) |
120 | ;; start of an unordered list |
121 | ((irregex-match? "^- .*$" line) |
122 | (string-append "<ul>\n" |
123 | "<li>\n" |
124 | (parse-characters |
125 | (clean |
126 | (substring line (string-length UNORDERED-LIST-PREFIX))) state) |
127 | "\n" |
128 | (parse-rest-of-lines 'unordered-list))) |
129 | ;; start of an ordered list |
130 | ((irregex-match? "^1. .*$" line) |
131 | (string-append "<ol>\n" |
132 | "<li>\n" |
133 | (parse-characters |
134 | (clean |
135 | (substring line (string-length ORDERED-LIST-PREFIX))) state) |
136 | "\n" |
137 | (parse-rest-of-lines 'ordered-list))) |
138 | ;; start of a blockquote |
139 | ((irregex-match? "^> .*$" line) |
140 | (string-append "<blockquote>\n" |
141 | (parse-characters |
142 | (clean (substring line 2)) state) |
143 | "\n" |
144 | (parse-rest-of-lines 'blockquote))) |
145 | ;; start of a paragraph |
146 | ((not (irregex-match? "^[[:space:]]*$" line)) |
147 | (string-append "<p>\n" |
148 | (parse-characters |
149 | (clean line) 'paragraph) |
150 | "\n" |
151 | (parse-rest-of-lines 'paragraph))) |
152 | ;; anything else, which should only match a (irregex-match? |
153 | ;; "^[[:space:]]*$" line), just keep parsing and skip it, so we can |
154 | ;; keep the html output readable, without polluting it with spaces |
155 | (else (parse-rest-of-lines 'normal)))) |
156 |
|
157 | ((equal? 'codeblock state) |
158 | ;; end of a codeblock |
159 | (if (irregex-match? "^```[[:space:]]*$" line) |
160 | (string-append "</pre>\n\n" |
161 | (parse-rest-of-lines 'normal)) |
162 | ;; middle of a codeblock |
163 | (string-append (clean line) |
164 | "\n" |
165 | (parse-rest-of-lines 'codeblock)))) |
166 | ((equal? 'unordered-list state) |
167 | (cond |
168 | ;; end of an unordered list |
169 | ((irregex-match? "^[[:space:]]*$" line) |
170 | (string-append "</li>\n" |
171 | "</ul>\n\n" |
172 | (parse-rest-of-lines 'normal))) |
173 | ;; middle of an unordered list |
174 | ((irregex-match? "^- .*$" line) |
175 | (string-append "</li>\n" |
176 | "<li>\n" |
177 | (parse-characters |
178 | (clean (substring line 2)) state) |
179 | "\n" |
180 | (parse-rest-of-lines state))) |
181 | ;; multiline unordered list item |
182 | (else (string-append (parse-characters |
183 | (clean line) state) |
184 | "\n" |
185 | (parse-rest-of-lines state))))) |
186 | ((equal? 'ordered-list state) |
187 | (cond |
188 | ;; end of an ordered list |
189 | ((irregex-match? "^[[:space:]]*$" line) |
190 | (string-append "</li>\n" |
191 | "</ol>\n\n" |
192 | (parse-rest-of-lines 'normal))) |
193 | ;; middle of an ordered list |
194 | ((irregex-match? "^1. .*$" line) |
195 | (string-append "</li>\n" |
196 | "<li>\n" |
197 | (parse-characters |
198 | (clean (substring line 3)) state) |
199 | "\n" |
200 | (parse-rest-of-lines state))) |
201 | ;; multiline ordered list item |
202 | (else (string-append (parse-characters |
203 | (clean line) state) |
204 | "\n" |
205 | (parse-rest-of-lines state))))) |
206 | ((equal? 'blockquote state) |
207 | (cond |
208 | ;; end of a blockquote |
209 | ((irregex-match? "^[[:space:]]*$" line) |
210 | (string-append "</blockquote>\n" |
211 | (parse-rest-of-lines 'normal))) |
212 | ;; middle of a blockquote |
213 | ((irregex-match? "^> .*$" line) |
214 | (string-append (parse-characters |
215 | (clean (substring line 2)) state) |
216 | "\n" |
217 | (parse-rest-of-lines 'blockquote))))) |
218 | ((equal? 'paragraph state) |
219 | (if (irregex-match? "^[[:space:]]*$" line) |
220 | ;; end of a paragraph |
221 | (string-append "</p>\n\n" (parse-rest-of-lines 'normal)) |
222 | ;; middle of a paragraph |
223 | (string-append (parse-characters |
224 | (clean line) 'paragraph) |
225 | "\n" |
226 | (parse-rest-of-lines 'paragraph)))) |
227 | ;; anything else, just skip it. i don't *think* this condition is every |
228 | ;; met, because it would require the state to be something that isn't |
229 | ;; checked for above. better safe than sorry, i guess? ... what am i |
230 | ;; talking about? i don't know what the hell i'm doing. i just know it |
231 | ;; works and i'm having fun lol |
232 | (else (parse-rest-of-lines 'normal))))))) |
233 |
|