git.m455.casa

m455.casa

clone url: git://git.m455.casa/m455.casa


src/parser.scm

1 (module parser (clean
2 parse-lines)
3
4 (import scheme
5 utf8
6 (chicken base)
7 (chicken irregex)
8 (chicken string)
9 static)
10
11 (define (clean str)
12 (string-translate* str '(("&" . "&amp")
13 ("<" . "&lt;")
14 (">" . "&gt;")
15 ("\"" . "&quot;")
16 ("\'" . "&#39;"))))
17
18 ;; these formatters can't be nested or split into separate lines because of my
19 ;; shitty code LOL
20 (define (parse-characters line state)
21 (let* ((characters (if (string? line)
22 (string->list line)
23 line))
24 (parse-rest-of-characters (lambda (state)
25 (parse-characters (cdr characters) state))))
26 (if (null? characters)
27 ""
28 (let ((character (string (car characters))))
29 (cond
30 ;; check for line-based states, except for codeblocks, so characters
31 ;; are preserves
32 ((or (equal? 'normal state)
33 (equal? 'paragraph state)
34 (equal? 'unordered-list state)
35 (equal? 'ordered-list state)
36 (equal? 'blockquote state))
37 ;; try to catch inline formatters before the general character
38 ;; appending in the else statement below
39 (cond
40 ;; start of bold
41 ((equal? "*" character)
42 (string-append "<strong>" (parse-rest-of-characters 'bold)))
43 ;; start of inline code
44 ((equal? "`" character)
45 (string-append "<code>" (parse-rest-of-characters 'inline-code)))
46 ;; start of a link url
47 ((equal? "[" character)
48 (string-append "<a href=\"" (parse-rest-of-characters 'link)))
49 ;; if we're in any of the states above, and it's not one of the
50 ;; three characters above, then just append the character, and
51 ;; keep parsing
52 (else (string-append character (parse-rest-of-characters state)))))
53 ;; end of bold
54 ((and (equal? 'bold state)
55 (equal? "*" character))
56 (string-append "</strong>" (parse-rest-of-characters 'normal)))
57 ;; end of inline code
58 ((and (equal? 'inline-code state)
59 (equal? "`" character))
60 (string-append "</code>" (parse-rest-of-characters 'normal)))
61 ;; start of link url
62 ((equal? 'link state)
63 (cond
64 ;; middle of link url
65 ((not (equal? "|" character))
66 (string-append character (parse-rest-of-characters 'link)))
67 ;; end of link url
68 ((equal? "|" character)
69 (string-append "\">" (parse-rest-of-characters 'link-title)))))
70 ;; start of link title
71 ((equal? 'link-title state)
72 (cond
73 ;; middle of link title
74 ((not (equal? "]" character))
75 (string-append character (parse-rest-of-characters 'link-title)))
76 ;; end of link title
77 ((equal? "]" character)
78 (string-append "</a>" (parse-rest-of-characters 'normal)))))
79 ;; append a character if it doesn't meet any of the conditions above,
80 ;; so we actually have all of the characters on the line that we want.
81 (else (string-append character (parse-rest-of-characters state))))))))
82
83 (define (parse-lines lines state)
84 (if (null? lines)
85 ;; if we reach the end of the file while still in a non-normal state state, then
86 ;; return the state's associated closing html element
87 (cond
88 ((equal? 'paragraph state) "</p>")
89 ((equal? 'unordered-list state) "</li>\n</ul>")
90 ((equal? 'ordered-list state) "</li>\n</ol>")
91 ((equal? 'blockquote state) "</blockquote>")
92 ((equal? 'codeblock state) "</pre>")
93 (else ""))
94 (let* ((line (car lines))
95 (parse-rest-of-lines (lambda (state)
96 (parse-lines (cdr lines) state))))
97 (cond
98 ((equal? 'normal state)
99 (cond
100 ;; horizontal rule
101 ((irregex-match? "^---[[:space:]]*$" line)
102 (string-append "<hr>\n" (parse-rest-of-lines 'normal)))
103 ;; title
104 ((irregex-match? "^title: .*$" line)
105 (string-append "<h1>"
106 (clean
107 (substring line (string-length TITLE-PREFIX)))
108 "</h1>\n\n"
109 (parse-rest-of-lines 'normal)))
110 ;; heading
111 ((irregex-match? "^# .*$" line)
112 (string-append "<h2>"
113 (clean
114 (substring line (string-length HEADING-PREFIX)))
115 "</h2>\n\n"
116 (parse-rest-of-lines 'normal)))
117 ;; start of a codeblock
118 ((irregex-match? "^```[[:space:]]*$" line)
119 (string-append "<pre>\n" (parse-rest-of-lines 'codeblock)))
120 ;; start of an unordered list
121 ((irregex-match? "^- .*$" line)
122 (string-append "<ul>\n"
123 "<li>\n"
124 (parse-characters
125 (clean
126 (substring line (string-length UNORDERED-LIST-PREFIX))) state)
127 "\n"
128 (parse-rest-of-lines 'unordered-list)))
129 ;; start of an ordered list
130 ((irregex-match? "^1. .*$" line)
131 (string-append "<ol>\n"
132 "<li>\n"
133 (parse-characters
134 (clean
135 (substring line (string-length ORDERED-LIST-PREFIX))) state)
136 "\n"
137 (parse-rest-of-lines 'ordered-list)))
138 ;; start of a blockquote
139 ((irregex-match? "^> .*$" line)
140 (string-append "<blockquote>\n"
141 (parse-characters
142 (clean (substring line 2)) state)
143 "\n"
144 (parse-rest-of-lines 'blockquote)))
145 ;; start of a paragraph
146 ((not (irregex-match? "^[[:space:]]*$" line))
147 (string-append "<p>\n"
148 (parse-characters
149 (clean line) 'paragraph)
150 "\n"
151 (parse-rest-of-lines 'paragraph)))
152 ;; anything else, which should only match a (irregex-match?
153 ;; "^[[:space:]]*$" line), just keep parsing and skip it, so we can
154 ;; keep the html output readable, without polluting it with spaces
155 (else (parse-rest-of-lines 'normal))))
156
157 ((equal? 'codeblock state)
158 ;; end of a codeblock
159 (if (irregex-match? "^```[[:space:]]*$" line)
160 (string-append "</pre>\n\n"
161 (parse-rest-of-lines 'normal))
162 ;; middle of a codeblock
163 (string-append (clean line)
164 "\n"
165 (parse-rest-of-lines 'codeblock))))
166 ((equal? 'unordered-list state)
167 (cond
168 ;; end of an unordered list
169 ((irregex-match? "^[[:space:]]*$" line)
170 (string-append "</li>\n"
171 "</ul>\n\n"
172 (parse-rest-of-lines 'normal)))
173 ;; middle of an unordered list
174 ((irregex-match? "^- .*$" line)
175 (string-append "</li>\n"
176 "<li>\n"
177 (parse-characters
178 (clean (substring line 2)) state)
179 "\n"
180 (parse-rest-of-lines state)))
181 ;; multiline unordered list item
182 (else (string-append (parse-characters
183 (clean line) state)
184 "\n"
185 (parse-rest-of-lines state)))))
186 ((equal? 'ordered-list state)
187 (cond
188 ;; end of an ordered list
189 ((irregex-match? "^[[:space:]]*$" line)
190 (string-append "</li>\n"
191 "</ol>\n\n"
192 (parse-rest-of-lines 'normal)))
193 ;; middle of an ordered list
194 ((irregex-match? "^1. .*$" line)
195 (string-append "</li>\n"
196 "<li>\n"
197 (parse-characters
198 (clean (substring line 3)) state)
199 "\n"
200 (parse-rest-of-lines state)))
201 ;; multiline ordered list item
202 (else (string-append (parse-characters
203 (clean line) state)
204 "\n"
205 (parse-rest-of-lines state)))))
206 ((equal? 'blockquote state)
207 (cond
208 ;; end of a blockquote
209 ((irregex-match? "^[[:space:]]*$" line)
210 (string-append "</blockquote>\n"
211 (parse-rest-of-lines 'normal)))
212 ;; middle of a blockquote
213 ((irregex-match? "^> .*$" line)
214 (string-append (parse-characters
215 (clean (substring line 2)) state)
216 "\n"
217 (parse-rest-of-lines 'blockquote)))))
218 ((equal? 'paragraph state)
219 (if (irregex-match? "^[[:space:]]*$" line)
220 ;; end of a paragraph
221 (string-append "</p>\n\n" (parse-rest-of-lines 'normal))
222 ;; middle of a paragraph
223 (string-append (parse-characters
224 (clean line) 'paragraph)
225 "\n"
226 (parse-rest-of-lines 'paragraph))))
227 ;; anything else, just skip it. i don't *think* this condition is every
228 ;; met, because it would require the state to be something that isn't
229 ;; checked for above. better safe than sorry, i guess? ... what am i
230 ;; talking about? i don't know what the hell i'm doing. i just know it
231 ;; works and i'm having fun lol
232 (else (parse-rest-of-lines 'normal)))))))
233