forked from gjtorikian/html-pipeline
-
Notifications
You must be signed in to change notification settings - Fork 1
/
sanitization_filter_test.rb
267 lines (204 loc) · 7.97 KB
/
sanitization_filter_test.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# frozen_string_literal: true
require "test_helper"
require "html_pipeline/node_filter/mention_filter"
class HTMLPipeline
class SanitizationFilterTest < Minitest::Test
SanitizationFilter = HTMLPipeline::SanitizationFilter
DEFAULT_CONFIG = SanitizationFilter::DEFAULT_CONFIG
# Checks that the word "script" does not appear anywhere in the sanitized
# output when the input carries a <script> element next to an <img>.
def test_removing_script_tags
  markup = '<p><img src="http://github.com/img.png" /><script></script></p>'
  sanitized = SanitizationFilter.call(markup, DEFAULT_CONFIG).to_s

  refute_match(/script/, sanitized)
end
# Checks that no trace of "style" is left after sanitizing a paragraph that
# wraps a <style> element.
def test_removing_style_tags
  sanitized = SanitizationFilter.call("<p><style>hey now</style></p>", DEFAULT_CONFIG).to_s

  refute_match(/style/, sanitized)
end
# Checks that neither the inline CSS value nor the `style` attribute name
# shows up in the sanitized output.
def test_removing_style_attributes
  markup = "<p style='font-size:1000%'>YO DAWG</p>"
  sanitized = SanitizationFilter.call(markup, DEFAULT_CONFIG).to_s

  [/font-size/, /style/].each do |forbidden|
    refute_match(forbidden, sanitized)
  end
end
# Checks that an inline event handler is stripped: neither the handler's
# `javascript:` payload nor the `onclick` attribute name may appear in the
# sanitized output.
def test_removing_script_event_handler_attributes
  orig = %(<a onclick='javascript:alert(0)'>YO DAWG</a>)
  html = SanitizationFilter.call(orig, DEFAULT_CONFIG).to_s

  # The pattern must spell "javascript" exactly as the input does; a
  # misspelled pattern can never match and would let a leak go unnoticed.
  refute_match(/javascript/, html)
  refute_match(/onclick/, html)
end
# With an empty element allowlist, a stray <li> between text lines is
# expected to collapse to its inner text.
def test_sanitizes_li_elements_not_contained_in_ul_or_ol
  allow_nothing = { elements: {} }
  sanitized = SanitizationFilter.call("a\n<li>b</li>\nc", allow_nothing).to_s

  assert_equal("a\nb\nc", sanitized)
end
# A <li> wrapped in a <ul> is expected to pass through the default config
# unchanged.
def test_does_not_sanitize_li_elements_contained_in_ul_or_ol
  list_markup = "a\n<ul><li>b</li></ul>\nc"
  sanitized = SanitizationFilter.call(list_markup, DEFAULT_CONFIG).to_s

  assert_equal(list_markup, sanitized)
end
# An href using the github-windows:// scheme is expected to be dropped,
# leaving a bare <a> around the link text.
def test_github_specific_protocols_are_removed
  link = '<a href="github-windows://spillthelog">Spill this yo</a> and so on'
  sanitized = SanitizationFilter.call(link, DEFAULT_CONFIG).to_s

  assert_equal("<a>Spill this yo</a> and so on", sanitized)
end
# An href with a scheme outside the default allowlist is expected to be
# removed from the anchor.
def test_unknown_schemes_are_removed
  link = '<a href="something-weird://heyyy">Wat</a> is this'

  assert_equal("<a>Wat</a> is this", SanitizationFilter.call(link, DEFAULT_CONFIG).to_s)
end
# An <img> whose longdesc uses an http URL is expected to come back
# unchanged under the default config.
def test_allowlisted_longdesc_schemes_are_allowed
  image = '<img src="./foo.jpg" longdesc="http://longdesc.com">'
  sanitized = SanitizationFilter.call(image, DEFAULT_CONFIG).to_s

  assert_equal(image, sanitized)
end
# A longdesc carrying a javascript: URL is expected to be dropped while the
# rest of the <img> is kept.
def test_weird_longdesc_schemes_are_removed
  image = '<img src="./foo.jpg" longdesc="javascript:alert(1)">'

  assert_equal('<img src="./foo.jpg">', SanitizationFilter.call(image, DEFAULT_CONFIG).to_s)
end
# Overriding the anchor href protocols with an empty list is expected to
# strip even an ordinary http link.
def test_standard_schemes_are_removed_if_not_specified_in_anchor_schemes
  no_href_schemes = { "a" => { "href" => [] } }
  link = '<a href="http://www.example.com/">No href for you</a>'

  sanitized = SanitizationFilter.call(link, DEFAULT_CONFIG.merge(protocols: no_href_schemes))

  assert_equal("<a>No href for you</a>", sanitized)
end
# Adding a custom scheme to the anchor href protocols is expected to let a
# link using that scheme through untouched.
def test_custom_anchor_schemes_are_not_removed
  weird_scheme = { "a" => { "href" => ["something-weird"] } }
  link = '<a href="something-weird://heyyy">Wat</a> is this'

  sanitized = SanitizationFilter.call(link, DEFAULT_CONFIG.merge(protocols: weird_scheme))

  assert_equal(link, sanitized)
end
# Runs the same SVG fragment through two configurations: a copy of the
# default config (expected result: a lone newline) and a custom config that
# allows svg/circle with a handful of attributes (expected result: the
# fragment with only those attributes left).
def test_allow_svg_elements_to_be_added
  svg_markup = <<~SVG
    <svg height="100" width="100">
    <circle cx="50" cy="50" r="40" stroke="black" stroke-width="3" fill="red" />
    </svg>
  SVG

  # Pass 1: a copy of the default configuration.
  default_copy = DEFAULT_CONFIG.dup
  assert_equal("\n", SanitizationFilter.call(svg_markup, default_copy))

  # Pass 2: svg and circle explicitly allowed, with a reduced attribute set.
  svg_config = {
    elements: ["svg", "circle"],
    attributes: {
      "svg" => ["width"],
      "circle" => ["cx", "cy", "r"],
    },
  }
  trimmed_svg = <<~SVG
    <svg width="100">
    <circle cx="50" cy="50" r="40" />
    </svg>
  SVG
  assert_equal(trimmed_svg, SanitizationFilter.call(svg_markup, svg_config))
end
# With only `href` allowed on <a>, the custom-scheme href is expected to stay
# while the `ping` attribute is removed.
def test_anchor_schemes_are_merged_with_other_anchor_restrictions
  anchor_only = {
    elements: ["a"],
    attributes: { "a" => ["href"] },
    protocols: { "a" => { "href" => ["something-weird"] } },
  }
  link = '<a href="something-weird://heyyy" ping="more-weird://hiii">Wat</a> is this'

  sanitized = SanitizationFilter.call(link, anchor_only)

  assert_equal('<a href="something-weird://heyyy">Wat</a> is this', sanitized)
end
# A link whose scheme is listed in the allowlist's own protocols entry is
# expected to come back unchanged.
def test_uses_anchor_schemes_from_allowlist_when_not_separately_specified
  link = '<a href="something-weird://heyyy">Wat</a> is this'
  allowed_attributes = { "a" => ["href"] }
  allowed_protocols = { "a" => { "href" => ["something-weird"] } }

  sanitized = SanitizationFilter.call(
    link,
    { elements: ["a"], attributes: allowed_attributes, protocols: allowed_protocols },
  )

  assert_equal(link, sanitized)
end
# Pins the anchor href protocol list stored in the filter's DEFAULT_CONFIG.
def test_allowlist_contains_default_anchor_schemes
  href_schemes = SanitizationFilter::DEFAULT_CONFIG[:protocols]["a"]["href"]

  assert_equal(["http", "https", "mailto", :relative], href_schemes)
end
# Pins the contents of the filter's VALID_PROTOCOLS constant.
def test_exports_default_anchor_schemes
  exported = SanitizationFilter::VALID_PROTOCOLS

  assert_equal(["http", "https", "mailto", :relative], exported)
end
# A lone <script> element is expected to sanitize to the empty string, i.e.
# its text content goes away together with the tag.
def test_script_contents_are_removed
  script = "<script>JavaScript!</script>"
  sanitized = SanitizationFilter.call(script, DEFAULT_CONFIG).to_s

  assert_equal("", sanitized)
end
# With an empty element allowlist, rows and cells that sit outside a <table>
# are expected to reduce to their concatenated text.
def test_table_rows_and_cells_removed_if_not_in_table
  allow_nothing = { elements: {} }
  loose_cells = "<tr><td>Foo</td></tr><td>Bar</td>"

  assert_equal("FooBar", SanitizationFilter.call(loose_cells, allow_nothing))
end
# With an empty element allowlist, a <thead> outside a <table> is expected
# to reduce to its text.
def test_table_sections_removed_if_not_in_table
  loose_section = "<thead><tr><td>Foo</td></tr></thead>"
  sanitized = SanitizationFilter.call(loose_section, { elements: {} }).to_s

  assert_equal("Foo", sanitized)
end
# A complete table with thead, tfoot and tbody sections is expected to pass
# through the default config unchanged.
def test_table_sections_are_not_removed
  # Adjacent literals are joined by the parser, so this is a single
  # newline-separated string with no trailing newline.
  table = "<table>\n" \
    "<thead><tr><th>Column 1</th></tr></thead>\n" \
    "<tfoot><tr><td>Sum</td></tr></tfoot>\n" \
    "<tbody><tr><td>1</td></tr></tbody>\n" \
    "</table>"
  sanitized = SanitizationFilter.call(table, DEFAULT_CONFIG).to_s

  assert_equal(table, sanitized)
end
# A <summary> element is expected to pass through the default config
# unchanged.
def test_summary_tag_are_not_removed
  summary = "<summary>Foo</summary>"
  sanitized = SanitizationFilter.call(summary, DEFAULT_CONFIG).to_s

  assert_equal(summary, sanitized)
end
# A <details> element carrying the boolean `open` attribute is expected to
# pass through the default config unchanged.
def test_details_tag_and_open_attribute_are_not_removed
  details = "<details open>Foo</details>"
  sanitized = SanitizationFilter.call(details, DEFAULT_CONFIG).to_s

  assert_equal(details, sanitized)
end
# A <details> element nested inside another one (each with a <summary>) is
# expected to pass through the default config unchanged.
def test_nested_details_tag_are_not_removed
  # Squiggly heredoc: the common leading indentation is stripped, so every
  # line of the fixture starts at column zero.
  nested = <<~DETAILS
    <details>
    <summary>Foo</summary>
    <details>
    Bar
    <summary>Baz</summary>
    </details>
    Qux
    </details>
  DETAILS
  sanitized = SanitizationFilter.call(nested, DEFAULT_CONFIG).to_s

  assert_equal(nested, sanitized)
end
# Builds a full pipeline (Markdown conversion, a custom sanitization config
# allowing only p/pre/code, and the mention node filter) and compares its
# :output with the expected HTML. In that expected HTML the emphasis markup
# is gone while the mention link is present.
# NOTE(review): the <a> is not in the allowlist, so the mention filter
# presumably runs after sanitization; confirm against HTMLPipeline.
def test_sanitization_pipeline_can_be_configured
  config = {
    elements: ["p", "pre", "code"],
  }

  pipeline = HTMLPipeline.new(
    convert_filter: HTMLPipeline::ConvertFilter::MarkdownFilter.new,
    sanitization_config: config,
    node_filters: [
      HTMLPipeline::NodeFilter::MentionFilter.new,
    ],
  )

  # The blank line and the four-space indent turn the second line into a
  # Markdown indented code block; without them the text is a single paragraph
  # and the <pre><code> expectation below could never be met.
  result = pipeline.call(<<~CODE)
    This is *great*, @balevine:

        some_code(:first)
  CODE

  expected = <<~HTML
    <p>This is great, <a href="/balevine" class="user-mention">@balevine</a>:</p>
    <pre><code>some_code(:first)
    </code></pre>
  HTML

  # Minitest's signature is assert_equal(expected, actual); this order keeps
  # the labels in a failure message truthful.
  assert_equal(expected.chomp, result[:output].to_s)
end
# Builds a pipeline with `sanitization_config: nil` and compares its :output
# with the expected HTML, in which the <em> produced by the Markdown emphasis
# is still present. The Markdown filter is created with its syntax_highlighter
# plugin set to nil.
def test_sanitization_pipeline_can_be_removed
  markdown_filter = HTMLPipeline::ConvertFilter::MarkdownFilter.new(
    context: { markdown: { plugins: { syntax_highlighter: nil } } },
  )

  pipeline = HTMLPipeline.new(
    convert_filter: markdown_filter,
    sanitization_config: nil,
    node_filters: [
      HTMLPipeline::NodeFilter::MentionFilter.new,
    ],
  )

  # The blank line and the four-space indent turn the second line into a
  # Markdown indented code block; without them the text is a single paragraph
  # and the <pre><code> expectation below could never be met.
  result = pipeline.call(<<~CODE)
    This is *great*, @balevine:

        some_code(:first)
  CODE

  expected = <<~HTML
    <p>This is <em>great</em>, <a href="/balevine" class="user-mention">@balevine</a>:</p>
    <pre><code>some_code(:first)
    </code></pre>
  HTML

  # Minitest's signature is assert_equal(expected, actual); this order keeps
  # the labels in a failure message truthful.
  assert_equal(expected.chomp, result[:output].to_s)
end
end
end