@@ -39,7 +39,7 @@ module WebRequestConcern |
||
39 | 39 |
# detection, so we do that. |
40 | 40 |
case env[:response_headers][:content_type] |
41 | 41 |
when /;\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i |
42 |
- encoding = Encoding.find($1) rescue nil |
|
42 |
+ encoding = Encoding.find($1) rescue @default_encoding |
|
43 | 43 |
when /\A\s*(?:text\/[^\s;]+|application\/(?:[^\s;]+\+)?(?:xml|json))\s*(?:;|\z)/i |
44 | 44 |
encoding = @default_encoding |
45 | 45 |
else |
@@ -47,7 +47,7 @@ module WebRequestConcern |
||
47 | 47 |
next |
48 | 48 |
end |
49 | 49 |
end |
50 |
- body.encode!(Encoding::UTF_8, encoding) unless body.encoding == Encoding::UTF_8 |
|
50 |
+ body.encode!(Encoding::UTF_8, encoding) |
|
51 | 51 |
end |
52 | 52 |
end |
53 | 53 |
end |
@@ -262,11 +262,11 @@ describe Agents::WebsiteAgent do |
||
262 | 262 |
describe 'encoding' do |
263 | 263 |
it 'should be forced with force_encoding option' do |
264 | 264 |
huginn = "\u{601d}\u{8003}" |
265 |
- stub_request(:any, /no-encoding/).to_return(:body => { |
|
266 |
- :value => huginn, |
|
267 |
- }.to_json.encode(Encoding::EUC_JP), :headers => { |
|
265 |
+ stub_request(:any, /no-encoding/).to_return(body: { |
|
266 |
+ value: huginn, |
|
267 |
+ }.to_json.encode(Encoding::EUC_JP).b, headers: { |
|
268 | 268 |
'Content-Type' => 'application/json', |
269 |
- }, :status => 200) |
|
269 |
+ }, status: 200) |
|
270 | 270 |
site = { |
271 | 271 |
'name' => "Some JSON Response", |
272 | 272 |
'expected_update_period_in_days' => "2", |
@@ -278,22 +278,22 @@ describe Agents::WebsiteAgent do |
||
278 | 278 |
}, |
279 | 279 |
'force_encoding' => 'EUC-JP', |
280 | 280 |
} |
281 |
- checker = Agents::WebsiteAgent.new(:name => "No Encoding Site", :options => site) |
|
281 |
+ checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site) |
|
282 | 282 |
checker.user = users(:bob) |
283 | 283 |
checker.save! |
284 | 284 |
|
285 |
- checker.check |
|
285 |
+ expect { checker.check }.to change { Event.count }.by(1) |
|
286 | 286 |
event = Event.last |
287 | 287 |
expect(event.payload['value']).to eq(huginn) |
288 | 288 |
end |
289 | 289 |
|
290 | 290 |
it 'should be overridden with force_encoding option' do |
291 | 291 |
huginn = "\u{601d}\u{8003}" |
292 |
- stub_request(:any, /wrong-encoding/).to_return(:body => { |
|
293 |
- :value => huginn, |
|
294 |
- }.to_json.encode(Encoding::EUC_JP), :headers => { |
|
292 |
+ stub_request(:any, /wrong-encoding/).to_return(body: { |
|
293 |
+ value: huginn, |
|
294 |
+ }.to_json.encode(Encoding::EUC_JP).b, headers: { |
|
295 | 295 |
'Content-Type' => 'application/json; UTF-8', |
296 |
- }, :status => 200) |
|
296 |
+ }, status: 200) |
|
297 | 297 |
site = { |
298 | 298 |
'name' => "Some JSON Response", |
299 | 299 |
'expected_update_period_in_days' => "2", |
@@ -305,11 +305,63 @@ describe Agents::WebsiteAgent do |
||
305 | 305 |
}, |
306 | 306 |
'force_encoding' => 'EUC-JP', |
307 | 307 |
} |
308 |
- checker = Agents::WebsiteAgent.new(:name => "Wrong Encoding Site", :options => site) |
|
308 |
+ checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site) |
|
309 | 309 |
checker.user = users(:bob) |
310 | 310 |
checker.save! |
311 | 311 |
|
312 |
- checker.check |
|
312 |
+ expect { checker.check }.to change { Event.count }.by(1) |
|
313 |
+ event = Event.last |
|
314 |
+ expect(event.payload['value']).to eq(huginn) |
|
315 |
+ end |
|
316 |
+ |
|
317 |
+ it 'should be determined by charset in Content-Type' do |
|
318 |
+ huginn = "\u{601d}\u{8003}" |
|
319 |
+ stub_request(:any, /charset-euc-jp/).to_return(body: { |
|
320 |
+ value: huginn, |
|
321 |
+ }.to_json.encode(Encoding::EUC_JP), headers: { |
|
322 |
+ 'Content-Type' => 'application/json; charset=EUC-JP', |
|
323 |
+ }, status: 200) |
|
324 |
+ site = { |
|
325 |
+ 'name' => "Some JSON Response", |
|
326 |
+ 'expected_update_period_in_days' => "2", |
|
327 |
+ 'type' => "json", |
|
328 |
+ 'url' => "http://charset-euc-jp.example.com", |
|
329 |
+ 'mode' => 'on_change', |
|
330 |
+ 'extract' => { |
|
331 |
+ 'value' => { 'path' => 'value' }, |
|
332 |
+ }, |
|
333 |
+ } |
|
334 |
+ checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site) |
|
335 |
+ checker.user = users(:bob) |
|
336 |
+ checker.save! |
|
337 |
+ |
|
338 |
+ expect { checker.check }.to change { Event.count }.by(1) |
|
339 |
+ event = Event.last |
|
340 |
+ expect(event.payload['value']).to eq(huginn) |
|
341 |
+ end |
|
342 |
+ |
|
343 |
# Spec: when Content-Type carries a charset label that is treated as unknown
# ("unicode" here, per the spec title), the body must still be read as UTF-8
# and produce exactly one event with the original value.
+ it 'should default to UTF-8 when unknown charset is found' do |
|
344 |
# Two CJK characters (U+601D, U+8003) used as the round-trip payload.
+ huginn = "\u{601d}\u{8003}" |
|
345 |
+ stub_request(:any, /charset-unknown/).to_return(body: { |
|
346 |
+ value: huginn, |
|
347 |
# `.b` returns a copy tagged ASCII-8BIT: the JSON bytes are served without
# an encoding tag, so only the header is left to indicate the encoding.
+ }.to_json.b, headers: { |
|
348 |
# NOTE(review): assumes "unicode" is not a name Encoding.find resolves, so
# the fallback to the default encoding is what gets exercised; confirm.
+ 'Content-Type' => 'application/json; charset=unicode', |
|
349 |
+ }, status: 200) |
|
350 |
+ site = { |
|
351 |
+ 'name' => "Some JSON Response", |
|
352 |
+ 'expected_update_period_in_days' => "2", |
|
353 |
+ 'type' => "json", |
|
354 |
+ 'url' => "http://charset-unknown.example.com", |
|
355 |
+ 'mode' => 'on_change', |
|
356 |
+ 'extract' => { |
|
357 |
+ 'value' => { 'path' => 'value' }, |
|
358 |
+ }, |
|
359 |
+ } |
|
360 |
+ checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site) |
|
361 |
+ checker.user = users(:bob) |
|
362 |
+ checker.save! |
|
363 |
+ |
|
364 |
# The check must create exactly one event; the assertions below inspect it.
+ expect { checker.check }.to change { Event.count }.by(1) |
|
313 | 365 |
event = Event.last |
314 | 366 |
expect(event.payload['value']).to eq(huginn) |
315 | 367 |
end |