@@ -39,7 +39,7 @@ module WebRequestConcern |
||
| 39 | 39 |
# detection, so we do that. |
| 40 | 40 |
case env[:response_headers][:content_type] |
| 41 | 41 |
when /;\s*charset\s*=\s*([^()<>@,;:\\\"\/\[\]?={}\s]+)/i
|
| 42 |
- encoding = Encoding.find($1) rescue nil |
|
| 42 |
+ encoding = Encoding.find($1) rescue @default_encoding |
|
| 43 | 43 |
when /\A\s*(?:text\/[^\s;]+|application\/(?:[^\s;]+\+)?(?:xml|json))\s*(?:;|\z)/i |
| 44 | 44 |
encoding = @default_encoding |
| 45 | 45 |
else |
@@ -47,7 +47,7 @@ module WebRequestConcern |
||
| 47 | 47 |
next |
| 48 | 48 |
end |
| 49 | 49 |
end |
| 50 |
- body.encode!(Encoding::UTF_8, encoding) unless body.encoding == Encoding::UTF_8 |
|
| 50 |
+ body.encode!(Encoding::UTF_8, encoding) |
|
| 51 | 51 |
end |
| 52 | 52 |
end |
| 53 | 53 |
end |
@@ -262,11 +262,11 @@ describe Agents::WebsiteAgent do |
||
| 262 | 262 |
describe 'encoding' do |
| 263 | 263 |
it 'should be forced with force_encoding option' do |
| 264 | 264 |
huginn = "\u{601d}\u{8003}"
|
| 265 |
- stub_request(:any, /no-encoding/).to_return(:body => {
|
|
| 266 |
- :value => huginn, |
|
| 267 |
- }.to_json.encode(Encoding::EUC_JP), :headers => {
|
|
| 265 |
+ stub_request(:any, /no-encoding/).to_return(body: {
|
|
| 266 |
+ value: huginn, |
|
| 267 |
+ }.to_json.encode(Encoding::EUC_JP).b, headers: {
|
|
| 268 | 268 |
'Content-Type' => 'application/json', |
| 269 |
- }, :status => 200) |
|
| 269 |
+ }, status: 200) |
|
| 270 | 270 |
site = {
|
| 271 | 271 |
'name' => "Some JSON Response", |
| 272 | 272 |
'expected_update_period_in_days' => "2", |
@@ -278,22 +278,22 @@ describe Agents::WebsiteAgent do |
||
| 278 | 278 |
}, |
| 279 | 279 |
'force_encoding' => 'EUC-JP', |
| 280 | 280 |
} |
| 281 |
- checker = Agents::WebsiteAgent.new(:name => "No Encoding Site", :options => site) |
|
| 281 |
+ checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site) |
|
| 282 | 282 |
checker.user = users(:bob) |
| 283 | 283 |
checker.save! |
| 284 | 284 |
|
| 285 |
- checker.check |
|
| 285 |
+ expect { checker.check }.to change { Event.count }.by(1)
|
|
| 286 | 286 |
event = Event.last |
| 287 | 287 |
expect(event.payload['value']).to eq(huginn) |
| 288 | 288 |
end |
| 289 | 289 |
|
| 290 | 290 |
it 'should be overridden with force_encoding option' do |
| 291 | 291 |
huginn = "\u{601d}\u{8003}"
|
| 292 |
- stub_request(:any, /wrong-encoding/).to_return(:body => {
|
|
| 293 |
- :value => huginn, |
|
| 294 |
- }.to_json.encode(Encoding::EUC_JP), :headers => {
|
|
| 292 |
+ stub_request(:any, /wrong-encoding/).to_return(body: {
|
|
| 293 |
+ value: huginn, |
|
| 294 |
+ }.to_json.encode(Encoding::EUC_JP).b, headers: {
|
|
| 295 | 295 |
'Content-Type' => 'application/json; UTF-8', |
| 296 |
- }, :status => 200) |
|
| 296 |
+ }, status: 200) |
|
| 297 | 297 |
site = {
|
| 298 | 298 |
'name' => "Some JSON Response", |
| 299 | 299 |
'expected_update_period_in_days' => "2", |
@@ -305,11 +305,63 @@ describe Agents::WebsiteAgent do |
||
| 305 | 305 |
}, |
| 306 | 306 |
'force_encoding' => 'EUC-JP', |
| 307 | 307 |
} |
| 308 |
- checker = Agents::WebsiteAgent.new(:name => "Wrong Encoding Site", :options => site) |
|
| 308 |
+ checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site) |
|
| 309 | 309 |
checker.user = users(:bob) |
| 310 | 310 |
checker.save! |
| 311 | 311 |
|
| 312 |
- checker.check |
|
| 312 |
+ expect { checker.check }.to change { Event.count }.by(1)
|
|
| 313 |
+ event = Event.last |
|
| 314 |
+ expect(event.payload['value']).to eq(huginn) |
|
| 315 |
+ end |
|
| 316 |
+ |
|
| 317 |
+ it 'should be determined by charset in Content-Type' do |
|
| 318 |
+ huginn = "\u{601d}\u{8003}"
|
|
| 319 |
+ stub_request(:any, /charset-euc-jp/).to_return(body: {
|
|
| 320 |
+ value: huginn, |
|
| 321 |
+ }.to_json.encode(Encoding::EUC_JP), headers: {
|
|
| 322 |
+ 'Content-Type' => 'application/json; charset=EUC-JP', |
|
| 323 |
+ }, status: 200) |
|
| 324 |
+ site = {
|
|
| 325 |
+ 'name' => "Some JSON Response", |
|
| 326 |
+ 'expected_update_period_in_days' => "2", |
|
| 327 |
+ 'type' => "json", |
|
| 328 |
+ 'url' => "http://charset-euc-jp.example.com", |
|
| 329 |
+ 'mode' => 'on_change', |
|
| 330 |
+ 'extract' => {
|
|
| 331 |
+ 'value' => { 'path' => 'value' },
|
|
| 332 |
+ }, |
|
| 333 |
+ } |
|
| 334 |
+ checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site) |
|
| 335 |
+ checker.user = users(:bob) |
|
| 336 |
+ checker.save! |
|
| 337 |
+ |
|
| 338 |
+ expect { checker.check }.to change { Event.count }.by(1)
|
|
| 339 |
+ event = Event.last |
|
| 340 |
+ expect(event.payload['value']).to eq(huginn) |
|
| 341 |
+ end |
|
| 342 |
+ |
|
| 343 |
+ it 'should default to UTF-8 when unknown charset is found' do |
|
| 344 |
+ huginn = "\u{601d}\u{8003}"
|
|
| 345 |
+ stub_request(:any, /charset-unknown/).to_return(body: {
|
|
| 346 |
+ value: huginn, |
|
| 347 |
+ }.to_json.b, headers: {
|
|
| 348 |
+ 'Content-Type' => 'application/json; charset=unicode', |
|
| 349 |
+ }, status: 200) |
|
| 350 |
+ site = {
|
|
| 351 |
+ 'name' => "Some JSON Response", |
|
| 352 |
+ 'expected_update_period_in_days' => "2", |
|
| 353 |
+ 'type' => "json", |
|
| 354 |
+ 'url' => "http://charset-unknown.example.com", |
|
| 355 |
+ 'mode' => 'on_change', |
|
| 356 |
+ 'extract' => {
|
|
| 357 |
+ 'value' => { 'path' => 'value' },
|
|
| 358 |
+ }, |
|
| 359 |
+ } |
|
| 360 |
+ checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site) |
|
| 361 |
+ checker.user = users(:bob) |
|
| 362 |
+ checker.save! |
|
| 363 |
+ |
|
| 364 |
+ expect { checker.check }.to change { Event.count }.by(1)
|
|
| 313 | 365 |
event = Event.last |
| 314 | 366 |
expect(event.payload['value']).to eq(huginn) |
| 315 | 367 |
end |