Add :force_encoding support to WebsiteAgent.

Akinori MUSHA 11 лет назад
Родитель
Коммит
7bc20a0b44
2 изменённых файла: 89 добавлено и 4 удалено
  1. 21 2
      app/models/agents/website_agent.rb
  2. 68 2
      spec/models/agents/website_agent_spec.rb

+ 21 - 2
app/models/agents/website_agent.rb

@@ -42,6 +42,8 @@ module Agents
42 42
       Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.  This is only used to set the "working" status.
43 43
 
44 44
       Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance).  This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results.
45
+
46
+      Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset.
45 47
     MD
46 48
 
47 49
     event_description do
@@ -85,6 +87,19 @@ module Agents
85 87
       if options['uniqueness_look_back'].present?
86 88
         errors.add(:base, "Invalid uniqueness_look_back format") unless is_positive_integer?(options['uniqueness_look_back'])
87 89
       end
90
+
91
+      if (encoding = options['force_encoding']).present?
92
+        case encoding
93
+        when String
94
+          begin
95
+            Encoding.find(encoding)
96
+          rescue ArgumentError
97
+            errors.add(:base, "Unknown encoding: #{encoding.inspect}")
98
+          end
99
+        else
100
+          errors.add(:base, "force_encoding must be a string")
101
+        end
102
+      end
88 103
     end
89 104
 
90 105
     def check
@@ -99,7 +114,11 @@ module Agents
99 114
       end
100 115
 
101 116
       request.on_success do |response|
102
-        doc = parse(response.body)
117
+        body = response.body
118
+        if encoding = options['force_encoding']
119
+          body = body.encode(Encoding::UTF_8, encoding)
120
+        end
121
+        doc = parse(body)
103 122
 
104 123
         if extract_full_json?
105 124
           if store_payload!(previous_payloads(1), doc)
@@ -228,4 +247,4 @@ module Agents
228 247
       end
229 248
     end
230 249
   end
231
-end
250
+end

+ 68 - 2
spec/models/agents/website_agent_spec.rb

@@ -32,7 +32,17 @@ describe Agents::WebsiteAgent do
32 32
         lambda { @checker.save! }.should raise_error;
33 33
         @checker.options = @site
34 34
       end
35
-    
35
+
36
+      it "should validate the force_encoding option" do
37
+        @checker.options['force_encoding'] = 'UTF-8'
38
+        lambda { @checker.save! }.should_not raise_error;
39
+        @checker.options['force_encoding'] = ['UTF-8']
40
+        lambda { @checker.save! }.should raise_error;
41
+        @checker.options['force_encoding'] = 'UTF-42'
42
+        lambda { @checker.save! }.should raise_error;
43
+        @checker.options = @site
44
+      end
45
+
36 46
       it "should check for changes (and update Event.expires_at)" do
37 47
         lambda { @checker.check }.should change { Event.count }.by(1)
38 48
         event = Event.last
@@ -83,6 +93,62 @@ describe Agents::WebsiteAgent do
83 93
       end
84 94
     end
85 95
 
96
+    describe 'encoding' do
97
+      it 'should be forced with force_encoding option' do
98
+        huginn = "\u{601d}\u{8003}"
99
+        stub_request(:any, /no-encoding/).to_return(:body => {
100
+            :value => huginn,
101
+          }.to_json.encode(Encoding::EUC_JP), :headers => {
102
+            'Content-Type' => 'application/json',
103
+          }, :status => 200)
104
+        site = {
105
+          'name' => "Some JSON Response",
106
+          'expected_update_period_in_days' => 2,
107
+          'type' => "json",
108
+          'url' => "http://no-encoding.example.com",
109
+          'mode' => 'on_change',
110
+          'extract' => {
111
+            'value' => { 'path' => 'value' },
112
+          },
113
+          'force_encoding' => 'EUC-JP',
114
+        }
115
+        checker = Agents::WebsiteAgent.new(:name => "No Encoding Site", :options => site)
116
+        checker.user = users(:bob)
117
+        checker.save!
118
+
119
+        checker.check
120
+        event = Event.last
121
+        event.payload['value'].should == huginn
122
+      end
123
+
124
+      it 'should be overridden with force_encoding option' do
125
+        huginn = "\u{601d}\u{8003}"
126
+        stub_request(:any, /wrong-encoding/).to_return(:body => {
127
+            :value => huginn,
128
+          }.to_json.encode(Encoding::EUC_JP), :headers => {
129
+            'Content-Type' => 'application/json; UTF-8',
130
+          }, :status => 200)
131
+        site = {
132
+          'name' => "Some JSON Response",
133
+          'expected_update_period_in_days' => 2,
134
+          'type' => "json",
135
+          'url' => "http://wrong-encoding.example.com",
136
+          'mode' => 'on_change',
137
+          'extract' => {
138
+            'value' => { 'path' => 'value' },
139
+          },
140
+          'force_encoding' => 'EUC-JP',
141
+        }
142
+        checker = Agents::WebsiteAgent.new(:name => "Wrong Encoding Site", :options => site)
143
+        checker.user = users(:bob)
144
+        checker.save!
145
+
146
+        checker.check
147
+        event = Event.last
148
+        event.payload['value'].should == huginn
149
+      end
150
+    end
151
+
86 152
     describe '#working?' do
87 153
       it 'checks if events have been received within the expected receive period' do
88 154
         stubbed_time = Time.now
@@ -258,4 +324,4 @@ describe Agents::WebsiteAgent do
258 324
       end
259 325
     end
260 326
   end
261
-end
327
+end