minor code cleanup

Andrew Cantino 11 anni fa
parent
commit
f4bae10250
2 ha cambiato i file con 42 aggiunte e 41 eliminazioni
  1. 42 40
      app/models/agents/website_agent.rb
  2. 0 1
      spec/models/agents/website_agent_spec.rb

+ 42 - 40
app/models/agents/website_agent.rb

@@ -6,6 +6,11 @@ module Agents
6 6
   class WebsiteAgent < Agent
7 7
     cannot_receive_events!
8 8
 
9
+    default_schedule "every_12h"
10
+
11
+    UNIQUENESS_LOOK_BACK = 200
12
+    UNIQUENESS_FACTOR = 3
13
+
9 14
     description <<-MD
10 15
       The WebsiteAgent scrapes a website, XML document, or JSON feed and creates Events based on the results.
11 16
 
@@ -34,20 +39,15 @@ module Agents
34 39
 
35 40
       Can be configured to use HTTP basic auth by including the `basic_auth` parameter with `username:password`.
36 41
 
37
-      Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent (only used to set the "working" status).
42
+      Set `expected_update_period_in_days` to the maximum amount of time that you'd expect to pass between Events being created by this Agent.  This is only used to set the "working" status.
38 43
 
39
-      Set `uniqueness_look_back` (defaults to the larger of 200, 3x the number of received events) to limit the number of events checked for uniqueness (typically for performance).
44
+      Set `uniqueness_look_back` to limit the number of events checked for uniqueness (typically for performance).  This defaults to the larger of #{UNIQUENESS_LOOK_BACK} or #{UNIQUENESS_FACTOR}x the number of detected received results.
40 45
     MD
41 46
 
42 47
     event_description do
43 48
       "Events will have the fields you specified.  Your options look like:\n\n    #{Utils.pretty_print options['extract']}"
44 49
     end
45 50
 
46
-    default_schedule "every_12h"
47
-
48
-    UNIQUENESS_LOOK_BACK = 200
49
-    UNIQUENESS_FACTOR = 3
50
-
51 51
     def working?
52 52
       event_created_within?(options['expected_update_period_in_days']) && !recent_error_logs?
53 53
     end
@@ -66,54 +66,44 @@ module Agents
66 66
     end
67 67
 
68 68
     def validate_options
69
-      # Check required fields are present
69
+      # Check for required fields
70 70
       errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
71 71
       if !options['extract'].present? && extraction_type != "json"
72 72
         errors.add(:base, "extract is required for all types except json")
73 73
       end
74
-      # Check options:
74
+
75
+      # Check for optional fields
75 76
       if options['mode'].present?
76
-        if options['mode'] != "on_change" && options['mode'] != "all"
77
-          errors.add(:base, "mode should be all or on_change")
78
-        end
77
+        errors.add(:base, "mode must be set to on_change or all") unless %w[on_change all].include?(options['mode'])
79 78
       end
80
-      # Check integer variables:      
79
+
81 80
       if options['expected_update_period_in_days'].present?
82
-        begin
83
-          Integer(options['expected_update_period_in_days'])
84
-          rescue
85
-          errors.add(:base, "Invalid expected_update_period_in_days format")
86
-        end
81
+        errors.add(:base, "Invalid expected_update_period_in_days format") unless is_positive_integer?(options['expected_update_period_in_days'])
87 82
       end
83
+
88 84
       if options['uniqueness_look_back'].present?
89
-        begin
90
-          Integer(options['uniqueness_look_back'])
91
-          rescue
92
-          errors.add(:base, "Invalid uniqueness_look_back format")
93
-        end      
85
+        errors.add(:base, "Invalid uniqueness_look_back format") unless is_positive_integer?(options['uniqueness_look_back'])
94 86
       end
95 87
     end
96 88
 
97 89
     def check
98 90
       hydra = Typhoeus::Hydra.new
99 91
       log "Fetching #{options['url']}"
100
-      request_opts = {:followlocation => true}
101
-      if options['basic_auth'].present?
102
-        request_opts[:userpwd] = options['basic_auth']
103
-      end
92
+      request_opts = { :followlocation => true }
93
+      request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present?
104 94
       request = Typhoeus::Request.new(options['url'], request_opts)
95
+
105 96
       request.on_failure do |response|
106 97
         error "Failed: #{response.inspect}"
107 98
       end
99
+
108 100
       request.on_success do |response|
109 101
         doc = parse(response.body)
110 102
 
111 103
         if extract_full_json?
112
-          old_events = previous_payloads 1
113
-          result = doc
114
-          if store_payload? old_events, result
115
-            log "Storing new result for '#{name}': #{result.inspect}"
116
-            create_event :payload => result
104
+          if store_payload!(previous_payloads(1), doc)
105
+            log "Storing new result for '#{name}': #{doc.inspect}"
106
+            create_event :payload => doc
117 107
           end
118 108
         else
119 109
           output = {}
@@ -152,7 +142,7 @@ module Agents
152 142
               end
153 143
             end
154 144
 
155
-            if store_payload? old_events, result
145
+            if store_payload!(old_events, result)
156 146
               log "Storing new parsed result for '#{name}': #{result.inspect}"
157 147
               create_event :payload => result
158 148
             end
@@ -165,16 +155,20 @@ module Agents
165 155
 
166 156
     private
167 157
 
168
-    def store_payload?(old_events, result)
169
-      if !options['mode']
158
+    # This method returns true if the result should be stored as a new event.
159
+    # If mode is set to 'on_change', this method may return false and update an existing
160
+    # event to expire further in the future.
161
+    def store_payload!(old_events, result)
162
+      if !options['mode'].present?
170 163
         return true
171 164
       elsif options['mode'].to_s == "all"
172 165
         return true
173 166
       elsif options['mode'].to_s == "on_change"
167
+        result_json = result.to_json
174 168
         old_events.each do |old_event|
175
-          if old_event.payload.to_json == result.to_json
169
+          if old_event.payload.to_json == result_json
176 170
             old_event.expires_at = new_event_expiration_date
177
-            old_event.save
171
+            old_event.save!
178 172
             return false
179 173
          end
180 174
         end
@@ -187,8 +181,8 @@ module Agents
187 181
       if options['uniqueness_look_back'].present?
188 182
         look_back = options['uniqueness_look_back'].to_i
189 183
       else
190
-        # Larger of UNIQUENESS_FACTOR*num_events and UNIQUENESS_LOOK_BACK
191
-        look_back = UNIQUENESS_FACTOR*num_events
184
+        # Larger of UNIQUENESS_FACTOR * num_events and UNIQUENESS_LOOK_BACK
185
+        look_back = UNIQUENESS_FACTOR * num_events
192 186
         if look_back < UNIQUENESS_LOOK_BACK
193 187
           look_back = UNIQUENESS_LOOK_BACK
194 188
         end
@@ -197,7 +191,7 @@ module Agents
197 191
     end
198 192
 
199 193
     def extract_full_json?
200
-      (!options['extract'].present? && extraction_type == "json")
194
+      !options['extract'].present? && extraction_type == "json"
201 195
     end
202 196
 
203 197
     def extraction_type
@@ -224,5 +218,13 @@ module Agents
224 218
           raise "Unknown extraction type #{extraction_type}"
225 219
       end
226 220
     end
221
+
222
+    def is_positive_integer?(value)
223
+      begin
224
+        Integer(value) >= 0
225
+      rescue
226
+        false
227
+      end
228
+    end
227 229
   end
228 230
 end

+ 0 - 1
spec/models/agents/website_agent_spec.rb

@@ -256,5 +256,4 @@ describe Agents::WebsiteAgent do
256 256
       end
257 257
     end
258 258
   end
259
-
260 259
 end