@@ -103,6 +103,8 @@ module Agents |
||
| 103 | 103 |
|
| 104 | 104 |
Set `unzip` to `gzip` to inflate the resource using gzip. |
| 105 | 105 |
|
| 106 |
+ Set `http_success_codes` to an array of status codes (e.g., `[404, 422]`) to treat HTTP response codes beyond 200 as successes. |
|
| 107 |
+ |
|
| 106 | 108 |
# Liquid Templating |
| 107 | 109 |
|
| 108 | 110 |
In Liquid templating, the following variable is available: |
@@ -149,6 +151,7 @@ module Agents |
||
| 149 | 151 |
errors.add(:base, "either url, url_from_event, or data_from_event are required") unless options['url'].present? || options['url_from_event'].present? || options['data_from_event'].present? |
| 150 | 152 |
errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present? |
| 151 | 153 |
validate_extract_options! |
| 154 |
+ validate_http_success_codes! |
|
| 152 | 155 |
|
| 153 | 156 |
# Check for optional fields |
| 154 | 157 |
if options['mode'].present? |
@@ -166,6 +169,25 @@ module Agents |
||
| 166 | 169 |
validate_web_request_options! |
| 167 | 170 |
end |
| 168 | 171 |
|
| 172 |
# Validates the optional `http_success_codes` option.
#
# A value for which `present?` is false is accepted and treated as "not set".
# Otherwise the value must be an Array with no duplicate entries, and every
# entry must render (via `to_s`) as a string consisting solely of digits,
# e.g. 404 or "404". Problems are reported through
# `errors.add(:http_success_codes, ...)`; only the first problem found is
# reported.
def validate_http_success_codes!
  codes = options["http_success_codes"]
  return unless codes.present?

  unless codes.is_a?(Array)
    errors.add(:http_success_codes, "must be an array and specify at least one status code")
    return
  end

  if codes.uniq.count != codes.count
    errors.add(:http_success_codes, "duplicate http code found")
    return
  end

  # \A and \z anchor the whole string. ^ and $ only anchor a single line, which
  # would let multi-line values such as "404\nfoo" pass as numeric.
  if codes.any? { |code| code.to_s !~ /\A\d+\z/ }
    errors.add(:http_success_codes, "please make sure to use only numeric values for code, ex 404, or \"404\"")
  end
end
|
| 190 |
+ |
|
| 169 | 191 |
def validate_extract_options! |
| 170 | 192 |
extraction_type = (extraction_type() rescue extraction_type(options)) |
| 171 | 193 |
case extract = options['extract'] |
@@ -273,7 +295,8 @@ module Agents |
||
| 273 | 295 |
uri = Utils.normalize_uri(url) |
| 274 | 296 |
log "Fetching #{uri}"
|
| 275 | 297 |
response = faraday.get(uri) |
| 276 |
- raise "Failed: #{response.inspect}" unless response.success?
|
|
| 298 |
+ |
|
| 299 |
+ raise "Failed: #{response.inspect}" unless consider_response_successful?(response)
|
|
| 277 | 300 |
|
| 278 | 301 |
interpolation_context.stack {
|
| 279 | 302 |
interpolation_context['_response_'] = ResponseDrop.new(response) |
@@ -353,6 +376,12 @@ module Agents |
||
| 353 | 376 |
end |
| 354 | 377 |
|
| 355 | 378 |
private |
| 379 |
# Decides whether a fetched response should be processed as a success.
#
# Returns true when `response.success?` is truthy. Otherwise returns true only
# if the `http_success_codes` option is an Array containing the response
# status, either as-is or in its string form (so both 404 and "404" match).
#
# A non-Array option value is ignored instead of being queried with
# `include?`: on a String that call does substring matching and raises
# TypeError when given an Integer status.
def consider_response_successful?(response)
  return true if response.success?

  extra_codes = options["http_success_codes"]
  return false unless extra_codes.is_a?(Array)

  status = response.status
  extra_codes.include?(status.to_s) || extra_codes.include?(status)
end
|
| 356 | 385 |
|
| 357 | 386 |
def handle_event_data(data, event, existing_payload) |
| 358 | 387 |
handle_data(data, event.payload['url'], existing_payload) |
@@ -40,6 +40,32 @@ describe Agents::WebsiteAgent do |
||
| 40 | 40 |
expect(@checker).not_to be_valid |
| 41 | 41 |
end |
| 42 | 42 |
|
| 43 |
it 'should validate the http_success_codes fields' do
  # Each row is [value assigned to the option, whether the agent must then be valid].
  # Rows are applied in order against the same @checker instance.
  cases = [
    [[404], true],
    [[404, 404], false],
    [[404, "422"], true],
    [[404.0], false],
    [["not_a_code"], false],
    [[], true],
    ['', true],
    [false, true],
  ]

  cases.each do |codes, should_be_valid|
    @checker.options['http_success_codes'] = codes
    if should_be_valid
      expect(@checker).to be_valid
    else
      expect(@checker).not_to be_valid
    end
  end
end
|
| 68 |
+ |
|
| 43 | 69 |
it "should validate uniqueness_look_back" do |
| 44 | 70 |
@checker.options['uniqueness_look_back'] = "nonsense" |
| 45 | 71 |
expect(@checker).not_to be_valid |
@@ -169,6 +195,38 @@ describe Agents::WebsiteAgent do |
||
| 169 | 195 |
end |
| 170 | 196 |
end |
| 171 | 197 |
|
| 198 |
describe 'http_success_codes' do
  it 'should allow scraping from a 404 result' do
    # The stubbed endpoint answers with status 404 and a gzip-compressed JSON body.
    payload = { 'response' => { 'version' => 2, 'title' => "hello!" } }
    compressed_body = ActiveSupport::Gzip.compress(payload.to_json)
    stub_request(:any, /gzip/).to_return(
      body: compressed_body,
      headers: { 'Content-Encoding' => 'gzip' },
      status: 404
    )

    # 404 is listed in http_success_codes; no 'unzip' option is given.
    agent_options = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://gzip.com",
      'mode' => 'on_change',
      'http_success_codes' => [404],
      'extract' => {
        'version' => { 'path' => 'response.version' },
      },
    }

    checker = Agents::WebsiteAgent.new(name: "Weather Site", options: agent_options)
    checker.user = users(:bob)
    checker.save!
    checker.check

    # The event created from the 404 response carries the extracted value.
    latest_event = Event.last
    expect(latest_event.payload['version']).to eq(2)
  end
end
|
| 229 |
+ |
|
| 172 | 230 |
describe 'unzipping' do |
| 173 | 231 |
it 'should unzip automatically if the response has Content-Encoding: gzip' do |
| 174 | 232 |
json = {
|