@@ -103,6 +103,8 @@ module Agents |
||
| 103 | 103 |
|
| 104 | 104 |
Set `unzip` to `gzip` to inflate the resource using gzip. |
| 105 | 105 |
|
| 106 |
+ Set `consider_http_error_success` to an array of integer HTTP status codes, e.g. `[404]`, to treat responses with those statuses as successful and scrape their bodies as well. |
|
| 107 |
+ |
|
| 106 | 108 |
# Liquid Templating |
| 107 | 109 |
|
| 108 | 110 |
In Liquid templating, the following variable is available: |
@@ -149,6 +151,7 @@ module Agents |
||
| 149 | 151 |
errors.add(:base, "either url, url_from_event, or data_from_event are required") unless options['url'].present? || options['url_from_event'].present? || options['data_from_event'].present? |
| 150 | 152 |
errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present? |
| 151 | 153 |
validate_extract_options! |
| 154 |
+ validate_consider_http_success_option! |
|
| 152 | 155 |
|
| 153 | 156 |
# Check for optional fields |
| 154 | 157 |
if options['mode'].present? |
@@ -166,6 +169,27 @@ module Agents |
||
| 166 | 169 |
validate_web_request_options! |
| 167 | 170 |
end |
| 168 | 171 |
|
| 172 |
# Validates the optional `consider_http_error_success` option.
#
# Nothing is checked when the option is absent (nil). Otherwise the value must
# be a non-empty Array of unique Integers. Only the first rule that fails adds
# a message to `errors[:base]`. When every rule passes, the list is stored in
# `@error_codes_considered_success`.
def validate_consider_http_success_option!
  codes = options["consider_http_error_success"]
  return if codes.nil?

  # An empty list is rejected here so that it gets the "at least one status
  # code" message rather than the unrelated "only integer values" one.
  unless codes.is_a?(Array) && !codes.empty?
    errors.add(:base, "Must be an array and specify at least one status code")
    return
  end

  if codes.uniq.length != codes.length
    errors.add(:base, "Duplicate http code found")
    return
  end

  # `is_a?(Integer)` is used instead of comparing against `Fixnum`, which is
  # deprecated since Ruby 2.4 and no longer defined in Ruby 3.2+.
  unless codes.all? { |code| code.is_a?(Integer) }
    errors.add(:base, "Please make sure to use only integer values for code")
    return
  end

  @error_codes_considered_success = codes
end
|
| 192 |
+ |
|
| 169 | 193 |
def validate_extract_options! |
| 170 | 194 |
extraction_type = (extraction_type() rescue extraction_type(options)) |
| 171 | 195 |
case extract = options['extract'] |
@@ -273,7 +297,7 @@ module Agents |
||
| 273 | 297 |
uri = Utils.normalize_uri(url) |
| 274 | 298 |
log "Fetching #{uri}"
|
| 275 | 299 |
response = faraday.get(uri) |
| 276 |
- raise "Failed: #{response.inspect}" unless response.success?
|
|
| 300 |
+ raise "Failed: #{response.inspect}" unless consider_response_successful?(response)
|
|
| 277 | 301 |
|
| 278 | 302 |
interpolation_context.stack {
|
| 279 | 303 |
interpolation_context['_response_'] = ResponseDrop.new(response) |
@@ -353,6 +377,11 @@ module Agents |
||
| 353 | 377 |
end |
| 354 | 378 |
|
| 355 | 379 |
private |
| 380 |
# Tells whether a response should be treated as successful.
#
# A response qualifies when `response.success?` is truthy, or when
# `response.status` is one of the codes listed in the
# `consider_http_error_success` option.
#
# The codes are read from `options` on every call rather than from an
# instance variable filled in during validation, so the option is honoured
# even on an instance whose validations have not run.
def consider_response_successful?(response)
  return true if response.success?

  # Array() turns a missing option (nil) into an empty list.
  accepted_codes = Array(options["consider_http_error_success"])
  accepted_codes.include?(response.status)
end
|
| 356 | 385 |
|
| 357 | 386 |
def handle_event_data(data, event, existing_payload) |
| 358 | 387 |
handle_data(data, event.payload['url'], existing_payload) |
@@ -40,6 +40,23 @@ describe Agents::WebsiteAgent do |
||
| 40 | 40 |
expect(@checker).not_to be_valid |
| 41 | 41 |
end |
| 42 | 42 |
|
| 43 |
it 'should validate the consider_http_error_success fields' do
  # A list holding one integer status code is the canonical valid form.
  @checker.options['consider_http_error_success'] = [404]
  expect(@checker).to be_valid

  # Each of these values must make the agent invalid.
  [
    [404, 404],      # duplicate codes
    [404.0],         # float instead of integer
    ["not_a_code"],  # non-numeric entry
    [],              # no codes at all
    404,             # bare integer, not wrapped in an array
    "404",           # string instead of an array
  ].each do |invalid_value|
    @checker.options['consider_http_error_success'] = invalid_value
    expect(@checker).not_to be_valid
  end
end
|
| 59 |
+ |
|
| 43 | 60 |
it "should validate uniqueness_look_back" do |
| 44 | 61 |
@checker.options['uniqueness_look_back'] = "nonsense" |
| 45 | 62 |
expect(@checker).not_to be_valid |
@@ -169,6 +186,38 @@ describe Agents::WebsiteAgent do |
||
| 169 | 186 |
end |
| 170 | 187 |
end |
| 171 | 188 |
|
| 189 |
describe 'consider_http_error_success' do
  it 'should allow scraping from a 404 result' do
    json = {
      'response' => {
        'version' => 2,
        'title' => "hello!"
      }
    }
    zipped = ActiveSupport::Gzip.compress(json.to_json)
    # The stub answers with status 404 and a gzip-encoded JSON body.
    stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://gzip.com",
      'mode' => 'on_change',
      # String key with `=>`, like every other entry in this hash. The
      # `'key': value` form would create a Symbol key instead.
      'consider_http_error_success' => [404],
      'extract' => {
        'version' => { 'path' => 'response.version' },
      },
      # no unzip option
    }
    checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
    checker.user = users(:bob)
    checker.save!

    checker.check
    event = Event.last
    # An event carrying the extracted value shows the 404 body was scraped.
    expect(event.payload['version']).to eq(2)
  end
end
|
| 220 |
+ |
|
| 172 | 221 |
describe 'unzipping' do |
| 173 | 222 |
it 'should unzip automatically if the response has Content-Encoding: gzip' do |
| 174 | 223 |
json = {
|