@@ -1,5 +1,6 @@
 # Changes

+* Jun 19, 2015 - Add `url_from_event` to WebsiteAgent.
 * Jun 17, 2015 - RssAgent emits events for new feed items in chronological order.
 * Jun 15, 2015 - Liquid filter `uri_expand` added.
 * Jun 12, 2015 - RSSAgent can now accept an array of URLs.
@@ -19,7 +19,7 @@ module Agents

       `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)

-      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.
+      The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload, or if you set `url_from_event` it is used as a Liquid template to generate the url to access. If you specify `merge` as the `mode`, it will retain the old payload and update it with the new values.

       # Supported Document Types

@@ -135,7 +135,8 @@ module Agents

    def validate_options
      # Check for required fields
-      errors.add(:base, "url and expected_update_period_in_days are required") unless options['expected_update_period_in_days'].present? && options['url'].present?
+      errors.add(:base, "either url or url_from_event is required") unless options['url'].present? || options['url_from_event'].present?
+      errors.add(:base, "expected_update_period_in_days is required") unless options['expected_update_period_in_days'].present?
      if !options['extract'].present? && extraction_type != "json"
        errors.add(:base, "extract is required for all types except json")
      end
@@ -257,7 +258,12 @@ module Agents |
||
| 257 | 258 |
def receive(incoming_events) |
| 258 | 259 |
incoming_events.each do |event| |
| 259 | 260 |
interpolate_with(event) do |
| 260 |
- url_to_scrape = event.payload['url'] |
|
| 261 |
+ url_to_scrape = |
|
| 262 |
+ if url_template = options['url_from_event'].presence |
|
| 263 |
+ interpolate_string(url_template) |
|
| 264 |
+ else |
|
| 265 |
+ event.payload['url'] |
|
| 266 |
+ end |
|
| 261 | 267 |
check_url(url_to_scrape, |
| 262 | 268 |
interpolated['mode'].to_s == "merge" ? event.payload : {})
|
| 263 | 269 |
end |
@@ -633,6 +633,17 @@ fire: hot
        }.to change { Event.count }.by(1)
      end

+      it "should use url_from_event as url to scrape if it exists when receiving an event" do
+        stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
+
+        @checker.options = @valid_options.merge(
+          'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
+        )
+        @checker.receive([@event])
+
+        expect(stub).to have_been_requested
+      end
+
      it "should interpolate values from incoming event payload" do
        expect {
          @valid_options['extract'] = {