@@ -19,7 +19,7 @@ module Agents

       Options:

-      * `url` - The URL of the RSS feed.
+      * `url` - The URL of the RSS feed (an array of URLs can also be used; items with identical guids across feeds will be considered duplicates).
       * `clean` - Attempt to use [feed-normalizer](https://github.com/aasmith/feed-normalizer)'s' `clean!` method to cleanup HTML in the feed. Set to `true` to use.
       * `expected_update_period_in_days` - How often you expect this RSS feed to change. If more than this amount of time passes without an update, the Agent will mark itself as not working.
       * `headers` - When present, it should be a hash of headers to send with the request.
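To make the new option shape concrete, here is a minimal, hypothetical sketch of the two forms `url` can now take; the feed addresses are placeholders rather than anything referenced by this change.

```ruby
# Hypothetical RssAgent options showing the single-URL and array forms.
# The URLs below are placeholders.
single_feed_options = {
  'expected_update_period_in_days' => '2',
  'url' => 'http://example.com/feed.atom'        # original, single-URL form
}

multi_feed_options = {
  'expected_update_period_in_days' => '2',
  'url' => [                                     # array form added by this change
    'http://example.com/releases.atom',
    'http://example.com/blog.atom'
  ]
}
```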
@@ -70,32 +70,34 @@ module Agents
     end

     def check
-      response = faraday.get(interpolated['url'])
-      if response.success?
-        feed = FeedNormalizer::FeedNormalizer.parse(response.body)
-        feed.clean! if interpolated['clean'] == 'true'
-        created_event_count = 0
-        feed.entries.each do |entry|
-          entry_id = get_entry_id(entry)
-          if check_and_track(entry_id)
-            created_event_count += 1
-            create_event(payload: {
-                           id: entry_id,
-                           date_published: entry.date_published,
-                           last_updated: entry.last_updated,
-                           url: entry.url,
-                           urls: entry.urls,
-                           description: entry.description,
-                           content: entry.content,
-                           title: entry.title,
-                           authors: entry.authors,
-                           categories: entry.categories
-                         })
+      Array(interpolated['url']).each do |url|
+        response = faraday.get(url)
+        if response.success?
+          feed = FeedNormalizer::FeedNormalizer.parse(response.body)
+          feed.clean! if interpolated['clean'] == 'true'
+          created_event_count = 0
+          feed.entries.each do |entry|
+            entry_id = get_entry_id(entry)
+            if check_and_track(entry_id)
+              created_event_count += 1
+              create_event(payload: {
+                             id: entry_id,
+                             date_published: entry.date_published,
+                             last_updated: entry.last_updated,
+                             url: entry.url,
+                             urls: entry.urls,
+                             description: entry.description,
+                             content: entry.content,
+                             title: entry.title,
+                             authors: entry.authors,
+                             categories: entry.categories
+                           })
+            end
           end
+          log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)."
+        else
+          error "Failed to fetch #{interpolated['url']}: #{response.inspect}"
         end
-        log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)."
-      else
-        error "Failed to fetch #{interpolated['url']}: #{response.inspect}"
       end
     end

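The rewritten `check` leans on Ruby's `Kernel#Array` so one code path serves both the old string option and the new array option; a quick sketch of that standard behavior (plain Ruby, nothing Huginn-specific):

```ruby
# Kernel#Array wraps a bare value in a one-element array, passes arrays
# through unchanged, and turns nil into an empty array, so the per-URL loop
# handles every shape the `url` option can take.
Array('http://example.com/feed.atom')
# => ["http://example.com/feed.atom"]

Array(['http://example.com/a.atom', 'http://example.com/b.atom'])
# => ["http://example.com/a.atom", "http://example.com/b.atom"]

Array(nil)
# => []
```

Because every fetched feed flows through the same `check_and_track` bookkeeping (the `seen_ids` memory exercised in the spec below), an entry whose guid appears in more than one feed is only emitted once, which is what the updated option description means by treating identical guids as duplicates.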
@@ -25,6 +25,9 @@ describe Agents::RssAgent do
       agent.options['url'] = "http://google.com"
       expect(agent).to be_valid

+      agent.options['url'] = ["http://google.com", "http://yahoo.com"]
+      expect(agent).to be_valid
+
       agent.options['url'] = ""
       expect(agent).not_to be_valid

@@ -82,6 +85,15 @@ describe Agents::RssAgent do
       agent.check
       expect(agent.memory['seen_ids'].length).to eq(500)
     end
+
+    it "should support an array of URLs" do
+      agent.options['url'] = ["https://github.com/cantino/huginn/commits/master.atom", "http://feeds.feedburner.com/SlickdealsnetFP?format=atom"]
+      agent.save!
+
+      expect {
+        agent.check
+      }.to change { agent.events.count }.by(20 + 79)
+    end
   end

   context "when no ids are available" do