Merge pull request #852 from cantino/rss_agent_can_take_multiple_urls

The RssAgent can now take an array of feed URLs.

Andrew Cantino 9 years ago
parent
commit
88a217ce0a
2 changed files with 39 additions and 25 deletions
  1. 27 25
      app/models/agents/rss_agent.rb
  2. 12 0
      spec/models/agents/rss_agent_spec.rb

+ 27 - 25
app/models/agents/rss_agent.rb

@@ -19,7 +19,7 @@ module Agents
19 19
 
20 20
         Options:
21 21
 
22
-          * `url` - The URL of the RSS feed.
22
+          * `url` - The URL of the RSS feed (an array of URLs can also be used; items with identical guids across feeds will be considered duplicates).
23 23
           * `clean` - Attempt to use [feed-normalizer](https://github.com/aasmith/feed-normalizer)'s' `clean!` method to cleanup HTML in the feed.  Set to `true` to use.
24 24
           * `expected_update_period_in_days` - How often you expect this RSS feed to change.  If more than this amount of time passes without an update, the Agent will mark itself as not working.
25 25
           * `headers` - When present, it should be a hash of headers to send with the request.
@@ -70,32 +70,34 @@ module Agents
70 70
     end
71 71
 
72 72
     def check
73
-      response = faraday.get(interpolated['url'])
74
-      if response.success?
75
-        feed = FeedNormalizer::FeedNormalizer.parse(response.body)
76
-        feed.clean! if interpolated['clean'] == 'true'
77
-        created_event_count = 0
78
-        feed.entries.each do |entry|
79
-          entry_id = get_entry_id(entry)
80
-          if check_and_track(entry_id)
81
-            created_event_count += 1
82
-            create_event(payload: {
83
-              id: entry_id,
84
-              date_published: entry.date_published,
85
-              last_updated: entry.last_updated,
86
-              url: entry.url,
87
-              urls: entry.urls,
88
-              description: entry.description,
89
-              content: entry.content,
90
-              title: entry.title,
91
-              authors: entry.authors,
92
-              categories: entry.categories
93
-            })
73
+      Array(interpolated['url']).each do |url|
74
+        response = faraday.get(url)
75
+        if response.success?
76
+          feed = FeedNormalizer::FeedNormalizer.parse(response.body)
77
+          feed.clean! if interpolated['clean'] == 'true'
78
+          created_event_count = 0
79
+          feed.entries.each do |entry|
80
+            entry_id = get_entry_id(entry)
81
+            if check_and_track(entry_id)
82
+              created_event_count += 1
83
+              create_event(payload: {
84
+                id: entry_id,
85
+                date_published: entry.date_published,
86
+                last_updated: entry.last_updated,
87
+                url: entry.url,
88
+                urls: entry.urls,
89
+                description: entry.description,
90
+                content: entry.content,
91
+                title: entry.title,
92
+                authors: entry.authors,
93
+                categories: entry.categories
94
+              })
95
+            end
94 96
           end
97
+          log "Fetched #{url} and created #{created_event_count} event(s)."
98
+        else
99
+          error "Failed to fetch #{url}: #{response.inspect}"
95 100
         end
96
-        log "Fetched #{interpolated['url']} and created #{created_event_count} event(s)."
97
-      else
98
-        error "Failed to fetch #{interpolated['url']}: #{response.inspect}"
99 101
       end
100 102
     end
101 103
 

+ 12 - 0
spec/models/agents/rss_agent_spec.rb

@@ -25,6 +25,9 @@ describe Agents::RssAgent do
25 25
       agent.options['url'] = "http://google.com"
26 26
       expect(agent).to be_valid
27 27
 
28
+      agent.options['url'] = ["http://google.com", "http://yahoo.com"]
29
+      expect(agent).to be_valid
30
+
28 31
       agent.options['url'] = ""
29 32
       expect(agent).not_to be_valid
30 33
 
@@ -82,6 +85,15 @@ describe Agents::RssAgent do
82 85
       agent.check
83 86
       expect(agent.memory['seen_ids'].length).to eq(500)
84 87
     end
88
+
89
+    it "should support an array of URLs" do
90
+      agent.options['url'] = ["https://github.com/cantino/huginn/commits/master.atom", "http://feeds.feedburner.com/SlickdealsnetFP?format=atom"]
91
+      agent.save!
92
+
93
+      expect {
94
+        agent.check
95
+      }.to change { agent.events.count }.by(20 + 79)
96
+    end
85 97
   end
86 98
 
87 99
   context "when no ids are available" do