Merge pull request #1124 from cantino/data_output_agent_push

DataOutputAgent: Optionally push to PubSubHubbub hubs

Akinori MUSHA 9 years ago
parent
commit
a871ea2f2b
2 changed files with 133 additions and 28 deletions
  1. 100 28
      app/models/agents/data_output_agent.rb
  2. 33 0
      spec/models/agents/data_output_agent_spec.rb

+ 100 - 28
app/models/agents/data_output_agent.rb

@@ -1,5 +1,7 @@
1 1
 module Agents
2 2
   class DataOutputAgent < Agent
3
+    include WebRequestConcern
4
+
3 5
     cannot_be_scheduled!
4 6
 
5 7
     description  do
@@ -19,9 +21,10 @@ module Agents
19 21
 
20 22
           * `secrets` - An array of tokens that the requestor must provide for light-weight authentication.
21 23
           * `expected_receive_period_in_days` - How often you expect data to be received by this Agent from other Agents.
22
-          * `template` - A JSON object representing a mapping between item output keys and incoming event values.  Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) to format the values.  Values of the `link`, `title`, `description` and `icon` keys will be put into the \\<channel\\> section of RSS output.  The `item` key will be repeated for every Event.  The `pubDate` key for each item will have the creation time of the Event unless given.
24
+          * `template` - A JSON object representing a mapping between item output keys and incoming event values.  Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) to format the values.  Values of the `link`, `title`, `description` and `icon` keys will be put into the \\<channel\\> section of RSS output.  Value of the `self` key will be used as URL for this feed itself, which is useful when you serve it via reverse proxy.  The `item` key will be repeated for every Event.  The `pubDate` key for each item will have the creation time of the Event unless given.
23 25
           * `events_to_show` - The number of events to output in RSS or JSON. (default: `40`)
24 26
           * `ttl` - A value for the \\<ttl\\> element in RSS output. (default: `60`)
27
+          * `push_hubs` - Set to a list of PubSubHubbub endpoints you want to publish an update to every time this agent receives an event. (default: none)  Popular hubs include [Superfeedr](https://pubsubhubbub.superfeedr.com/) and [Google](https://pubsubhubbub.appspot.com/).  Note that publishing updates will make your feed URL known to the public, so if you want to keep it secret, set up a reverse proxy to serve your feed via a safe URL and specify it in `template.self`.
25 28
 
26 29
         If you'd like to output RSS tags with attributes, such as `enclosure`, use something like the following in your `template`:
27 30
 
@@ -95,6 +98,29 @@ module Agents
95 98
       unless options['template'].present? && options['template']['item'].present? && options['template']['item'].is_a?(Hash)
96 99
         errors.add(:base, "Please provide template and template.item")
97 100
       end
101
+
102
+      case options['push_hubs']
103
+      when nil
104
+      when Array
105
+        options['push_hubs'].each do |hub|
106
+          case hub
107
+          when /\{/
108
+            # Liquid templating
109
+          when String
110
+            begin
111
+              URI.parse(hub)
112
+            rescue URI::Error
113
+              errors.add(:base, "invalid URL found in push_hubs")
114
+              break
115
+            end
116
+          else
117
+            errors.add(:base, "push_hubs must be an array of endpoint URLs")
118
+            break
119
+          end
120
+        end
121
+      else
122
+        errors.add(:base, "push_hubs must be an array")
123
+      end
98 124
     end
99 125
 
100 126
     def events_to_show
@@ -114,11 +140,12 @@ module Agents
114 140
     end
115 141
 
116 142
     def feed_url(options = {})
117
-      feed_link + Rails.application.routes.url_helpers.
118
-                  web_requests_path(agent_id: id || ':id',
119
-                                    user_id: user_id,
120
-                                    secret: options[:secret],
121
-                                    format: options[:format])
143
+      interpolated['template']['self'].presence ||
144
+        feed_link + Rails.application.routes.url_helpers.
145
+                    web_requests_path(agent_id: id || ':id',
146
+                                      user_id: user_id,
147
+                                      secret: options[:secret],
148
+                                      format: options[:format])
122 149
     end
123 150
 
124 151
     def feed_icon
@@ -129,6 +156,10 @@ module Agents
129 156
       interpolated['template']['description'].presence || "A feed of Events received by the '#{name}' Huginn Agent"
130 157
     end
131 158
 
159
     # Returns the interpolated `push_hubs` option, or an empty array when
     # that option is missing or blank, so callers can iterate unconditionally.
+    def push_hubs
160
+      interpolated['push_hubs'].presence || []
161
+    end
162
+
132 163
     def receive_web_request(params, method, format)
133 164
       unless interpolated['secrets'].include?(params['secret'])
134 165
         if format =~ /json/
@@ -159,40 +190,54 @@ module Agents
159 190
           interpolated
160 191
         end
161 192
 
193
+        now = Time.now
194
+
162 195
         if format =~ /json/
163 196
           content = {
164 197
             'title' => feed_title,
165 198
             'description' => feed_description,
166
-            'pubDate' => Time.now,
199
+            'pubDate' => now,
167 200
             'items' => simplify_item_for_json(items)
168 201
           }
169 202
 
170 203
           return [content, 200]
171 204
         else
172
-          content = Utils.unindent(<<-XML)
173
-            <?xml version="1.0" encoding="UTF-8" ?>
174
-            <rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
175
-            <channel>
176
-             <atom:link href=#{feed_url(secret: params['secret'], format: :xml).encode(xml: :attr)} rel="self" type="application/rss+xml" />
177
-             <atom:icon>#{feed_icon.encode(xml: :text)}</atom:icon>
178
-             <title>#{feed_title.encode(xml: :text)}</title>
179
-             <description>#{feed_description.encode(xml: :text)}</description>
180
-             <link>#{feed_link.encode(xml: :text)}</link>
181
-             <lastBuildDate>#{Time.now.rfc2822.to_s.encode(xml: :text)}</lastBuildDate>
182
-             <pubDate>#{Time.now.rfc2822.to_s.encode(xml: :text)}</pubDate>
183
-             <ttl>#{feed_ttl}</ttl>
184
-
205
+          hub_links = push_hubs.map { |hub|
206
+            <<-XML
207
+ <atom:link rel="hub" href=#{hub.encode(xml: :attr)}/>
208
+            XML
209
+          }.join
210
+
211
+          items = simplify_item_for_xml(items)
212
+                  .to_xml(skip_types: true, root: "items", skip_instruct: true, indent: 1)
213
+                  .gsub(%r{^</?items>\n}, '')
214
+
215
+          return [<<-XML, 200, 'text/xml']
216
+<?xml version="1.0" encoding="UTF-8" ?>
217
+<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom">
218
+<channel>
219
+ <atom:link href=#{feed_url(secret: params['secret'], format: :xml).encode(xml: :attr)} rel="self" type="application/rss+xml" />
220
+ <atom:icon>#{feed_icon.encode(xml: :text)}</atom:icon>
221
+#{hub_links}
222
+ <title>#{feed_title.encode(xml: :text)}</title>
223
+ <description>#{feed_description.encode(xml: :text)}</description>
224
+ <link>#{feed_link.encode(xml: :text)}</link>
225
+ <lastBuildDate>#{now.rfc2822.to_s.encode(xml: :text)}</lastBuildDate>
226
+ <pubDate>#{now.rfc2822.to_s.encode(xml: :text)}</pubDate>
227
+ <ttl>#{feed_ttl}</ttl>
228
+#{items}
229
+</channel>
230
+</rss>
185 231
           XML
232
+        end
233
+      end
234
+    end
186 235
 
187
-          content += simplify_item_for_xml(items).to_xml(skip_types: true, root: "items", skip_instruct: true, indent: 1).gsub(/^<\/?items>/, '').strip
188
-
189
-          content += Utils.unindent(<<-XML)
190
-            </channel>
191
-            </rss>
192
-          XML
236
+    def receive(incoming_events)
237
+      url = feed_url(secret: interpolated['secrets'].first, format: :xml)
193 238
 
194
-          return [content, 200, 'text/xml']
195
-        end
239
+      push_hubs.each do |hub|
240
+        push_to_hub(hub, url)
196 241
       end
197 242
     end
198 243
 
@@ -261,5 +306,32 @@ module Agents
261 306
         item
262 307
       end
263 308
     end
309
+
310
     # Sends a publish notification for the feed at `url` to a single hub.
     #
     # hub - endpoint URL of the hub, as a String.
     # url - feed URL reported to the hub in the `hub.url` field.
     #
     # Problems are reported through `error` instead of being raised: an
     # unusable endpoint returns early, and a failed POST is rescued below.
+    def push_to_hub(hub, url)
311
+      hub_uri =
312
+        begin
313
+          URI.parse(hub)
314
+        rescue URI::Error
315
+          nil
316
+        end
317
+
318
       # Covers both the nil produced by the rescue above and strings that
       # parse to something other than an HTTP URI (URI::HTTPS subclasses
       # URI::HTTP in Ruby's standard library, so https endpoints pass).
+      if !hub_uri.is_a?(URI::HTTP)
319
+        error "Invalid push endpoint: #{hub}"
320
+        return
321
+      end
322
+
323
+      log "Pushing #{url} to #{hub_uri}"
324
+
325
       # The log line above is still written during a dry run; only the
       # request itself is skipped.
+      return if dry_run?
326
+
327
+      begin
328
         # NOTE(review): `faraday` is presumably supplied by WebRequestConcern,
         # which this class includes -- confirm.
+        faraday.post hub_uri, {
329
+          'hub.mode' => 'publish',
330
+          'hub.url' => url
331
+        }
332
      # Any StandardError from the POST is logged and swallowed, so the
      # method never raises on a failed push.
+     rescue => e
333
+       error "Push failed: #{e.message}"
334
+      end
335
+    end
264 336
   end
265 337
 end

+ 33 - 0
spec/models/agents/data_output_agent_spec.rb

@@ -73,6 +73,29 @@ describe Agents::DataOutputAgent do
73 73
     end
74 74
   end
75 75
 
76
   # Checks the hub notification that #receive sends when push_hubs is set.
+  describe "#receive" do
77
+    it "should push to hubs when push_hubs is given" do
78
+      agent.options[:push_hubs] = %w[http://push.example.com]
79
+      agent.options[:template] = { 'link' => 'http://huginn.example.org' }
80
+
81
       # Set by the stub below to the sorted, decoded form fields of the
       # POST body; stays nil if no matching request is made.
+      alist = nil
82
+
83
       # The stub only matches POSTs whose Content-Type is
       # application/x-www-form-urlencoded, optionally followed by
       # parameters after a semicolon.
+      stub_request(:post, 'http://push.example.com/')
84
+        .with(headers: { 'Content-Type' => %r{\Aapplication/x-www-form-urlencoded\s*(?:;|\z)} })
85
+        .to_return { |request|
86
+        alist = URI.decode_www_form(request.body).sort
87
+        { status: 200, body: 'ok' }
88
+      }
89
+
90
+      agent.receive(events(:bob_website_agent_event))
91
+
92
       # Because alist starts as nil, this expectation also fails when no
       # push request was sent at all.
+      expect(alist).to eq [
93
+        ["hub.mode", "publish"],
94
+        ["hub.url", agent.feed_url(secret: agent.options[:secrets].first, format: :xml)]
95
+      ]
96
+    end
97
+  end
98
+
76 99
   describe "#receive_web_request" do
77 100
     before do
78 101
       current_time = Time.now
@@ -170,6 +193,16 @@ describe Agents::DataOutputAgent do
170 193
         XML
171 194
       end
172 195
 
196
       # Every configured hub must appear as an <atom:link rel="hub"> element
       # under /rss/channel. Both sides are sorted, so the order in which the
       # links are emitted does not matter.
+      it "can output RSS with hub links when push_hubs is specified" do
197
+        stub(agent).feed_link { "https://yoursite.com" }
198
+        agent.options[:push_hubs] = %w[https://pubsubhubbub.superfeedr.com/ https://pubsubhubbub.appspot.com/]
199
+        content, status, content_type = agent.receive_web_request({ 'secret' => 'secret1' }, 'get', 'text/xml')
200
+        expect(status).to eq(200)
201
+        expect(content_type).to eq('text/xml')
202
+        xml = Nokogiri::XML(content)
203
+        expect(xml.xpath('/rss/channel/atom:link[@rel="hub"]/@href').map(&:text).sort).to eq agent.options[:push_hubs].sort
204
+      end
205
+
173 206
       it "can output JSON" do
174 207
         agent.options['template']['item']['foo'] = "hi"
175 208