data_output_agent.rb 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338
  1. module Agents
  2. class DataOutputAgent < Agent
  3. include WebRequestConcern
  4. cannot_be_scheduled!
  5. description do
  6. <<-MD
  7. The Data Output Agent outputs received events as either RSS or JSON. Use it to output a public or private stream of Huginn data.
  8. This Agent will output data at:
  9. `https://#{ENV['DOMAIN']}#{Rails.application.routes.url_helpers.web_requests_path(agent_id: ':id', user_id: user_id, secret: ':secret', format: :xml)}`
  10. where `:secret` is one of the allowed secrets specified in your options and the extension can be `xml` or `json`.
  11. You can setup multiple secrets so that you can individually authorize external systems to
  12. access your Huginn data.
  13. Options:
  14. * `secrets` - An array of tokens that the requestor must provide for light-weight authentication.
  15. * `expected_receive_period_in_days` - How often you expect data to be received by this Agent from other Agents.
  16. * `template` - A JSON object representing a mapping between item output keys and incoming event values. Use [Liquid](https://github.com/cantino/huginn/wiki/Formatting-Events-using-Liquid) to format the values. Values of the `link`, `title`, `description` and `icon` keys will be put into the \\<channel\\> section of RSS output. Value of the `self` key will be used as URL for this feed itself, which is useful when you serve it via reverse proxy. The `item` key will be repeated for every Event. The `pubDate` key for each item will have the creation time of the Event unless given.
  17. * `events_to_show` - The number of events to output in RSS or JSON. (default: `40`)
  18. * `ttl` - A value for the \\<ttl\\> element in RSS output. (default: `60`)
  19. * `push_hubs` - Set to a list of PubSubHubbub endpoints you want to publish an update to every time this agent receives an event. (default: none) Popular hubs include [Superfeedr](https://pubsubhubbub.superfeedr.com/) and [Google](https://pubsubhubbub.appspot.com/). Note that publishing updates will make your feed URL known to the public, so if you want to keep it secret, set up a reverse proxy to serve your feed via a safe URL and specify it in `template.self`.
  20. If you'd like to output RSS tags with attributes, such as `enclosure`, use something like the following in your `template`:
  21. "enclosure": {
  22. "_attributes": {
  23. "url": "{{media_url}}",
  24. "length": "1234456789",
  25. "type": "audio/mpeg"
  26. }
  27. },
  28. "another_tag": {
  29. "_attributes": {
  30. "key": "value",
  31. "another_key": "another_value"
  32. },
  33. "_contents": "tag contents (can be an object for nesting)"
  34. }
  35. # Ordering events in the output
  36. #{description_events_order('events in the output')}
  37. # Liquid Templating
  38. In Liquid templating, the following variable is available:
  39. * `events`: An array of events being output, sorted in the given order, up to `events_to_show` in number. For example, if source events contain a site title in the `site_title` key, you can refer to it in `template.title` by putting `{{events.first.site_title}}`.
  40. MD
  41. end
  # Options a newly created agent starts with: a single placeholder secret,
  # a two-day expected receive period, and a sample feed template whose item
  # fields are filled from the incoming event via Liquid.
  def default_options
    item_template = {
      "title" => "{{title}}",
      "description" => "Secret hovertext: {{hovertext}}",
      "link" => "{{url}}"
    }

    feed_template = {
      "title" => "XKCD comics as a feed",
      "description" => "This is a feed of recent XKCD comics, generated by Huginn",
      "item" => item_template
    }

    {
      "secrets" => ["a-secret-key"],
      "expected_receive_period_in_days" => 2,
      "template" => feed_template
    }
  end
  # The agent counts as working when it has received an event more recently
  # than `expected_receive_period_in_days` days ago and has no recent error
  # logs. Returns nil when nothing has been received yet.
  def working?
    return last_receive_at unless last_receive_at

    oldest_acceptable = options['expected_receive_period_in_days'].to_i.days.ago
    return false unless last_receive_at > oldest_acceptable

    !recent_error_logs?
  end
  # Validates the agent's options, adding one message to `errors` (on :base)
  # for each problem found. Checks run in a fixed order: secrets, expected
  # receive period, template, push hubs.
  def validate_options
    validate_secrets_option
    validate_expected_receive_period_option
    validate_template_option
    validate_push_hubs_option
  end

  # `secrets` must be a non-empty array of strings without '/' or '.'.
  def validate_secrets_option
    secrets = options['secrets']

    unless secrets.is_a?(Array) && !secrets.empty?
      errors.add(:base, "Please specify one or more secrets for 'authenticating' incoming feed requests")
      return
    end

    secrets.each do |secret|
      case secret
      when %r{[/.]}
        errors.add(:base, "secret may not contain a slash or dot")
      when String
        # acceptable secret
      else
        errors.add(:base, "secret must be a string")
      end
    end
  end

  # `expected_receive_period_in_days` must be present and a positive number.
  def validate_expected_receive_period_option
    period = options['expected_receive_period_in_days']
    return if period.present? && period.to_i > 0

    errors.add(:base, "Please provide 'expected_receive_period_in_days' to indicate how many days can pass before this Agent is considered to be not working")
  end

  # `template` must be a hash holding a non-empty hash under `item`.
  # The explicit Hash check on `template` matters: indexing an Array (or
  # another non-hash value) with the string 'item' raises TypeError, which
  # would abort validation instead of reporting the problem to the user.
  def validate_template_option
    template = options['template']
    return if template.is_a?(Hash) && template['item'].is_a?(Hash) && template['item'].present?

    errors.add(:base, "Please provide template and template.item")
  end

  # `push_hubs` is optional; when given it must be an array whose entries are
  # either Liquid templates or strings that URI.parse accepts. Only the first
  # offending entry is reported.
  def validate_push_hubs_option
    hubs = options['push_hubs']
    return if hubs.nil?

    unless hubs.is_a?(Array)
      errors.add(:base, "push_hubs must be an array")
      return
    end

    hubs.each do |hub|
      case hub
      when /\{/
        # Liquid templating; the value is not a literal URL, so it is not parsed here
      when String
        begin
          URI.parse(hub)
        rescue URI::Error
          errors.add(:base, "invalid URL found in push_hubs")
          break
        end
      else
        errors.add(:base, "push_hubs must be an array of endpoint URLs")
        break
      end
    end
  end

  # Helpers above are implementation details of validate_options.
  private :validate_secrets_option, :validate_expected_receive_period_option,
          :validate_template_option, :validate_push_hubs_option
  # Number of events to include in the feed; 40 unless the interpolated
  # `events_to_show` option is set.
  def events_to_show
    configured = interpolated['events_to_show'].presence
    (configured || 40).to_i
  end
  # Value for the RSS <ttl> element; 60 unless the interpolated `ttl` option
  # is set.
  def feed_ttl
    configured = interpolated['ttl'].presence
    (configured || 60).to_i
  end
  # Channel title: `template.title` when set, otherwise derived from the
  # agent's name.
  def feed_title
    custom_title = interpolated['template']['title'].presence
    custom_title || "#{name} Event Feed"
  end
  # Channel link: `template.link` when set, otherwise the https URL of the
  # host named by ENV['DOMAIN'].
  def feed_link
    custom_link = interpolated['template']['link'].presence
    custom_link || "https://#{ENV['DOMAIN']}"
  end
  # URL of this feed itself. `template.self` wins when set; otherwise the
  # URL is feed_link followed by the web-request route for this agent, built
  # from options[:secret] and options[:format]. An unsaved agent (no id yet)
  # gets the ':id' placeholder in the path.
  def feed_url(options = {})
    explicit_self = interpolated['template']['self'].presence
    return explicit_self if explicit_self

    request_path = Rails.application.routes.url_helpers.web_requests_path(
      agent_id: id || ':id',
      user_id: user_id,
      secret: options[:secret],
      format: options[:format]
    )
    feed_link + request_path
  end
  # Channel icon: `template.icon` when set, otherwise favicon.ico under
  # feed_link.
  def feed_icon
    custom_icon = interpolated['template']['icon'].presence
    custom_icon || feed_link + '/favicon.ico'
  end
  # Channel description: `template.description` when set, otherwise a
  # generic sentence naming this agent.
  def feed_description
    custom_description = interpolated['template']['description'].presence
    custom_description || "A feed of Events received by the '#{name}' Huginn Agent"
  end
  # Interpolated list of hub endpoints to notify; an empty array when the
  # `push_hubs` option is unset or blank.
  def push_hubs
    configured_hubs = interpolated['push_hubs'].presence
    configured_hubs || []
  end
  # Serves the feed for an incoming web request.
  #
  # params - request parameters; params['secret'] must be one of the
  #          interpolated `secrets`.
  # method - request method (not used here).
  # format - requested format; anything matching /json/ yields JSON,
  #          everything else yields RSS.
  #
  # Returns [body, status] for JSON and for authorization failures, and
  # [xml_string, 200, 'text/xml'] for RSS.
  def receive_web_request(params, method, format)
    unless interpolated['secrets'].include?(params['secret'])
      if format =~ /json/
        return [{ error: "Not Authorized" }, 401]
      else
        return ["Not Authorized", 401]
      end
    end

    source_events = sort_events(received_events.order(id: :desc).limit(events_to_show).to_a)

    interpolation_context.stack do
      # Expose the events to Liquid for everything rendered inside this block.
      interpolation_context['events'] = source_events

      items = source_events.map { |event| build_feed_item(event) }
      now = Time.now

      if format =~ /json/
        return [json_feed_content(items, now), 200]
      else
        return [rss_feed_document(items, now, params['secret']), 200, 'text/xml']
      end
    end
  end

  # Builds one feed item by interpolating `template.item` against the event,
  # then normalising `guid` (falls back to the event id) and `pubDate`.
  def build_feed_item(event)
    item = interpolate_options(options['template']['item'], event)

    item['guid'] = {
      '_attributes' => { 'isPermaLink' => 'false' },
      '_contents' => item['guid'].presence || event.id
    }
    item['pubDate'] = feed_item_pub_date(item['pubDate'].to_s, event).rfc2822.to_s
    item
  end

  # Parses a templated pubDate; falls back to the event's creation time when
  # the value is blank or unparsable (parse failures are logged as errors).
  def feed_item_pub_date(date_string, event)
    parsed =
      begin
        Time.zone.parse(date_string) # may return nil
      rescue => e
        error "Error parsing a \"pubDate\" value \"#{date_string}\": #{e.message}"
        nil
      end

    parsed || event.created_at
  end

  # Body of the JSON response.
  def json_feed_content(items, now)
    {
      'title' => feed_title,
      'description' => feed_description,
      'pubDate' => now,
      'items' => simplify_item_for_json(items)
    }
  end

  # Body of the RSS response. Heredoc contents are flush left so that nothing
  # precedes the XML declaration in the output.
  def rss_feed_document(items, now, secret)
    hub_links = push_hubs.map { |hub|
      <<-XML
<atom:link rel="hub" href=#{hub.encode(xml: :attr)}/>
      XML
    }.join

    # Serialise under a temporary <items> root, then strip that wrapper so
    # the <item> elements sit directly inside <channel>.
    items_xml = simplify_item_for_xml(items)
      .to_xml(skip_types: true, root: "items", skip_instruct: true, indent: 1)
      .gsub(%r{^</?items>\n}, '')

    build_date = now.rfc2822.to_s.encode(xml: :text)

    <<-XML
<?xml version="1.0" encoding="UTF-8" ?>
<rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">
<channel>
<atom:link href=#{feed_url(secret: secret, format: :xml).encode(xml: :attr)} rel="self" type="application/rss+xml" />
<atom:icon>#{feed_icon.encode(xml: :text)}</atom:icon>
#{hub_links}
<title>#{feed_title.encode(xml: :text)}</title>
<description>#{feed_description.encode(xml: :text)}</description>
<link>#{feed_link.encode(xml: :text)}</link>
<lastBuildDate>#{build_date}</lastBuildDate>
<pubDate>#{build_date}</pubDate>
<ttl>#{feed_ttl}</ttl>
#{items_xml}
</channel>
</rss>
    XML
  end

  # Helpers above are implementation details of receive_web_request.
  private :build_feed_item, :feed_item_pub_date, :json_feed_content, :rss_feed_document
  # Called when events arrive: announces this feed's XML URL (built with the
  # first configured secret) to every configured push hub.
  def receive(incoming_events)
    announced_url = feed_url(secret: interpolated['secrets'].first, format: :xml)
    push_hubs.each { |endpoint| push_to_hub(endpoint, announced_url) }
  end
  202. private
  # An XML element with explicit attributes. Instances stand in for template
  # values that use the `_attributes` / `_contents` keys, and render
  # themselves through the builder passed in the options hash.
  class XMLNode
    def initialize(tag_name, attributes, contents)
      @tag_name = tag_name
      @attributes = attributes
      @contents = contents
    end

    # Writes this element to options[:builder]. Hash contents become child
    # elements; anything else is passed to the builder as the element's
    # content.
    def to_xml(options)
      builder = options[:builder]
      return builder.tag!(@tag_name, @attributes, @contents) unless @contents.is_a?(Hash)

      builder.tag!(@tag_name, @attributes) do
        @contents.each do |child_name, child_value|
          # A fresh options hash per child: the callee may modify what it is given.
          ActiveSupport::XmlMini.to_tag(child_name, child_value, options.merge(skip_instruct: true))
        end
      end
    end
  end
  # Prepares feed items for XML serialisation. Hashes are rebuilt so that any
  # hash value carrying `_attributes` or `_contents` becomes an XMLNode
  # (with its contents simplified recursively); other hash values are
  # simplified recursively, and non-hash values are kept as they are.
  # Arrays are mapped element by element; anything else is returned as is.
  def simplify_item_for_xml(item)
    case item
    when Hash
      item.each_with_object({}) do |(key, value), simplified|
        simplified[key] =
          if !value.is_a?(Hash)
            value
          elsif value.key?('_attributes') || value.key?('_contents')
            XMLNode.new(key, value['_attributes'], simplify_item_for_xml(value['_contents']))
          else
            simplify_item_for_xml(value)
          end
      end
    when Array
      item.map { |element| simplify_item_for_xml(element) }
    else
      item
    end
  end
  # Prepares feed items for JSON output by flattening the XML-oriented
  # `_attributes` / `_contents` notation. For a hash value carrying either
  # key: hash contents are simplified recursively, other truthy contents are
  # stored under "contents", and the attributes are merged on top. Other
  # hash values are simplified recursively, non-hash values are kept as they
  # are. Arrays are mapped element by element; anything else is returned as
  # is.
  def simplify_item_for_json(item)
    case item
    when Hash
      item.each_with_object({}) do |(key, value), simplified|
        simplified[key] =
          if !value.is_a?(Hash)
            value
          elsif value.key?('_attributes') || value.key?('_contents')
            inner = value['_contents']
            flattened =
              if inner.is_a?(Hash)
                simplify_item_for_json(inner)
              elsif inner
                { "contents" => inner }
              else
                {}
              end
            flattened.merge(value['_attributes'] || {})
          else
            simplify_item_for_json(value)
          end
      end
    when Array
      item.map { |element| simplify_item_for_json(element) }
    else
      item
    end
  end
  # Notifies one hub that the feed at `url` has been updated.
  #
  # hub - endpoint URL as a string; it must parse as an HTTP(S) URI,
  #       otherwise an error is logged and nothing is sent.
  # url - feed URL to announce.
  #
  # Nothing is posted during a dry run. A failing POST is logged as an error
  # rather than raised.
  def push_to_hub(hub, url)
    hub_uri = begin
      URI.parse(hub)
    rescue URI::Error
      nil
    end

    unless hub_uri.is_a?(URI::HTTP)
      error "Invalid push endpoint: #{hub}"
      return
    end

    log "Pushing #{url} to #{hub_uri}"
    return if dry_run?

    begin
      publish_params = { 'hub.mode' => 'publish', 'hub.url' => url }
      faraday.post hub_uri, publish_params
    rescue => e
      error "Push failed: #{e.message}"
    end
  end
  284. end
  285. end