Nessuna descrizione http://j1x-huginn.herokuapp.com

imap_folder_agent.rb 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430
  1. require 'delegate'
  2. require 'net/imap'
  3. require 'mail'
  4. module Agents
  5. class ImapFolderAgent < Agent
  6. cannot_receive_events!
  7. default_schedule "every_30m"
  8. description <<-MD
  9. The ImapFolderAgent checks an IMAP server in specified folders
  10. and creates Events based on new unread mails.
  11. Specify an IMAP server to connect with `host`, and set `ssl` to
  12. true if the server supports IMAP over SSL. Specify `port` if
  13. you need to connect to a port other than standard (143 or 993
  14. depending on the `ssl` value).
  15. Specify login credentials in `username` and `password`.
  16. List the names of folders to check in `folders`.
  17. To narrow mails by conditions, build a `conditions` hash with
  18. the following keys:
  19. - "subject"
  20. - "body"
  21. Specify a string of the regular expression that is matched
  22. against the decoded subject/body of each mail.
  23. Use the `(?i)` directive for case insensitive search.
  24. When a mail has multiple non-attachment text parts, they are
  25. prioritized according to the `mime_types` option (which see
  26. below) and the first part that matches a "body" pattern, if
  27. specified, will be chosen as the "body" value in a created
  28. event.
  29. Named captures will appear in the "matches" hash in a created
  30. event.
  31. - "from", "to", "cc"
  32. Specify a shell glob pattern string that is matched against
  33. mail addresses extracted from the corresponding header
  34. values of each mail.
  35. Patterns match addresses in case insensitive manner.
  36. Multiple pattern strings can be specified in an array, in
  37. which case a mail is selected if any of the patterns
  38. matches. (i.e. patterns are OR'd)
  39. - "mime_types"
  40. Specify an array of MIME types to tell which non-attachment
  41. part of a mail among its text/* parts should be used as mail
  42. body. The default value is `['text/plain', 'text/enriched',
  43. 'text/html']`.
  44. - "has_attachment"
  45. Setting this to true or false means only mails that do or do
  46. not have an attachment are selected.
  47. If this key is unspecified or set to null, it is ignored.
  48. Set `mark_as_read` to true to mark found mails as read.
  49. MD
  50. event_description <<-MD
  51. Events look like this:
  52. {
  53. "subject": "...",
  54. "from": "Nanashi <nanashi.gombeh@example.jp>",
  55. "to": ["Jane <jane.doe@example.com>"],
  56. "cc": [],
  57. "date": "2014-05-10T03:47:20+0900",
  58. "mime_type": "text/plain",
  59. "body": "Hello,\n\n...",
  60. "matches": {
  61. }
  62. }
  63. MD
  64. IDCACHE_SIZE = 100
  # Health check for the agent: an event must have been created within the
  # configured 'expected_update_period_in_days', and there must be no recent
  # error logs. The error-log check is skipped when no recent event exists.
  def working?
    produced_recently = event_created_within?(options['expected_update_period_in_days'])
    produced_recently && !recent_error_logs?
  end
  # Option values pre-filled for a newly created agent: a Gmail IMAP-over-SSL
  # account with placeholder credentials, watching INBOX with no conditions.
  def default_options
    account = {
      'host' => 'imap.gmail.com',
      'ssl' => true,
      'username' => 'your.account',
      'password' => 'your.password'
    }
    watching = { 'folders' => ['INBOX'], 'conditions' => {} }

    { 'expected_update_period_in_days' => "1" }.merge(account).merge(watching)
  end
  # Validates the agent options, adding one message to `errors` (on :base) for
  # every problem found. Checks run in this order: required string options,
  # port, boolean flags, mime_types, folders, conditions, and finally
  # expected_update_period_in_days.
  def validate_options
    %w[host username password].each do |name|
      next if options[name].is_a?(String)
      errors.add(:base, '%s is required and must be a string' % name)
    end

    if options['port'].present? && !is_positive_integer?(options['port'])
      errors.add(:base, "port must be a positive integer")
    end

    # A blank value (including false) is accepted; anything else must be a real boolean.
    %w[ssl mark_as_read].each do |name|
      next unless options[name].present?
      next if [true, false].include?(options[name])
      errors.add(:base, '%s must be a boolean value' % name)
    end

    types = options['mime_types']
    if types.is_a?(Array)
      text_only = types.all? { |type| type.is_a?(String) && type.start_with?('text/') }
      errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless text_only
      errors.add(:base, 'mime_types should not be empty') if types.empty?
    elsif !types.nil?
      errors.add(:base, 'mime_types must be an array')
    end

    folder_names = options['folders']
    if folder_names.is_a?(Array)
      strings_only = folder_names.all? { |folder| folder.is_a?(String) }
      errors.add(:base, 'folders may only contain strings') unless strings_only
      errors.add(:base, 'folders should not be empty') if folder_names.empty?
    elsif !folder_names.nil?
      errors.add(:base, 'folders must be an array')
    end

    rules = options['conditions']
    if rules.is_a?(Hash)
      rules.each do |name, expected|
        # Blank condition values are treated as "not specified".
        next unless expected.present?

        case name
        when 'subject', 'body'
          if expected.is_a?(String)
            begin
              # Compiled only to check that the pattern is well-formed.
              Regexp.new(expected)
            rescue
              errors.add(:base, 'conditions.%s contains an invalid regexp' % name)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % name)
          end
        when 'from', 'to', 'cc'
          Array(expected).each do |pattern|
            unless pattern.is_a?(String)
              errors.add(:base, 'conditions.%s contains a non-string object' % name)
              next
            end
            begin
              # Matched against an empty string only to check that the glob is usable.
              glob_match?(pattern, '')
            rescue
              errors.add(:base, 'conditions.%s contains an invalid glob pattern' % name)
            end
          end
        when 'has_attachment'
          unless [true, false].include?(expected)
            errors.add(:base, 'conditions.%s must be a boolean value or null' % name)
          end
        end
      end
    elsif !rules.nil?
      errors.add(:base, 'conditions must be a hash')
    end

    period = options['expected_update_period_in_days']
    if period.present? && !is_positive_integer?(period)
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Scans the unread mails yielded by each_unread_mail and creates one event
  # for every mail that has not been handled before and that satisfies all
  # entries of options['conditions'].
  #
  # State kept in `memory`:
  # - 'seen' keeps a hash of { uidvalidity => uids, ... } which lists the
  #   unread mails found in watched folders during the last run.
  # - 'notified' keeps an array of message-ids of the IDCACHE_SIZE most
  #   recently notified mails.
  def check
    seen = memory['seen'] || {}
    new_seen = Hash.new { |hash, key|
      hash[key] = []
    }

    notified = memory['notified'] || []

    # validate_options accepts a missing 'conditions' entry, so treat nil as
    # "no conditions" instead of failing on nil.all?.
    conditions = options['conditions'] || {}

    each_unread_mail { |mail|
      new_seen[mail.uidvalidity] << mail.uid

      next if (uids = seen[mail.uidvalidity]) && uids.include?(mail.uid)
      next if notified.include?(mail.message_id)

      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      conditions.all? { |key, value|
        case key
        when 'subject'
          value.present? or next true
          re = Regexp.new(value)
          if m = re.match(mail.subject)
            # Expose named captures to the event payload.
            m.names.each { |name|
              matches[name] = m[name]
            }
            true
          else
            false
          end
        when 'body'
          value.present? or next true
          re = Regexp.new(value)
          # The first body part matching the pattern becomes the event body;
          # when none matches, find returns nil and the condition fails.
          matched_part = body_parts.find { |part|
            if m = re.match(part.decoded)
              m.names.each { |name|
                matches[name] = m[name]
              }
              true
            else
              false
            end
          }
        when 'from', 'to', 'cc'
          value.present? or next true
          # A mail without this header cannot match an address pattern;
          # header[key] is nil in that case.
          field = mail.header[key]
          next false unless field
          field.addresses.any? { |address|
            Array(value).any? { |pattern|
              glob_match?(pattern, address)
            }
          }
        when 'has_attachment'
          value == mail.has_attachment?
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      } or next

      # Without a 'body' condition, fall back to the highest-priority part.
      matched_part ||= body_parts.first
      if matched_part
        mime_type = matched_part.mime_type
        body = matched_part.decoded
      else
        mime_type = 'text/plain'
        body = ''
      end

      create_event :payload => {
        'subject' => mail.subject,
        'from' => mail.from_addrs.first,
        'to' => mail.to_addrs,
        'cc' => mail.cc_addrs,
        # Best effort: a missing or unusable Date header yields nil.
        'date' => (mail.date.iso8601 rescue nil),
        'mime_type' => mime_type,
        'body' => body,
        'matches' => matches,
        'has_attachment' => mail.has_attachment?,
      }

      if options['mark_as_read']
        log 'Marking as read'
        mail.mark_as_read
      end

      notified << mail.message_id if mail.message_id
    }

    # Keep only the most recent IDCACHE_SIZE message-ids.
    notified.slice!(0...-IDCACHE_SIZE) if notified.size > IDCACHE_SIZE

    memory['seen'] = new_seen
    memory['notified'] = notified
    save!
  end
  # Connects to the configured IMAP server, logs in, and yields every unread
  # (UNSEEN) mail of each configured folder, in folder order.
  #
  # Options are read with string keys, like everywhere else in this class, so
  # the lookup does not depend on `options` supporting symbol access.
  # 'Connection closed' is logged on the way out, whether or not an error
  # occurred.
  def each_unread_mail
    host, port, ssl, username = options.values_at('host', 'port', 'ssl', 'username')

    log "Connecting to #{host}#{':%d' % port if port}#{' via SSL' if ssl}"
    Client.open(host, port, ssl) { |imap|
      log "Logging in as #{username}"
      imap.login(username, options['password'])

      # validate_options allows 'folders' to be omitted; Array() turns nil
      # into an empty list instead of failing on nil.each.
      Array(options['folders']).each { |folder|
        log "Selecting the folder: %s" % folder

        imap.select(folder)

        unseen = imap.search('UNSEEN')

        if unseen.empty?
          log "No unread mails"
          next
        end

        imap.fetch_mails(unseen).each { |mail|
          yield mail
        }
      }
    }
  ensure
    log 'Connection closed'
  end
  # MIME types eligible to be used as the mail body, in priority order.
  # Uses the 'mime_types' option when set, otherwise the built-in defaults.
  def mime_types
    configured = options['mime_types']
    return configured if configured

    ['text/plain', 'text/enriched', 'text/html']
  end
  277. private
  # Returns true when `value` converts via Kernel#Integer to a number strictly
  # greater than zero, and false otherwise (including when the conversion
  # fails). Zero is rejected: it is not a positive integer, and callers report
  # "must be a positive integer" on failure.
  def is_positive_integer?(value)
    Integer(value) > 0
  rescue StandardError
    false
  end
  # Shell-glob match of `value` against `pattern`, ignoring case and with
  # brace alternation ({a,b}) enabled.
  def glob_match?(pattern, value)
    flags = File::FNM_EXTGLOB | File::FNM_CASEFOLD
    File.fnmatch(pattern, value, flags)
  end
  # Net::IMAP subclass that remembers the UIDVALIDITY of the selected folder
  # and wraps fetched mails in Message objects.
  class Client < ::Net::IMAP
    class << self
      # Creates a connection, yields it to the block, and disconnects when the
      # block finishes or raises. Returns the block's value.
      def open(host, port, ssl)
        imap = new(host, port, ssl)
        yield imap
      ensure
        # `imap` is still nil when `new` itself raised; calling disconnect on
        # it would replace the real connection error with a NoMethodError.
        imap.disconnect if imap
      end
    end

    # Selects `folder` and records the last UIDVALIDITY response so that
    # fetched messages can be tagged with it.
    def select(folder)
      ret = super
      @uidvalidity = responses['UIDVALIDITY'].last
      ret
    end

    # Fetches UID and header for each entry of `set` and wraps every result
    # in a Message bound to this client and the current UIDVALIDITY.
    def fetch_mails(set)
      fetch(set, %w[UID RFC822.HEADER]).map { |data|
        Message.new(self, @uidvalidity, data)
      }
    end
  end
  # A mail found on the server. It delegates to a Mail object parsed from the
  # fetched header, and fetches the full body or the body structure from the
  # server only when first asked for.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html].freeze

    attr_reader :uidvalidity, :uid

    # client      - connection used for later on-demand fetches
    # uidvalidity - UIDVALIDITY of the folder the mail was found in
    # fetch_data  - fetch result whose `attr` holds 'UID' and 'RFC822.HEADER'
    def initialize(client, uidvalidity, fetch_data)
      @client = client
      @uidvalidity = uidvalidity
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Whether the mail's BODYSTRUCTURE indicates an attachment. The answer is
    # cached after the first call, including a negative one, so the server is
    # asked at most once per message. A message for which the server returns
    # nothing is reported as having no attachment.
    def has_attachment?
      return @has_attachment if defined?(@has_attachment)

      # The fetch may come back nil or empty if the message is gone.
      fetched = @client.uid_fetch(@uid, 'BODYSTRUCTURE')
      data = fetched && fetched.first
      @has_attachment = data ? struct_has_attachment?(data.attr['BODYSTRUCTURE']) : false
    end

    # Returns the fully parsed mail, fetched once with BODY.PEEK[] and then
    # cached. When the server returns nothing, an empty mail is parsed instead.
    def fetch
      @parsed ||=
        begin
          fetched = @client.uid_fetch(@uid, 'BODY.PEEK[]')
          data = fetched && fetched.first
          Mail.read_from_string(data ? data.attr['BODY[]'] : '')
        end
    end

    # Returns the non-attachment text parts whose MIME type is listed in
    # `mime_types`; for multipart mails the parts are first sorted by that
    # list's order.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      candidates =
        if mail.multipart?
          mail.body.set_sort_order(mime_types)
          mail.body.sort_parts!
          mail.all_parts
        else
          [mail]
        end
      candidates.reject { |part|
        part.multipart? || part.attachment? || !part.text? ||
          !mime_types.include?(part.mime_type)
      }
    end

    # Adds the \Seen flag to this mail on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # Heuristic: a multipart structure counts as having an attachment when it
    # is multipart/mixed or when any nested part does.
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  356. end
  357. end