imap_folder_agent.rb 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551
  1. require 'delegate'
  2. require 'net/imap'
  3. require 'mail'
  4. module Agents
  5. class ImapFolderAgent < Agent
  # Class-level agent declarations. These macros come from the Agent base class,
  # which is not visible in this file -- presumably they mark the agent as
  # schedule-driven only (no incoming events), dry-runnable, and scheduled every
  # 30 minutes by default; confirm against Agent.
  6. cannot_receive_events!
  7. can_dry_run!
  8. default_schedule "every_30m"
  # User-facing Markdown documentation of the agent and of every option that
  # validate_options / check / each_unread_mail read.
  9. description <<-MD
  10. The Imap Folder Agent checks an IMAP server in specified folders and creates Events based on new mails found since the last run. In the first visit to a folder, this agent only checks for the initial status and does not create events.
  11. Specify an IMAP server to connect with `host`, and set `ssl` to true if the server supports IMAP over SSL. Specify `port` if you need to connect to a port other than standard (143 or 993 depending on the `ssl` value).
  12. Specify login credentials in `username` and `password`.
  13. List the names of folders to check in `folders`.
  14. To narrow mails by conditions, build a `conditions` hash with the following keys:
  15. - `subject`
  16. - `body`
  17. Specify a regular expression to match against the decoded subject/body of each mail.
  18. Use the `(?i)` directive for case-insensitive search. For example, a pattern `(?i)alert` will match "alert", "Alert"or "ALERT". You can also make only a part of a pattern to work case-insensitively: `Re: (?i:alert)` will match either "Re: Alert" or "Re: alert", but not "RE: alert".
  19. When a mail has multiple non-attachment text parts, they are prioritized according to the `mime_types` option (which see below) and the first part that matches a "body" pattern, if specified, will be chosen as the "body" value in a created event.
  20. Named captures will appear in the "matches" hash in a created event.
  21. - `from`, `to`, `cc`
  22. Specify a shell glob pattern string that is matched against mail addresses extracted from the corresponding header values of each mail.
  23. Patterns match addresses in case insensitive manner.
  24. Multiple pattern strings can be specified in an array, in which case a mail is selected if any of the patterns matches. (i.e. patterns are OR'd)
  25. - `mime_types`
  26. Specify an array of MIME types to tell which non-attachment part of a mail among its text/* parts should be used as mail body. The default value is `['text/plain', 'text/enriched', 'text/html']`.
  27. - `is_unread`
  28. Setting this to true or false means only mails that is marked as unread or read respectively, are selected.
  29. If this key is unspecified or set to null, it is ignored.
  30. - `has_attachment`
  31. Setting this to true or false means only mails that does or does not have an attachment are selected.
  32. If this key is unspecified or set to null, it is ignored.
  33. Set `mark_as_read` to true to mark found mails as read.
  34. Each agent instance memorizes the highest UID of mails that are found in the last run for each watched folder, so even if you change a set of conditions so that it matches mails that are missed previously, or if you alter the flag status of already found mails, they will not show up as new events.
  35. Also, in order to avoid duplicated notification it keeps a list of Message-Id's of 100 most recent mails, so if multiple mails of the same Message-Id are found, you will only see one event out of them.
  36. MD
  # Sample event payload shown to users.
  # NOTE(review): the payload built in #check also carries a 'has_attachment'
  # key that this sample does not list.
  37. event_description <<-MD
  38. Events look like this:
  39. {
  40. "folder": "INBOX",
  41. "subject": "...",
  42. "from": "Nanashi <nanashi.gombeh@example.jp>",
  43. "to": ["Jane <jane.doe@example.com>"],
  44. "cc": [],
  45. "date": "2014-05-10T03:47:20+0900",
  46. "mime_type": "text/plain",
  47. "body": "Hello,\n\n...",
  48. "matches": {
  49. }
  50. }
  51. MD
  # Number of Message-Ids the agent remembers for duplicate suppression.
  IDCACHE_SIZE = 100

  # Bit mask passed to File.fnmatch? when matching address globs. Only the
  # flags that this Ruby's File actually defines are OR-ed into the mask, so
  # a missing flag is silently skipped instead of raising NameError.
  FNM_FLAGS = [:FNM_CASEFOLD, :FNM_EXTGLOB]
    .select { |flag_name| File.const_defined?(flag_name) }
    .map { |flag_name| File.const_get(flag_name) }
    .inject(0) { |mask, bit| mask | bit }
  # Health check: the agent counts as working when an event was created
  # within the configured 'expected_update_period_in_days' and there are no
  # recent error logs. The error-log check only runs when the first check
  # passes; otherwise the (falsy) result of the first check is returned.
  def working?
    period = interpolated['expected_update_period_in_days']
    recently_created = event_created_within?(period)
    if recently_created
      !recent_error_logs?
    else
      recently_created
    end
  end
  # Option values a newly created agent starts with. A fresh hash is built
  # on every call, so callers may mutate the result freely.
  def default_options
    connection = {
      'host' => 'imap.gmail.com',
      'ssl' => true,
      'username' => 'your.account',
      'password' => 'your.password'
    }
    selection = {
      'folders' => ['INBOX'],
      'conditions' => {}
    }
    { 'expected_update_period_in_days' => "1" }.merge(connection).merge(selection)
  end
  # Validates the agent options and records one message on errors[:base]
  # per problem found. Checks, in order: the required string options, the
  # optional port, the boolean flags, the optional mime_types and folders
  # arrays, every entry of the mandatory conditions hash, and the optional
  # expected_update_period_in_days.
  def validate_options
    %w[host username password].each do |key|
      next if options[key].is_a?(String)
      errors.add(:base, '%s is required and must be a string' % key)
    end

    if options['port'].present? && !is_positive_integer?(options['port'])
      errors.add(:base, "port must be a positive integer")
    end

    %w[ssl mark_as_read].each do |key|
      next unless options[key].present?
      next unless boolify(options[key]).nil?
      errors.add(:base, '%s must be a boolean value' % key)
    end

    # mime_types is optional; when given it must be a non-empty array of
    # "text/..." strings.
    mime_types = options['mime_types']
    unless mime_types.nil?
      if mime_types.is_a?(Array)
        text_only = mime_types.all? do |mime_type|
          mime_type.is_a?(String) && mime_type.start_with?('text/')
        end
        errors.add(:base, 'mime_types may only contain strings that match "text/*".') unless text_only
        errors.add(:base, 'mime_types should not be empty') if mime_types.empty?
      else
        errors.add(:base, 'mime_types must be an array')
      end
    end

    # folders follows the same shape rules as mime_types.
    folders = options['folders']
    unless folders.nil?
      if folders.is_a?(Array)
        strings_only = folders.all? { |folder| folder.is_a?(String) }
        errors.add(:base, 'folders may only contain strings') unless strings_only
        errors.add(:base, 'folders should not be empty') if folders.empty?
      else
        errors.add(:base, 'folders must be an array')
      end
    end

    conditions = options['conditions']
    if conditions.is_a?(Hash)
      conditions.each do |key, value|
        # Blank values (nil, false, '', []) mean "condition not set".
        next unless value.present?

        case key
        when 'subject', 'body'
          if value.is_a?(String)
            begin
              Regexp.new(value)
            rescue
              errors.add(:base, 'conditions.%s contains an invalid regexp' % key)
            end
          else
            errors.add(:base, 'conditions.%s contains a non-string object' % key)
          end
        when 'from', 'to', 'cc'
          Array(value).each do |pattern|
            if pattern.is_a?(String)
              begin
                # Trial match against an empty string just to see whether
                # the pattern itself is accepted.
                glob_match?(pattern, '')
              rescue
                errors.add(:base, 'conditions.%s contains an invalid glob pattern' % key)
              end
            else
              errors.add(:base, 'conditions.%s contains a non-string object' % key)
            end
          end
        when 'is_unread', 'has_attachment'
          unless [true, false].include?(boolify(value))
            errors.add(:base, 'conditions.%s must be a boolean value or null' % key)
          end
        end
      end
    else
      errors.add(:base, 'conditions must be a hash')
    end

    if options['expected_update_period_in_days'].present? &&
       !is_positive_integer?(options['expected_update_period_in_days'])
      errors.add(:base, "Invalid expected_update_period_in_days format")
    end
  end
  # Scheduled entry point. For every mail yielded by each_unread_mail, all
  # configured conditions must hold; a matching mail produces one event
  # unless its Message-Id is already in the notified list. Mails that pass
  # the conditions are marked as read when 'mark_as_read' is set (skipped on
  # dry runs), whether or not an event was emitted for them.
  def check
    each_unread_mail do |mail, notified|
      message_id = mail.message_id
      body_parts = mail.body_parts(mime_types)
      matched_part = nil
      matches = {}

      # Copies every named capture of a MatchData into the "matches" hash
      # that ends up in the event payload.
      record_captures = lambda do |match_data|
        match_data.names.each { |name| matches[name] = match_data[name] }
      end

      satisfied = interpolated['conditions'].all? do |key, value|
        case key
        when 'subject'
          next true unless value.present?
          subject_match = Regexp.new(value).match(mail.scrubbed(:subject))
          next false unless subject_match
          record_captures.call(subject_match)
          true
        when 'body'
          next true unless value.present?
          body_re = Regexp.new(value)
          # The first part that matches becomes the event body; nil (no
          # part matched) makes the condition fail.
          matched_part = body_parts.find do |part|
            body_match = body_re.match(part.scrubbed(:decoded))
            record_captures.call(body_match) if body_match
            body_match ? true : false
          end
        when 'from', 'to', 'cc'
          next true unless value.present?
          begin
            # Mail::Field really needs to define respond_to_missing?
            # so we could use try(:addresses) here.
            addresses = mail.header[key].addresses
          rescue NoMethodError
            next false
          end
          globs = Array(value)
          addresses.any? do |address|
            globs.any? { |glob| glob_match?(glob, address) }
          end
        when 'has_attachment'
          boolify(value) == mail.has_attachment?
        when 'is_unread'
          true # already filtered out by each_unread_mail
        else
          log 'Unknown condition key ignored: %s' % key
          true
        end
      end
      next unless satisfied

      if notified.include?(mail.message_id)
        log 'Ignoring mail: %s (already notified)' % message_id
      else
        # Without a body condition, fall back to the highest-priority part.
        matched_part ||= body_parts.first
        mime_type, body =
          if matched_part
            [matched_part.mime_type, matched_part.scrubbed(:decoded)]
          else
            ['text/plain', '']
          end

        log 'Emitting an event for mail: %s' % message_id
        create_event payload: {
          'folder' => mail.folder,
          'subject' => mail.scrubbed(:subject),
          'from' => mail.from_addrs.first,
          'to' => mail.to_addrs,
          'cc' => mail.cc_addrs,
          'date' => (mail.date.iso8601 rescue nil),
          'mime_type' => mime_type,
          'body' => body,
          'matches' => matches,
          'has_attachment' => mail.has_attachment?,
        }
        notified << mail.message_id if mail.message_id
      end

      next unless boolify(interpolated['mark_as_read'])
      log 'Marking as read'
      mail.mark_as_read unless dry_run?
    end
  end
  # Connects to the IMAP server, walks the configured folders and yields
  # (mail, notified) for each mail whose UID is above the last one seen in
  # that folder and whose read flag agrees with conditions.is_unread.
  #
  # A folder with no recorded UID (first visit, or a new UIDVALIDITY) only
  # gets its current highest UID recorded; nothing is yielded for it.
  # After all folders are processed the notified list and the per-UIDVALIDITY
  # "last seen UID" map are written back and the agent is saved. If an
  # exception escapes, nothing is written back. 'Connection closed' is logged
  # on every exit path.
  def each_unread_mail
    host, port, ssl, username = interpolated.values_at(:host, :port, :ssl, :username)
    ssl = boolify(ssl)
    port = port.present? ? Integer(port) : nil

    endpoint = [host, (':%d' % port if port), (' via SSL' if ssl)].join
    log "Connecting to #{endpoint}"

    Client.open(host, port: port, ssl: ssl) do |imap|
      log "Logging in as #{username}"
      imap.login(username, interpolated[:password])

      # 'lastseen' keeps a hash of { uidvalidity => lastseenuid, ... }
      lastseen = self.lastseen
      seen = make_seen

      # 'notified' keeps an array of message-ids of {IDCACHE_SIZE}
      # most recent notified mails.
      notified = self.notified

      interpolated['folders'].each do |folder|
        log "Selecting the folder: %s" % folder
        imap.select(folder)
        uidvalidity = imap.uidvalidity
        lastseenuid = lastseen[uidvalidity]

        if lastseenuid.nil?
          maxseq = imap.responses['EXISTS'].last
          log "Recording the initial status: %s" % pluralize(maxseq, 'existing mail')
          seen[uidvalidity] = imap.fetch(maxseq, 'UID').last.attr['UID'] if maxseq > 0
          next
        end

        seen[uidvalidity] = lastseenuid
        is_unread = boolify(interpolated['conditions']['is_unread'])

        uids = []
        imap.uid_fetch((lastseenuid + 1)..-1, 'FLAGS').each do |data|
          uid, flags = data.attr.values_at('UID', 'FLAGS')
          seen[uidvalidity] = uid
          # Entries at or below the last seen UID are skipped.
          # NOTE(review): presumably the server can answer an "n:*" range
          # with an older message -- confirm against the IMAP spec.
          next if uid <= lastseenuid
          # nil means "don't care"; otherwise the unread state must match.
          uids << uid if is_unread.nil? || is_unread == !flags.include?(:Seen)
        end

        noun = { true => 'new unread mail', false => 'new read mail' }.fetch(is_unread, 'new mail')
        log pluralize(uids.size, noun)
        next if uids.empty?

        imap.uid_fetch_mails(uids).each do |mail|
          yield mail, notified
        end
      end

      self.notified = notified
      self.lastseen = seen
      save!
    end
  ensure
    log 'Connection closed'
  end
  # MIME types considered for the mail body, in priority order: the
  # configured 'mime_types' option, or the built-in text types when unset.
  def mime_types
    configured = interpolated['mime_types']
    configured || %w[text/plain text/enriched text/html]
  end

  # The stored { uidvalidity => last seen UID } map, wrapped in a Seen.
  def lastseen
    stored = memory['lastseen']
    Seen.new(stored)
  end

  # Stores the last-seen map in memory and drops the obsolete 'seen' key.
  def lastseen=(value)
    memory.delete('seen') # obsolete key
    memory['lastseen'] = value
  end

  # A fresh, empty Seen map to collect this run's highest UIDs.
  def make_seen
    Seen.new
  end

  # The stored list of recently notified Message-Ids, wrapped in a Notified.
  def notified
    stored = memory['notified']
    Notified.new(stored)
  end

  # Stores the notified Message-Id list in memory.
  def notified=(value)
    memory['notified'] = value
  end
  322. private
  # Returns true when +value+ converts to an Integer strictly greater than
  # zero, false otherwise (including nil and strings that Integer() rejects).
  #
  # Zero is rejected: it is not a positive number, and the callers report
  # "must be a positive integer" for ports and update periods.
  def is_positive_integer?(value)
    Integer(value) > 0
  rescue
    # Integer() raises for nil and non-numeric input; treat that as invalid.
    false
  end
  # Tests +value+ against the shell glob +pattern+ using the class-wide
  # FNM_FLAGS mask.
  def glob_match?(pattern, value)
    File.fnmatch(pattern, value, FNM_FLAGS)
  end
  # Formats a count with its noun, e.g. "3 new mails". The noun is inflected
  # by String#pluralize (not core Ruby; presumably ActiveSupport -- confirm).
  def pluralize(count, noun)
    inflected = noun.pluralize(count)
    [('%d' % count), inflected].join(' ')
  end
  # Net::IMAP subclass that remembers the selected folder and its
  # UIDVALIDITY, never returns nil from fetch calls, and can wrap fetched
  # headers in Message objects.
  class Client < ::Net::IMAP
    # Opens a connection, yields it to the block and always disconnects
    # afterwards, even when the block raises. If the connection could not
    # be created there is nothing to disconnect.
    def self.open(host, *args)
      imap = new(host, *args)
      yield imap
    ensure
      imap.disconnect if imap
    end

    # UIDVALIDITY value recorded by the most recent #select.
    attr_reader :uidvalidity

    # Selects +folder+ and records both the folder name and the last
    # UIDVALIDITY response. Returns whatever Net::IMAP#select returned.
    def select(folder)
      @folder = folder
      result = super(folder)
      @uidvalidity = responses['UIDVALIDITY'].last
      result
    end

    # Same as Net::IMAP#fetch, but yields an empty array instead of nil.
    def fetch(*args)
      fetched = super
      fetched || []
    end

    # Same as Net::IMAP#uid_fetch, but yields an empty array instead of nil.
    def uid_fetch(*args)
      fetched = super
      fetched || []
    end

    # Fetches the headers of the given UIDs and wraps each result in a
    # Message bound to this client, the selected folder and its UIDVALIDITY.
    def uid_fetch_mails(set)
      header_data = uid_fetch(set, 'RFC822.HEADER')
      header_data.map do |data|
        Message.new(self, data, folder: @folder, uidvalidity: @uidvalidity)
      end
    end
  end
  # Hash of { uidvalidity => highest UID seen } whose entries never
  # decrease: assigning a smaller UID than the stored one is ignored.
  class Seen < Hash
    # Builds the map, optionally from a deserialized JSON hash whose keys
    # are strings; keys are converted to integers.
    def initialize(hash = nil)
      super()
      return unless hash

      hash.each_pair do |uidvalidity, uid|
        self[uidvalidity.to_i] = uid
      end
    end

    # Stores +uid+ only when there is no value yet or the new value is at
    # least as large as the current one.
    def []=(uidvalidity, uid)
      current = self[uidvalidity]
      return unless current.nil? || current <= uid

      super
    end
  end
  # Bounded list of the most recently notified Message-Ids, used to
  # suppress duplicate events. Appending with << keeps at most IDCACHE_SIZE
  # entries, dropping the oldest ones first.
  class Notified < Array
    # Builds the list, optionally seeded from a previously stored array.
    def initialize(array = nil)
      super()
      replace(array) if array
    end

    # Appends +value+ and then trims from the front so the list never holds
    # more than IDCACHE_SIZE entries. Trimming after the append (rather than
    # before it) is what keeps the bound exact. Returns self so appends can
    # be chained.
    def <<(value)
      super
      overflow = size - IDCACHE_SIZE
      shift(overflow) if overflow > 0
      self
    end
  end
  # A mail in an IMAP folder. It delegates to a Mail object parsed from the
  # message headers only; the full body and the BODYSTRUCTURE are fetched
  # from the server lazily, on first use, and then cached.
  class Message < SimpleDelegator
    DEFAULT_BODY_MIME_TYPES = %w[text/plain text/enriched text/html]

    attr_reader :uid, :folder, :uidvalidity

    # Adds #scrubbed, which returns the result of calling +method+ with
    # invalid byte sequences replaced by their hex dump in angle brackets.
    # Results are cached per method name. Relies on Object#try, which is
    # not core Ruby (presumably ActiveSupport -- confirm).
    module Scrubbed
      def scrubbed(method)
        (@scrubbed ||= {})[method.to_sym] ||=
          __send__(method).try(:scrub) { |bytes| "<#{bytes.unpack('H*')[0]}>" }
      end
    end

    include Scrubbed

    # client     - the IMAP client used for follow-up fetches and flag updates
    # fetch_data - fetch result whose attr hash holds 'UID' and 'RFC822.HEADER'
    # props      - extra instance variables to set (callers in this file pass
    #              folder: and uidvalidity:)
    def initialize(client, fetch_data, props = {})
      @client = client
      props.each { |key, value|
        instance_variable_set(:"@#{key}", value)
      }
      attr = fetch_data.attr
      @uid = attr['UID']
      super(Mail.read_from_string(attr['RFC822.HEADER']))
    end

    # Whether the mail appears to carry an attachment, judged from its
    # BODYSTRUCTURE. The answer is fetched once and cached; defined? is used
    # instead of ||= so that a false answer is cached too and does not cause
    # another server round trip on every call.
    def has_attachment?
      return @has_attachment if defined?(@has_attachment)

      data = @client.uid_fetch(@uid, 'BODYSTRUCTURE').first
      @has_attachment =
        if data
          struct_has_attachment?(data.attr['BODYSTRUCTURE'])
        else
          false
        end
    end

    # Fetches and parses the complete message (without setting the \Seen
    # flag, thanks to BODY.PEEK). Falls back to an empty mail when the
    # server returns nothing. The parsed mail is cached.
    def fetch
      @parsed ||=
        if data = @client.uid_fetch(@uid, 'BODY.PEEK[]').first
          Mail.read_from_string(data.attr['BODY[]'])
        else
          Mail.read_from_string('')
        end
    end

    # Returns the non-attachment text parts whose MIME type is listed in
    # +mime_types+, sorted by that list's order for multipart mails. Each
    # returned part is extended with Scrubbed.
    def body_parts(mime_types = DEFAULT_BODY_MIME_TYPES)
      mail = fetch
      if mail.multipart?
        mail.body.set_sort_order(mime_types)
        mail.body.sort_parts!
        mail.all_parts
      else
        [mail]
      end.select { |part|
        if part.multipart? || part.attachment? || !part.text? ||
           !mime_types.include?(part.mime_type)
          false
        else
          part.extend(Scrubbed)
          true
        end
      }
    end

    # Sets the \Seen flag on this mail on the server.
    def mark_as_read
      @client.uid_store(@uid, '+FLAGS', [:Seen])
    end

    private

    # Heuristic: a structure has an attachment when it is multipart and is
    # either multipart/mixed itself or contains a part for which this holds.
    def struct_has_attachment?(struct)
      struct.multipart? && (
        struct.subtype == 'MIXED' ||
        struct.parts.any? { |part|
          struct_has_attachment?(part)
        }
      )
    end
  end
  454. end
  455. end