Merge pull request #210 from knu/ftpsite_agent

Implement FtpsiteAgent.

Andrew Cantino 10 years ago
parent
commit
7c98b1acce
2 changed files with 293 additions and 0 deletions
  1. 214 0
      app/models/agents/ftpsite_agent.rb
  2. 79 0
      spec/models/agents/ftpsite_agent_spec.rb

+ 214 - 0
app/models/agents/ftpsite_agent.rb

@@ -0,0 +1,214 @@
1
+require 'net/ftp'
2
+require 'uri'
3
+require 'time'
4
+
5
+module Agents
6
+  class FtpsiteAgent < Agent
7
+    cannot_receive_events!
8
+
9
+    default_schedule "every_12h"
10
+
11
+    description <<-MD
12
+      The FtpsiteAgent checks a FTP site and creates Events based on newly uploaded files in a directory.
13
+
14
+      Specify a `url` that represents a directory of an FTP site to watch, and a list of `patterns` to match against file names.
15
+
16
+      Login credentials can be included in `url` if authentication is required.
17
+
18
+      Only files with a last modification time later than the `after` value, if specifed, are notified.
19
+    MD
20
+
21
+    event_description <<-MD
22
+      Events look like this:
23
+
24
+          {
25
+            "url": "ftp://example.org/pub/releases/foo-1.2.tar.gz",
26
+            "filename": "foo-1.2.tar.gz",
27
+            "timestamp": "2014-04-10T22:50:00Z"
28
+          }
29
+    MD
30
+
31
+    def working?
32
+      event_created_within?(options['expected_update_period_in_days']) && !recent_error_logs?
33
+    end
34
+
35
+    def default_options
36
+      {
37
+          'expected_update_period_in_days' => "1",
38
+          'url' => "ftp://example.org/pub/releases/",
39
+          'patterns' => [
40
+            'foo-*.tar.gz',
41
+          ],
42
+          'after' => Time.now.iso8601,
43
+      }
44
+    end
45
+
46
+    def validate_options
47
+      # Check for required fields
48
+      begin
49
+        url = options['url']
50
+        String === url or raise
51
+        uri = URI(url)
52
+        URI::FTP === uri or raise
53
+        errors.add(:base, "url must end with a slash") unless uri.path.end_with?('/')
54
+      rescue
55
+        errors.add(:base, "url must be a valid FTP URL")
56
+      end
57
+
58
+      patterns = options['patterns']
59
+      case patterns
60
+      when Array
61
+        if patterns.empty?
62
+          errors.add(:base, "patterns must not be empty")
63
+        end
64
+      when nil, ''
65
+        errors.add(:base, "patterns must be specified")
66
+      else
67
+        errors.add(:base, "patterns must be an array")
68
+      end
69
+
70
+      # Check for optional fields
71
+      if (timestamp = options['timestamp']).present?
72
+        begin
73
+          Time.parse(timestamp)
74
+        rescue
75
+          errors.add(:base, "timestamp cannot be parsed as time")
76
+        end
77
+      end
78
+
79
+      if options['expected_update_period_in_days'].present?
80
+        errors.add(:base, "Invalid expected_update_period_in_days format") unless is_positive_integer?(options['expected_update_period_in_days'])
81
+      end
82
+    end
83
+
84
+    def check
85
+      saving_entries do |found|
86
+        each_entry { |filename, mtime|
87
+          found[filename, mtime]
88
+        }
89
+      end
90
+    end
91
+
92
+    def each_entry
93
+      patterns = options['patterns']
94
+
95
+      after =
96
+        if str = options['after']
97
+          Time.parse(str)
98
+        else
99
+          Time.at(0)
100
+        end
101
+
102
+      open_ftp(base_uri) do |ftp|
103
+        log "Listing the directory"
104
+        # Do not use a block style call because we need to call other
105
+        # commands during iteration.
106
+        list = ftp.list('-a')
107
+
108
+        month2year = {}
109
+
110
+        list.each do |line|
111
+          mon, day, smtn, rest = line.split(' ', 9)[5..-1]
112
+
113
+          # Remove symlink target part if any
114
+          filename = rest[/\A(.+?)(?:\s+->\s|\z)/, 1]
115
+
116
+          patterns.any? { |pattern|
117
+            File.fnmatch?(pattern, filename)
118
+          } or next
119
+
120
+          case smtn
121
+          when /:/
122
+            if year = month2year[mon]
123
+              mtime = Time.parse("#{mon} #{day} #{year} #{smtn} GMT")
124
+            else
125
+              log "Getting mtime of #{filename}"
126
+              mtime = ftp.mtime(filename)
127
+              month2year[mon] = mtime.year
128
+            end
129
+          else
130
+            # Do not bother calling MDTM for old files.  Losing the
131
+            # time part only makes a timestamp go backwards, meaning
132
+            # that it will trigger no new event.
133
+            mtime = Time.parse("#{mon} #{day} #{smtn} GMT")
134
+          end
135
+
136
+          after < mtime or next
137
+
138
+          yield filename, mtime
139
+        end
140
+      end
141
+    end
142
+
143
+    def open_ftp(uri)
144
+      ftp = Net::FTP.new
145
+
146
+      log "Connecting to #{uri.host}#{':%d' % uri.port if uri.port != uri.default_port}"
147
+      ftp.connect(uri.host, uri.port)
148
+
149
+      user =
150
+        if str = uri.user
151
+          URI.decode(str)
152
+        else
153
+          'anonymous'
154
+        end
155
+      password =
156
+        if str = uri.password
157
+          URI.decode(str)
158
+        else
159
+          'anonymous@'
160
+        end
161
+      log "Logging in as #{user}"
162
+      ftp.login(user, password)
163
+
164
+      ftp.passive = true
165
+
166
+      path = uri.path.chomp('/')
167
+      log "Changing directory to #{path}"
168
+      ftp.chdir(path)
169
+
170
+      yield ftp
171
+    ensure
172
+      log "Closing the connection"
173
+      ftp.close
174
+    end
175
+
176
+    def base_uri
177
+      @base_uri ||= URI(options['url'])
178
+    end
179
+
180
+    def saving_entries
181
+      known_entries = memory['known_entries'] || {}
182
+      found_entries = {}
183
+      new_files = []
184
+
185
+      yield proc { |filename, mtime|
186
+        found_entries[filename] = misotime = mtime.utc.iso8601
187
+        unless (prev = known_entries[filename]) && misotime <= prev
188
+          new_files << filename
189
+        end
190
+      }
191
+
192
+      new_files.sort_by { |filename|
193
+        found_entries[filename]
194
+      }.each { |filename|
195
+        create_event :payload => {
196
+          'url' => (base_uri + filename).to_s,
197
+          'filename' => filename,
198
+          'timestamp' => found_entries[filename],
199
+        }
200
+      }
201
+
202
+      memory['known_entries'] = found_entries
203
+      save!
204
+    end
205
+
206
+    private
207
+
208
+    def is_positive_integer?(value)
209
+      Integer(value) >= 0
210
+    rescue
211
+      false
212
+    end
213
+  end
214
+end

+ 79 - 0
spec/models/agents/ftpsite_agent_spec.rb

@@ -0,0 +1,79 @@
1
+require 'spec_helper'
2
+require 'time'
3
+
4
+describe Agents::FtpsiteAgent do
5
+  describe "checking anonymous FTP" do
6
+    before do
7
+      @site = {
8
+        'expected_update_period_in_days' => 1,
9
+        'url' => "ftp://ftp.example.org/pub/releases/",
10
+        'patterns' => ["example-*.tar.gz"],
11
+      }
12
+      @checker = Agents::FtpsiteAgent.new(:name => "Example", :options => @site, :keep_events_for => 2)
13
+      @checker.user = users(:bob)
14
+      @checker.save!
15
+      stub(@checker).each_entry.returns { |block|
16
+        block.call("example-latest.tar.gz", Time.parse("2014-04-01T10:00:01Z"))
17
+        block.call("example-1.0.tar.gz",    Time.parse("2013-10-01T10:00:00Z"))
18
+        block.call("example-1.1.tar.gz",    Time.parse("2014-04-01T10:00:00Z"))
19
+      }
20
+    end
21
+
22
+    describe "#check" do
23
+      it "should validate the integer fields" do
24
+        @checker.options['expected_update_period_in_days'] = "nonsense"
25
+        lambda { @checker.save! }.should raise_error;
26
+        @checker.options = @site
27
+      end
28
+
29
+      it "should check for changes and save known entries in memory" do
30
+        lambda { @checker.check }.should change { Event.count }.by(3)
31
+        @checker.memory['known_entries'].tap { |known_entries|
32
+          known_entries.size.should == 3
33
+          known_entries.sort_by(&:last).should == [
34
+            ["example-1.0.tar.gz",    "2013-10-01T10:00:00Z"],
35
+            ["example-1.1.tar.gz",    "2014-04-01T10:00:00Z"],
36
+            ["example-latest.tar.gz", "2014-04-01T10:00:01Z"],
37
+          ]
38
+        }
39
+
40
+        Event.last(2).first.payload.should == {
41
+          'url' => 'ftp://ftp.example.org/pub/releases/example-1.1.tar.gz',
42
+          'filename' => 'example-1.1.tar.gz',
43
+          'timestamp' => '2014-04-01T10:00:00Z',
44
+        }
45
+
46
+        lambda { @checker.check }.should_not change { Event.count }
47
+
48
+        stub(@checker).each_entry.returns { |block|
49
+          block.call("example-latest.tar.gz", Time.parse("2014-04-02T10:00:01Z"))
50
+
51
+          # In the long list format the timestamp may look going
52
+          # backwards after six months: Oct 01 10:00 -> Oct 01 2013
53
+          block.call("example-1.0.tar.gz",    Time.parse("2013-10-01T00:00:00Z"))
54
+
55
+          block.call("example-1.1.tar.gz",    Time.parse("2014-04-01T10:00:00Z"))
56
+          block.call("example-1.2.tar.gz",    Time.parse("2014-04-02T10:00:00Z"))
57
+        }
58
+        lambda { @checker.check }.should change { Event.count }.by(2)
59
+        @checker.memory['known_entries'].tap { |known_entries|
60
+          known_entries.size.should == 4
61
+          known_entries.sort_by(&:last).should == [
62
+            ["example-1.0.tar.gz",    "2013-10-01T00:00:00Z"],
63
+            ["example-1.1.tar.gz",    "2014-04-01T10:00:00Z"],
64
+            ["example-1.2.tar.gz",    "2014-04-02T10:00:00Z"],
65
+            ["example-latest.tar.gz", "2014-04-02T10:00:01Z"],
66
+          ]
67
+        }
68
+
69
+        Event.last(2).first.payload.should == {
70
+          'url' => 'ftp://ftp.example.org/pub/releases/example-1.2.tar.gz',
71
+          'filename' => 'example-1.2.tar.gz',
72
+          'timestamp' => '2014-04-02T10:00:00Z',
73
+        }
74
+
75
+        lambda { @checker.check }.should_not change { Event.count }
76
+      end
77
+    end
78
+  end
79
+end