Merge pull request #1 from cantino/dsander-rails41

Upgrade and confirm functionality of rturk and twilio-ruby

Dominik Sander 11 年之前
父节点
当前提交
ef9778d150

+ 8 - 0
.env.example

@@ -78,6 +78,14 @@ AWS_ACCESS_KEY="your aws access key"
78 78
 # Set AWS_SANDBOX to true if you're developing Huginn code.
79 79
 AWS_SANDBOX=false
80 80
 
81
+########################
82
+#   Various Settings   #
83
+########################
84
+
85
+# Allow JSONPath eval expressions, e.g., $..price[?(@ < 20)]
86
+# You should not allow this on a shared Huginn box because it is not secure.
87
+ALLOW_JSONPATH_EVAL=false
88
+
81 89
 # Use Graphviz for generating diagrams instead of using Google Chart
82 90
 # Tools.  Specify a dot(1) command path built with SVG support
83 91
 # enabled.

+ 2 - 2
Gemfile

@@ -10,7 +10,7 @@ gem 'bootstrap-kaminari-views', '~> 0.0.2'
10 10
 gem 'rufus-scheduler', '~> 3.0.7', require: false
11 11
 gem 'json', '~> 1.8.1'
12 12
 gem 'jsonpath', '~> 0.5.3'
13
-gem 'twilio-ruby', '~> 3.10.0'
13
+gem 'twilio-ruby', '~> 3.11.5'
14 14
 gem 'ruby-growl', '~> 4.1.0'
15 15
 
16 16
 gem 'delayed_job', '~> 4.0.0'
@@ -40,7 +40,7 @@ gem 'nokogiri', '~> 1.6.1'
40 40
 
41 41
 gem 'wunderground', '~> 1.2.0'
42 42
 gem 'forecast_io', '~> 2.0.0'
43
-gem 'rturk', '~> 2.11.0'
43
+gem 'rturk', '~> 2.12.1'
44 44
 
45 45
 gem 'twitter', '~> 5.7.1'
46 46
 gem 'twitter-stream', github: 'cantino/twitter-stream', branch: 'master'

+ 5 - 5
Gemfile.lock

@@ -215,7 +215,7 @@ GEM
215 215
       rspec-core (~> 2.14.0)
216 216
       rspec-expectations (~> 2.14.0)
217 217
       rspec-mocks (~> 2.14.0)
218
-    rturk (2.11.3)
218
+    rturk (2.12.1)
219 219
       erector
220 220
       nokogiri
221 221
       rest-client
@@ -263,7 +263,7 @@ GEM
263 263
     treetop (1.4.15)
264 264
       polyglot
265 265
       polyglot (>= 0.3.1)
266
-    twilio-ruby (3.10.1)
266
+    twilio-ruby (3.11.5)
267 267
       builder (>= 2.1.2)
268 268
       jwt (>= 0.1.2)
269 269
       multi_json (>= 1.3.0)
@@ -289,7 +289,7 @@ GEM
289 289
       macaddr (~> 1.0)
290 290
     warden (1.2.3)
291 291
       rack (>= 1.0)
292
-    webmock (1.13.0)
292
+    webmock (1.17.4)
293 293
       addressable (>= 2.2.7)
294 294
       crack (>= 0.3.2)
295 295
     weibo_2 (0.1.6)
@@ -337,14 +337,14 @@ DEPENDENCIES
337 337
   rr
338 338
   rspec
339 339
   rspec-rails
340
-  rturk (~> 2.11.0)
340
+  rturk (~> 2.12.1)
341 341
   ruby-growl (~> 4.1.0)
342 342
   rufus-scheduler (~> 3.0.7)
343 343
   sass-rails (~> 4.0.0)
344 344
   select2-rails (~> 3.5.4)
345 345
   shoulda-matchers
346 346
   therubyracer (~> 0.12.1)
347
-  twilio-ruby (~> 3.10.0)
347
+  twilio-ruby (~> 3.11.5)
348 348
   twitter (~> 5.7.1)
349 349
   twitter-stream!
350 350
   typhoeus (~> 0.6.3)

+ 1 - 1
README.md

@@ -24,7 +24,7 @@ Follow [@tectonic](https://twitter.com/tectonic) for updates as Huginn evolves,
24 24
 
25 25
 ### We need your help!
26 26
 
27
-Want to help with Huginn?  Try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open).
27
+Want to help with Huginn?  All contributions are encouraged!  You could make UI improvements, add new Agents, write documentation and tutorials, or try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open).
28 28
 
29 29
 ## Examples
30 30
 

+ 10 - 10
app/models/agents/twilio_agent.rb

@@ -7,17 +7,16 @@ module Agents
7 7
     cannot_create_events!
8 8
 
9 9
     description <<-MD
10
-      The TwilioAgent receives and collects events and sends them via text message or gives you a call when scheduled.
10
+      The TwilioAgent receives and collects events and sends them via text message (up to 160 characters) or gives you a call when scheduled.
11 11
 
12
-      It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use Event Formatting Agent if your event does not provide these keys.
12
+      It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use the EventFormattingAgent if your event does not provide these keys.
13 13
 
14 14
       Set `receiver_cell` to the number to receive text messages/call and `sender_cell` to the number sending them.
15 15
 
16 16
       `expected_receive_period_in_days` is maximum number of days that you would expect to pass between events being received by this agent.
17 17
 
18
-      If you would like to receive calls, then set `receive_call` to true. `server_url` needs to be 
19
-      filled only if you are making calls. Dont forget to include http/https in `server_url`.
20
-
18
+      If you would like to receive calls, set `receive_call` to `true`. In this case, `server_url` must be set to the URL of your
19
+      Huginn installation (probably "https://#{ENV['DOMAIN']}"), which must be web-accessible.  Be sure to set http/https correctly.
21 20
     MD
22 21
 
23 22
     def default_options
@@ -43,13 +42,14 @@ module Agents
43 42
       @client = Twilio::REST::Client.new options['account_sid'], options['auth_token']
44 43
       memory['pending_calls'] ||= {}
45 44
       incoming_events.each do |event|
46
-        message = (event.payload['message'] || event.payload['text'] || event.payload['sms']).to_s
47
-        if message != ""
45
+        message = (event.payload['message'].presence || event.payload['text'].presence || event.payload['sms'].presence).to_s
46
+        if message.present?
48 47
           if options['receive_call'].to_s == 'true'
49 48
             secret = SecureRandom.hex 3
50 49
             memory['pending_calls'][secret] = message
51 50
             make_call secret
52 51
           end
52
+
53 53
           if options['receive_text'].to_s == 'true'
54 54
             message = message.slice 0..160
55 55
             send_message message
@@ -71,11 +71,11 @@ module Agents
71 71
     def make_call(secret)
72 72
       @client.account.calls.create :from => options['sender_cell'],
73 73
                                    :to => options['receiver_cell'],
74
-                                   :url => post_url(options['server_url'],secret)
74
+                                   :url => post_url(options['server_url'], secret)
75 75
     end
76 76
 
77
-    def post_url(server_url,secret)
78
-      "#{server_url}/users/#{self.user.id}/web_requests/#{self.id}/#{secret}"
77
+    def post_url(server_url, secret)
78
+      "#{server_url}/users/#{user.id}/web_requests/#{id}/#{secret}"
79 79
     end
80 80
 
81 81
     def receive_web_request(params, method, format)

+ 74 - 60
app/models/agents/website_agent.rb

@@ -16,6 +16,8 @@ module Agents
16 16
 
17 17
       Specify a `url` and select a `mode` for when to create Events based on the scraped data, either `all` or `on_change`.
18 18
 
19
+      `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape)
20
+
19 21
       The `type` value can be `xml`, `html`, or `json`.
20 22
 
21 23
       To tell the Agent how to parse the content, specify `extract` as a hash with keys naming the extractions and values of hashes.
@@ -107,85 +109,97 @@ module Agents
107 109
       log "Fetching #{options['url']}"
108 110
       request_opts = { :followlocation => true }
109 111
       request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present?
110
-      request = Typhoeus::Request.new(options['url'], request_opts)
111 112
 
112
-      request.on_failure do |response|
113
-        error "Failed: #{response.inspect}"
113
+      requests = []
114
+
115
+      if options['url'].kind_of?(Array)
116
+        options['url'].each do |url|
117
+           requests.push(Typhoeus::Request.new(url, request_opts))
118
+        end
119
+      else
120
+        requests.push(Typhoeus::Request.new(options['url'], request_opts))
114 121
       end
115 122
 
116
-      request.on_success do |response|
117
-        body = response.body
118
-        if (encoding = options['force_encoding']).present?
119
-          body = body.encode(Encoding::UTF_8, encoding)
123
+      requests.each do |request|
124
+        request.on_failure do |response|
125
+          error "Failed: #{response.inspect}"
120 126
         end
121
-        doc = parse(body)
122 127
 
123
-        if extract_full_json?
124
-          if store_payload!(previous_payloads(1), doc)
125
-            log "Storing new result for '#{name}': #{doc.inspect}"
126
-            create_event :payload => doc
128
+        request.on_success do |response|
129
+          body = response.body
130
+          if (encoding = options['force_encoding']).present?
131
+            body = body.encode(Encoding::UTF_8, encoding)
127 132
           end
128
-        else
129
-          output = {}
130
-          options['extract'].each do |name, extraction_details|
131
-            if extraction_type == "json"
132
-              result = Utils.values_at(doc, extraction_details['path'])
133
-              log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}"
134
-            else
135
-              case
136
-              when css = extraction_details['css']
137
-                nodes = doc.css(css)
138
-              when xpath = extraction_details['xpath']
139
-                nodes = doc.xpath(xpath)
133
+          doc = parse(body)
134
+
135
+          if extract_full_json?
136
+            if store_payload!(previous_payloads(1), doc)
137
+              log "Storing new result for '#{name}': #{doc.inspect}"
138
+              create_event :payload => doc
139
+            end
140
+          else
141
+            output = {}
142
+            options['extract'].each do |name, extraction_details|
143
+              if extraction_type == "json"
144
+                result = Utils.values_at(doc, extraction_details['path'])
145
+                log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}"
140 146
               else
141
-                error "'css' or 'xpath' is required for HTML or XML extraction"
142
-                return
143
-              end
144
-              unless Nokogiri::XML::NodeSet === nodes
145
-                error "The result of HTML/XML extraction was not a NodeSet"
146
-                return
147
-              end
148
-              result = nodes.map { |node|
149
-                if extraction_details['attr']
150
-                  node.attr(extraction_details['attr'])
151
-                elsif extraction_details['text']
152
-                  node.text()
147
+                case
148
+                when css = extraction_details['css']
149
+                  nodes = doc.css(css)
150
+                when xpath = extraction_details['xpath']
151
+                  nodes = doc.xpath(xpath)
153 152
                 else
154
-                  error "'attr' or 'text' is required on HTML or XML extraction patterns"
153
+                  error "'css' or 'xpath' is required for HTML or XML extraction"
155 154
                   return
156 155
                 end
157
-              }
158
-              log "Extracting #{extraction_type} at #{xpath || css}: #{result}"
156
+                unless Nokogiri::XML::NodeSet === nodes
157
+                  error "The result of HTML/XML extraction was not a NodeSet"
158
+                  return
159
+                end
160
+                result = nodes.map { |node|
161
+                  if extraction_details['attr']
162
+                    node.attr(extraction_details['attr'])
163
+                  elsif extraction_details['text']
164
+                    node.text()
165
+                  else
166
+                    error "'attr' or 'text' is required on HTML or XML extraction patterns"
167
+                    return
168
+                  end
169
+                }
170
+                log "Extracting #{extraction_type} at #{xpath || css}: #{result}"
171
+              end
172
+              output[name] = result
159 173
             end
160
-            output[name] = result
161
-          end
162 174
 
163
-          num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq
175
+            num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq
164 176
 
165
-          if num_unique_lengths.length != 1
166
-            error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}"
167
-            return
168
-          end
169
-      
170
-          old_events = previous_payloads num_unique_lengths.first
171
-          num_unique_lengths.first.times do |index|
172
-            result = {}
173
-            options['extract'].keys.each do |name|
174
-              result[name] = output[name][index]
175
-              if name.to_s == 'url'
176
-                result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
177
-              end
177
+            if num_unique_lengths.length != 1
178
+              error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}"
179
+              return
178 180
             end
181
+        
182
+            old_events = previous_payloads num_unique_lengths.first
183
+            num_unique_lengths.first.times do |index|
184
+              result = {}
185
+              options['extract'].keys.each do |name|
186
+                result[name] = output[name][index]
187
+                if name.to_s == 'url'
188
+                  result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil?
189
+                end
190
+              end
179 191
 
180
-            if store_payload!(old_events, result)
181
-              log "Storing new parsed result for '#{name}': #{result.inspect}"
182
-              create_event :payload => result
192
+              if store_payload!(old_events, result)
193
+                log "Storing new parsed result for '#{name}': #{result.inspect}"
194
+                create_event :payload => result
195
+              end
183 196
             end
184 197
           end
185 198
         end
199
+
200
+        hydra.queue request
201
+        hydra.run
186 202
       end
187
-      hydra.queue request
188
-      hydra.run
189 203
     end
190 204
 
191 205
     private

+ 4 - 4
deployment/site-cookbooks/huginn_development/recipes/default.rb

@@ -16,7 +16,7 @@ group "huginn" do
16 16
   action :create
17 17
 end
18 18
 
19
-%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl").each do |pkg|
19
+%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libmysqlclient-dev").each do |pkg|
20 20
   package pkg do
21 21
     action :install
22 22
   end
@@ -49,9 +49,9 @@ bash "huginn dependencies" do
49 49
     export LC_ALL="en_US.UTF-8"
50 50
     sudo bundle install
51 51
     sed s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env.example > .env
52
-    sudo rake db:create
53
-    sudo rake db:migrate
54
-    sudo rake db:seed
52
+    sudo bundle exec rake db:create
53
+    sudo bundle exec rake db:migrate
54
+    sudo bundle exec rake db:seed
55 55
     EOH
56 56
 end
57 57
 

+ 4 - 4
deployment/site-cookbooks/huginn_production/recipes/default.rb

@@ -14,7 +14,7 @@ group "huginn" do
14 14
   members ["huginn"]
15 15
 end
16 16
 
17
-%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8").each do |pkg|
17
+%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8" "libmysqlclient-dev").each do |pkg|
18 18
   package("#{pkg}")
19 19
 end
20 20
 
@@ -84,9 +84,9 @@ deploy "/home/huginn" do
84 84
       sudo cp /home/huginn/shared/config/nginx.conf /etc/nginx/ 
85 85
       sudo bundle install
86 86
       sed -i s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env
87
-      sudo rake db:create
88
-      sudo rake db:migrate
89
-      sudo rake db:seed
87
+      sudo bundle exec rake db:create
88
+      sudo bundle exec rake db:migrate
89
+      sudo bundle exec rake db:seed
90 90
       sudo foreman export upstart /etc/init -a huginn -u huginn -l log
91 91
       sudo start huginn
92 92
       EOH

+ 2 - 2
lib/utils.rb

@@ -56,7 +56,7 @@ module Utils
56 56
       escape = false
57 57
     end
58 58
 
59
-    result = JsonPath.new(path, :allow_eval => false).on(data.is_a?(String) ? data : data.to_json)
59
+    result = JsonPath.new(path, :allow_eval => ENV['ALLOW_JSONPATH_EVAL'] == "true").on(data.is_a?(String) ? data : data.to_json)
60 60
     if escape
61 61
       result.map {|r| CGI::escape r }
62 62
     else
@@ -79,4 +79,4 @@ module Utils
79 79
   def self.pretty_jsonify(thing)
80 80
     JSON.pretty_generate(thing).gsub('</', '<\/')
81 81
   end
82
-end
82
+end

+ 24 - 0
spec/models/agents/website_agent_spec.rb

@@ -91,6 +91,30 @@ describe Agents::WebsiteAgent do
91 91
         @checker.check
92 92
         @checker.logs.first.message.should =~ /Got an uneven number of matches/
93 93
       end
94
+
95
+      it "should accept an array for url" do
96
+        @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
97
+        @checker.options = @site
98
+        lambda { @checker.save! }.should_not raise_error;
99
+        lambda { @checker.check }.should_not raise_error;
100
+      end
101
+
102
+      it "should parse events from all urls in array" do
103
+        lambda {
104
+          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
105
+          @site['mode'] = 'all'
106
+          @checker.options = @site
107
+          @checker.check
108
+        }.should change { Event.count }.by(2)
109
+      end
110
+
111
+      it "should follow unique rules when parsing array of urls" do
112
+        lambda {
113
+          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
114
+          @checker.options = @site
115
+          @checker.check
116
+        }.should change { Event.count }.by(1)
117
+      end
94 118
     end
95 119
 
96 120
     describe 'encoding' do