@@ -78,6 +78,14 @@ AWS_ACCESS_KEY="your aws access key" |
||
78 | 78 |
# Set AWS_SANDBOX to true if you're developing Huginn code. |
79 | 79 |
AWS_SANDBOX=false |
80 | 80 |
|
81 |
+######################## |
|
82 |
+# Various Settings # |
|
83 |
+######################## |
|
84 |
+ |
|
85 |
+# Allow JSONPath eval expressions, e.g., $..price[?(@ < 20)] |
|
86 |
+# You should not allow this on a shared Huginn box because it is not secure. |
|
87 |
+ALLOW_JSONPATH_EVAL=false |
|
88 |
+ |
|
81 | 89 |
# Use Graphviz for generating diagrams instead of using Google Chart |
82 | 90 |
# Tools. Specify a dot(1) command path built with SVG support |
83 | 91 |
# enabled. |
@@ -10,7 +10,7 @@ gem 'bootstrap-kaminari-views', '~> 0.0.2' |
||
10 | 10 |
gem 'rufus-scheduler', '~> 3.0.7', require: false |
11 | 11 |
gem 'json', '~> 1.8.1' |
12 | 12 |
gem 'jsonpath', '~> 0.5.3' |
13 |
-gem 'twilio-ruby', '~> 3.10.0' |
|
13 |
+gem 'twilio-ruby', '~> 3.11.5' |
|
14 | 14 |
gem 'ruby-growl', '~> 4.1.0' |
15 | 15 |
|
16 | 16 |
gem 'delayed_job', '~> 4.0.0' |
@@ -40,7 +40,7 @@ gem 'nokogiri', '~> 1.6.1' |
||
40 | 40 |
|
41 | 41 |
gem 'wunderground', '~> 1.2.0' |
42 | 42 |
gem 'forecast_io', '~> 2.0.0' |
43 |
-gem 'rturk', '~> 2.11.0' |
|
43 |
+gem 'rturk', '~> 2.12.1' |
|
44 | 44 |
|
45 | 45 |
gem 'twitter', '~> 5.7.1' |
46 | 46 |
gem 'twitter-stream', github: 'cantino/twitter-stream', branch: 'master' |
@@ -215,7 +215,7 @@ GEM |
||
215 | 215 |
rspec-core (~> 2.14.0) |
216 | 216 |
rspec-expectations (~> 2.14.0) |
217 | 217 |
rspec-mocks (~> 2.14.0) |
218 |
- rturk (2.11.3) |
|
218 |
+ rturk (2.12.1) |
|
219 | 219 |
erector |
220 | 220 |
nokogiri |
221 | 221 |
rest-client |
@@ -263,7 +263,7 @@ GEM |
||
263 | 263 |
treetop (1.4.15) |
264 | 264 |
polyglot |
265 | 265 |
polyglot (>= 0.3.1) |
266 |
- twilio-ruby (3.10.1) |
|
266 |
+ twilio-ruby (3.11.5) |
|
267 | 267 |
builder (>= 2.1.2) |
268 | 268 |
jwt (>= 0.1.2) |
269 | 269 |
multi_json (>= 1.3.0) |
@@ -289,7 +289,7 @@ GEM |
||
289 | 289 |
macaddr (~> 1.0) |
290 | 290 |
warden (1.2.3) |
291 | 291 |
rack (>= 1.0) |
292 |
- webmock (1.13.0) |
|
292 |
+ webmock (1.17.4) |
|
293 | 293 |
addressable (>= 2.2.7) |
294 | 294 |
crack (>= 0.3.2) |
295 | 295 |
weibo_2 (0.1.6) |
@@ -337,14 +337,14 @@ DEPENDENCIES |
||
337 | 337 |
rr |
338 | 338 |
rspec |
339 | 339 |
rspec-rails |
340 |
- rturk (~> 2.11.0) |
|
340 |
+ rturk (~> 2.12.1) |
|
341 | 341 |
ruby-growl (~> 4.1.0) |
342 | 342 |
rufus-scheduler (~> 3.0.7) |
343 | 343 |
sass-rails (~> 4.0.0) |
344 | 344 |
select2-rails (~> 3.5.4) |
345 | 345 |
shoulda-matchers |
346 | 346 |
therubyracer (~> 0.12.1) |
347 |
- twilio-ruby (~> 3.10.0) |
|
347 |
+ twilio-ruby (~> 3.11.5) |
|
348 | 348 |
twitter (~> 5.7.1) |
349 | 349 |
twitter-stream! |
350 | 350 |
typhoeus (~> 0.6.3) |
@@ -24,7 +24,7 @@ Follow [@tectonic](https://twitter.com/tectonic) for updates as Huginn evolves, |
||
24 | 24 |
|
25 | 25 |
### We need your help! |
26 | 26 |
|
27 |
-Want to help with Huginn? Try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open). |
|
27 |
+Want to help with Huginn? All contributions are encouraged! You could make UI improvements, add new Agents, write documentation and tutorials, or try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open). |
|
28 | 28 |
|
29 | 29 |
## Examples |
30 | 30 |
|
@@ -7,17 +7,16 @@ module Agents |
||
7 | 7 |
cannot_create_events! |
8 | 8 |
|
9 | 9 |
description <<-MD |
10 |
- The TwilioAgent receives and collects events and sends them via text message or gives you a call when scheduled. |
|
10 |
+ The TwilioAgent receives and collects events and sends them via text message (up to 160 characters) or gives you a call when scheduled. |
|
11 | 11 |
|
12 |
- It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use Event Formatting Agent if your event does not provide these keys. |
|
12 |
+ It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use the EventFormattingAgent if your event does not provide these keys. |
|
13 | 13 |
|
14 | 14 |
Set `receiver_cell` to the number to receive text messages/call and `sender_cell` to the number sending them. |
15 | 15 |
|
16 | 16 |
`expected_receive_period_in_days` is maximum number of days that you would expect to pass between events being received by this agent. |
17 | 17 |
|
18 |
- If you would like to receive calls, then set `receive_call` to true. `server_url` needs to be |
|
19 |
- filled only if you are making calls. Dont forget to include http/https in `server_url`. |
|
20 |
- |
|
18 |
+ If you would like to receive calls, set `receive_call` to `true`. In this case, `server_url` must be set to the URL of your |
|
19 |
+ Huginn installation (probably "https://#{ENV['DOMAIN']}"), which must be web-accessible. Be sure to set http/https correctly. |
|
21 | 20 |
MD |
22 | 21 |
|
23 | 22 |
def default_options |
@@ -43,13 +42,14 @@ module Agents |
||
43 | 42 |
@client = Twilio::REST::Client.new options['account_sid'], options['auth_token'] |
44 | 43 |
memory['pending_calls'] ||= {} |
45 | 44 |
incoming_events.each do |event| |
46 |
- message = (event.payload['message'] || event.payload['text'] || event.payload['sms']).to_s |
|
47 |
- if message != "" |
|
45 |
+ message = (event.payload['message'].presence || event.payload['text'].presence || event.payload['sms'].presence).to_s |
|
46 |
+ if message.present? |
|
48 | 47 |
if options['receive_call'].to_s == 'true' |
49 | 48 |
secret = SecureRandom.hex 3 |
50 | 49 |
memory['pending_calls'][secret] = message |
51 | 50 |
make_call secret |
52 | 51 |
end |
52 |
+ |
|
53 | 53 |
if options['receive_text'].to_s == 'true' |
54 | 54 |
message = message.slice 0..160 |
55 | 55 |
send_message message |
@@ -71,11 +71,11 @@ module Agents |
||
71 | 71 |
def make_call(secret) |
72 | 72 |
@client.account.calls.create :from => options['sender_cell'], |
73 | 73 |
:to => options['receiver_cell'], |
74 |
- :url => post_url(options['server_url'],secret) |
|
74 |
+ :url => post_url(options['server_url'], secret) |
|
75 | 75 |
end |
76 | 76 |
|
77 |
- def post_url(server_url,secret) |
|
78 |
- "#{server_url}/users/#{self.user.id}/web_requests/#{self.id}/#{secret}" |
|
77 |
+ def post_url(server_url, secret) |
|
78 |
+ "#{server_url}/users/#{user.id}/web_requests/#{id}/#{secret}" |
|
79 | 79 |
end |
80 | 80 |
|
81 | 81 |
def receive_web_request(params, method, format) |
@@ -16,6 +16,8 @@ module Agents |
||
16 | 16 |
|
17 | 17 |
Specify a `url` and select a `mode` for when to create Events based on the scraped data, either `all` or `on_change`. |
18 | 18 |
|
19 |
+ `url` can be a single URL, or an array of URLs (for example, for multiple pages with the exact same structure but different content to scrape). |
|
20 |
+ |
|
19 | 21 |
The `type` value can be `xml`, `html`, or `json`. |
20 | 22 |
|
21 | 23 |
To tell the Agent how to parse the content, specify `extract` as a hash with keys naming the extractions and values of hashes. |
@@ -107,85 +109,97 @@ module Agents |
||
107 | 109 |
log "Fetching #{options['url']}" |
108 | 110 |
request_opts = { :followlocation => true } |
109 | 111 |
request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present? |
110 |
- request = Typhoeus::Request.new(options['url'], request_opts) |
|
111 | 112 |
|
112 |
- request.on_failure do |response| |
|
113 |
- error "Failed: #{response.inspect}" |
|
113 |
+ requests = [] |
|
114 |
+ |
|
115 |
+ if options['url'].kind_of?(Array) |
|
116 |
+ options['url'].each do |url| |
|
117 |
+ requests.push(Typhoeus::Request.new(url, request_opts)) |
|
118 |
+ end |
|
119 |
+ else |
|
120 |
+ requests.push(Typhoeus::Request.new(options['url'], request_opts)) |
|
114 | 121 |
end |
115 | 122 |
|
116 |
- request.on_success do |response| |
|
117 |
- body = response.body |
|
118 |
- if (encoding = options['force_encoding']).present? |
|
119 |
- body = body.encode(Encoding::UTF_8, encoding) |
|
123 |
+ requests.each do |request| |
|
124 |
+ request.on_failure do |response| |
|
125 |
+ error "Failed: #{response.inspect}" |
|
120 | 126 |
end |
121 |
- doc = parse(body) |
|
122 | 127 |
|
123 |
- if extract_full_json? |
|
124 |
- if store_payload!(previous_payloads(1), doc) |
|
125 |
- log "Storing new result for '#{name}': #{doc.inspect}" |
|
126 |
- create_event :payload => doc |
|
128 |
+ request.on_success do |response| |
|
129 |
+ body = response.body |
|
130 |
+ if (encoding = options['force_encoding']).present? |
|
131 |
+ body = body.encode(Encoding::UTF_8, encoding) |
|
127 | 132 |
end |
128 |
- else |
|
129 |
- output = {} |
|
130 |
- options['extract'].each do |name, extraction_details| |
|
131 |
- if extraction_type == "json" |
|
132 |
- result = Utils.values_at(doc, extraction_details['path']) |
|
133 |
- log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}" |
|
134 |
- else |
|
135 |
- case |
|
136 |
- when css = extraction_details['css'] |
|
137 |
- nodes = doc.css(css) |
|
138 |
- when xpath = extraction_details['xpath'] |
|
139 |
- nodes = doc.xpath(xpath) |
|
133 |
+ doc = parse(body) |
|
134 |
+ |
|
135 |
+ if extract_full_json? |
|
136 |
+ if store_payload!(previous_payloads(1), doc) |
|
137 |
+ log "Storing new result for '#{name}': #{doc.inspect}" |
|
138 |
+ create_event :payload => doc |
|
139 |
+ end |
|
140 |
+ else |
|
141 |
+ output = {} |
|
142 |
+ options['extract'].each do |name, extraction_details| |
|
143 |
+ if extraction_type == "json" |
|
144 |
+ result = Utils.values_at(doc, extraction_details['path']) |
|
145 |
+ log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}" |
|
140 | 146 |
else |
141 |
- error "'css' or 'xpath' is required for HTML or XML extraction" |
|
142 |
- return |
|
143 |
- end |
|
144 |
- unless Nokogiri::XML::NodeSet === nodes |
|
145 |
- error "The result of HTML/XML extraction was not a NodeSet" |
|
146 |
- return |
|
147 |
- end |
|
148 |
- result = nodes.map { |node| |
|
149 |
- if extraction_details['attr'] |
|
150 |
- node.attr(extraction_details['attr']) |
|
151 |
- elsif extraction_details['text'] |
|
152 |
- node.text() |
|
147 |
+ case |
|
148 |
+ when css = extraction_details['css'] |
|
149 |
+ nodes = doc.css(css) |
|
150 |
+ when xpath = extraction_details['xpath'] |
|
151 |
+ nodes = doc.xpath(xpath) |
|
153 | 152 |
else |
154 |
- error "'attr' or 'text' is required on HTML or XML extraction patterns" |
|
153 |
+ error "'css' or 'xpath' is required for HTML or XML extraction" |
|
155 | 154 |
return |
156 | 155 |
end |
157 |
- } |
|
158 |
- log "Extracting #{extraction_type} at #{xpath || css}: #{result}" |
|
156 |
+ unless Nokogiri::XML::NodeSet === nodes |
|
157 |
+ error "The result of HTML/XML extraction was not a NodeSet" |
|
158 |
+ return |
|
159 |
+ end |
|
160 |
+ result = nodes.map { |node| |
|
161 |
+ if extraction_details['attr'] |
|
162 |
+ node.attr(extraction_details['attr']) |
|
163 |
+ elsif extraction_details['text'] |
|
164 |
+ node.text() |
|
165 |
+ else |
|
166 |
+ error "'attr' or 'text' is required on HTML or XML extraction patterns" |
|
167 |
+ return |
|
168 |
+ end |
|
169 |
+ } |
|
170 |
+ log "Extracting #{extraction_type} at #{xpath || css}: #{result}" |
|
171 |
+ end |
|
172 |
+ output[name] = result |
|
159 | 173 |
end |
160 |
- output[name] = result |
|
161 |
- end |
|
162 | 174 |
|
163 |
- num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq |
|
175 |
+ num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq |
|
164 | 176 |
|
165 |
- if num_unique_lengths.length != 1 |
|
166 |
- error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}" |
|
167 |
- return |
|
168 |
- end |
|
169 |
- |
|
170 |
- old_events = previous_payloads num_unique_lengths.first |
|
171 |
- num_unique_lengths.first.times do |index| |
|
172 |
- result = {} |
|
173 |
- options['extract'].keys.each do |name| |
|
174 |
- result[name] = output[name][index] |
|
175 |
- if name.to_s == 'url' |
|
176 |
- result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? |
|
177 |
- end |
|
177 |
+ if num_unique_lengths.length != 1 |
|
178 |
+ error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}" |
|
179 |
+ return |
|
178 | 180 |
end |
181 |
+ |
|
182 |
+ old_events = previous_payloads num_unique_lengths.first |
|
183 |
+ num_unique_lengths.first.times do |index| |
|
184 |
+ result = {} |
|
185 |
+ options['extract'].keys.each do |name| |
|
186 |
+ result[name] = output[name][index] |
|
187 |
+ if name.to_s == 'url' |
|
188 |
+ result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? |
|
189 |
+ end |
|
190 |
+ end |
|
179 | 191 |
|
180 |
- if store_payload!(old_events, result) |
|
181 |
- log "Storing new parsed result for '#{name}': #{result.inspect}" |
|
182 |
- create_event :payload => result |
|
192 |
+ if store_payload!(old_events, result) |
|
193 |
+ log "Storing new parsed result for '#{name}': #{result.inspect}" |
|
194 |
+ create_event :payload => result |
|
195 |
+ end |
|
183 | 196 |
end |
184 | 197 |
end |
185 | 198 |
end |
199 |
+ |
|
200 |
+ hydra.queue request |
|
201 |
+ hydra.run |
|
186 | 202 |
end |
187 |
- hydra.queue request |
|
188 |
- hydra.run |
|
189 | 203 |
end |
190 | 204 |
|
191 | 205 |
private |
@@ -16,7 +16,7 @@ group "huginn" do |
||
16 | 16 |
action :create |
17 | 17 |
end |
18 | 18 |
|
19 |
-%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl").each do |pkg| |
|
19 |
+%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libmysqlclient-dev").each do |pkg| |
|
20 | 20 |
package pkg do |
21 | 21 |
action :install |
22 | 22 |
end |
@@ -49,9 +49,9 @@ bash "huginn dependencies" do |
||
49 | 49 |
export LC_ALL="en_US.UTF-8" |
50 | 50 |
sudo bundle install |
51 | 51 |
sed s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env.example > .env |
52 |
- sudo rake db:create |
|
53 |
- sudo rake db:migrate |
|
54 |
- sudo rake db:seed |
|
52 |
+ sudo bundle exec rake db:create |
|
53 |
+ sudo bundle exec rake db:migrate |
|
54 |
+ sudo bundle exec rake db:seed |
|
55 | 55 |
EOH |
56 | 56 |
end |
57 | 57 |
|
@@ -14,7 +14,7 @@ group "huginn" do |
||
14 | 14 |
members ["huginn"] |
15 | 15 |
end |
16 | 16 |
|
17 |
-%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8").each do |pkg| |
|
17 |
+%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8" "libmysqlclient-dev").each do |pkg| |
|
18 | 18 |
package("#{pkg}") |
19 | 19 |
end |
20 | 20 |
|
@@ -84,9 +84,9 @@ deploy "/home/huginn" do |
||
84 | 84 |
sudo cp /home/huginn/shared/config/nginx.conf /etc/nginx/ |
85 | 85 |
sudo bundle install |
86 | 86 |
sed -i s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env |
87 |
- sudo rake db:create |
|
88 |
- sudo rake db:migrate |
|
89 |
- sudo rake db:seed |
|
87 |
+ sudo bundle exec rake db:create |
|
88 |
+ sudo bundle exec rake db:migrate |
|
89 |
+ sudo bundle exec rake db:seed |
|
90 | 90 |
sudo foreman export upstart /etc/init -a huginn -u huginn -l log |
91 | 91 |
sudo start huginn |
92 | 92 |
EOH |
@@ -56,7 +56,7 @@ module Utils |
||
56 | 56 |
escape = false |
57 | 57 |
end |
58 | 58 |
|
59 |
- result = JsonPath.new(path, :allow_eval => false).on(data.is_a?(String) ? data : data.to_json) |
|
59 |
+ result = JsonPath.new(path, :allow_eval => ENV['ALLOW_JSONPATH_EVAL'] == "true").on(data.is_a?(String) ? data : data.to_json) |
|
60 | 60 |
if escape |
61 | 61 |
result.map {|r| CGI::escape r } |
62 | 62 |
else |
@@ -79,4 +79,4 @@ module Utils |
||
79 | 79 |
def self.pretty_jsonify(thing) |
80 | 80 |
JSON.pretty_generate(thing).gsub('</', '<\/') |
81 | 81 |
end |
82 |
-end |
|
82 |
+end |
@@ -91,6 +91,30 @@ describe Agents::WebsiteAgent do |
||
91 | 91 |
@checker.check |
92 | 92 |
@checker.logs.first.message.should =~ /Got an uneven number of matches/ |
93 | 93 |
end |
94 |
+ |
|
95 |
+ it "should accept an array for url" do |
|
96 |
+ @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"] |
|
97 |
+ @checker.options = @site |
|
98 |
+ lambda { @checker.save! }.should_not raise_error; |
|
99 |
+ lambda { @checker.check }.should_not raise_error; |
|
100 |
+ end |
|
101 |
+ |
|
102 |
+ it "should parse events from all urls in array" do |
|
103 |
+ lambda { |
|
104 |
+ @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"] |
|
105 |
+ @site['mode'] = 'all' |
|
106 |
+ @checker.options = @site |
|
107 |
+ @checker.check |
|
108 |
+ }.should change { Event.count }.by(2) |
|
109 |
+ end |
|
110 |
+ |
|
111 |
+ it "should follow unique rules when parsing array of urls" do |
|
112 |
+ lambda { |
|
113 |
+ @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"] |
|
114 |
+ @checker.options = @site |
|
115 |
+ @checker.check |
|
116 |
+ }.should change { Event.count }.by(1) |
|
117 |
+ end |
|
94 | 118 |
end |
95 | 119 |
|
96 | 120 |
describe 'encoding' do |