| @@ -78,6 +78,14 @@ AWS_ACCESS_KEY="your aws access key" | ||
| 78 | 78 | # Set AWS_SANDBOX to true if you're developing Huginn code. | 
| 79 | 79 | AWS_SANDBOX=false | 
| 80 | 80 |  | 
| 81 | +######################## | |
| 82 | +# Various Settings # | |
| 83 | +######################## | |
| 84 | + | |
| 85 | +# Allow JSONPath eval expressions, e.g., $..price[?(@ < 20)] | |
| 86 | +# You should not allow this on a shared Huginn box because it is not secure. | |
| 87 | +ALLOW_JSONPATH_EVAL=false | |
| 88 | + | |
| 81 | 89 | # Use Graphviz for generating diagrams instead of using Google Chart | 
| 82 | 90 | # Tools. Specify a dot(1) command path built with SVG support | 
| 83 | 91 | # enabled. | 
| @@ -10,7 +10,7 @@ gem 'bootstrap-kaminari-views', '~> 0.0.2' | ||
| 10 | 10 | gem 'rufus-scheduler', '~> 3.0.7', require: false | 
| 11 | 11 | gem 'json', '~> 1.8.1' | 
| 12 | 12 | gem 'jsonpath', '~> 0.5.3' | 
| 13 | -gem 'twilio-ruby', '~> 3.10.0' | |
| 13 | +gem 'twilio-ruby', '~> 3.11.5' | |
| 14 | 14 | gem 'ruby-growl', '~> 4.1.0' | 
| 15 | 15 |  | 
| 16 | 16 | gem 'delayed_job', '~> 4.0.0' | 
| @@ -40,7 +40,7 @@ gem 'nokogiri', '~> 1.6.1' | ||
| 40 | 40 |  | 
| 41 | 41 | gem 'wunderground', '~> 1.2.0' | 
| 42 | 42 | gem 'forecast_io', '~> 2.0.0' | 
| 43 | -gem 'rturk', '~> 2.11.0' | |
| 43 | +gem 'rturk', '~> 2.12.1' | |
| 44 | 44 |  | 
| 45 | 45 | gem 'twitter', '~> 5.7.1' | 
| 46 | 46 | gem 'twitter-stream', github: 'cantino/twitter-stream', branch: 'master' | 
| @@ -215,7 +215,7 @@ GEM | ||
| 215 | 215 | rspec-core (~> 2.14.0) | 
| 216 | 216 | rspec-expectations (~> 2.14.0) | 
| 217 | 217 | rspec-mocks (~> 2.14.0) | 
| 218 | - rturk (2.11.3) | |
| 218 | + rturk (2.12.1) | |
| 219 | 219 | erector | 
| 220 | 220 | nokogiri | 
| 221 | 221 | rest-client | 
| @@ -263,7 +263,7 @@ GEM | ||
| 263 | 263 | treetop (1.4.15) | 
| 264 | 264 | polyglot | 
| 265 | 265 | polyglot (>= 0.3.1) | 
| 266 | - twilio-ruby (3.10.1) | |
| 266 | + twilio-ruby (3.11.5) | |
| 267 | 267 | builder (>= 2.1.2) | 
| 268 | 268 | jwt (>= 0.1.2) | 
| 269 | 269 | multi_json (>= 1.3.0) | 
| @@ -289,7 +289,7 @@ GEM | ||
| 289 | 289 | macaddr (~> 1.0) | 
| 290 | 290 | warden (1.2.3) | 
| 291 | 291 | rack (>= 1.0) | 
| 292 | - webmock (1.13.0) | |
| 292 | + webmock (1.17.4) | |
| 293 | 293 | addressable (>= 2.2.7) | 
| 294 | 294 | crack (>= 0.3.2) | 
| 295 | 295 | weibo_2 (0.1.6) | 
| @@ -337,14 +337,14 @@ DEPENDENCIES | ||
| 337 | 337 | rr | 
| 338 | 338 | rspec | 
| 339 | 339 | rspec-rails | 
| 340 | - rturk (~> 2.11.0) | |
| 340 | + rturk (~> 2.12.1) | |
| 341 | 341 | ruby-growl (~> 4.1.0) | 
| 342 | 342 | rufus-scheduler (~> 3.0.7) | 
| 343 | 343 | sass-rails (~> 4.0.0) | 
| 344 | 344 | select2-rails (~> 3.5.4) | 
| 345 | 345 | shoulda-matchers | 
| 346 | 346 | therubyracer (~> 0.12.1) | 
| 347 | - twilio-ruby (~> 3.10.0) | |
| 347 | + twilio-ruby (~> 3.11.5) | |
| 348 | 348 | twitter (~> 5.7.1) | 
| 349 | 349 | twitter-stream! | 
| 350 | 350 | typhoeus (~> 0.6.3) | 
| @@ -24,7 +24,7 @@ Follow [@tectonic](https://twitter.com/tectonic) for updates as Huginn evolves, | ||
| 24 | 24 |  | 
| 25 | 25 | ### We need your help! | 
| 26 | 26 |  | 
| 27 | -Want to help with Huginn? Try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open). | |
| 27 | +Want to help with Huginn? All contributions are encouraged! You could make UI improvements, add new Agents, write documentation and tutorials, or try tackling [issues tagged with #help-wanted](https://github.com/cantino/huginn/issues?direction=desc&labels=help-wanted&page=1&sort=created&state=open). | |
| 28 | 28 |  | 
| 29 | 29 | ## Examples | 
| 30 | 30 |  | 
| @@ -7,17 +7,16 @@ module Agents | ||
| 7 | 7 | cannot_create_events! | 
| 8 | 8 |  | 
| 9 | 9 | description <<-MD | 
| 10 | - The TwilioAgent receives and collects events and sends them via text message or gives you a call when scheduled. | |
| 10 | + The TwilioAgent receives and collects events and sends them via text message (up to 160 characters) or gives you a call when scheduled. | |
| 11 | 11 |  | 
| 12 | - It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use Event Formatting Agent if your event does not provide these keys. | |
| 12 | + It is assumed that events have a `message`, `text`, or `sms` key, the value of which is sent as the content of the text message/call. You can use the EventFormattingAgent if your event does not provide these keys. | |
| 13 | 13 |  | 
| 14 | 14 | Set `receiver_cell` to the number to receive text messages/call and `sender_cell` to the number sending them. | 
| 15 | 15 |  | 
| 16 | 16 | `expected_receive_period_in_days` is maximum number of days that you would expect to pass between events being received by this agent. | 
| 17 | 17 |  | 
| 18 | - If you would like to receive calls, then set `receive_call` to true. `server_url` needs to be | |
| 19 | - filled only if you are making calls. Dont forget to include http/https in `server_url`. | |
| 20 | - | |
| 18 | + If you would like to receive calls, set `receive_call` to `true`. In this case, `server_url` must be set to the URL of your | |
| 19 | +      Huginn installation (probably "https://#{ENV['DOMAIN']}"), which must be web-accessible.  Be sure to set http/https correctly. | |
| 21 | 20 | MD | 
| 22 | 21 |  | 
| 23 | 22 | def default_options | 
| @@ -43,13 +42,14 @@ module Agents | ||
| 43 | 42 | @client = Twilio::REST::Client.new options['account_sid'], options['auth_token'] | 
| 44 | 43 |        memory['pending_calls'] ||= {} | 
| 45 | 44 | incoming_events.each do |event| | 
| 46 | - message = (event.payload['message'] || event.payload['text'] || event.payload['sms']).to_s | |
| 47 | - if message != "" | |
| 45 | + message = (event.payload['message'].presence || event.payload['text'].presence || event.payload['sms'].presence).to_s | |
| 46 | + if message.present? | |
| 48 | 47 | if options['receive_call'].to_s == 'true' | 
| 49 | 48 | secret = SecureRandom.hex 3 | 
| 50 | 49 | memory['pending_calls'][secret] = message | 
| 51 | 50 | make_call secret | 
| 52 | 51 | end | 
| 52 | + | |
| 53 | 53 | if options['receive_text'].to_s == 'true' | 
| 54 | 54 | message = message.slice 0..160 | 
| 55 | 55 | send_message message | 
| @@ -71,11 +71,11 @@ module Agents | ||
| 71 | 71 | def make_call(secret) | 
| 72 | 72 | @client.account.calls.create :from => options['sender_cell'], | 
| 73 | 73 | :to => options['receiver_cell'], | 
| 74 | - :url => post_url(options['server_url'],secret) | |
| 74 | + :url => post_url(options['server_url'], secret) | |
| 75 | 75 | end | 
| 76 | 76 |  | 
| 77 | - def post_url(server_url,secret) | |
| 78 | -      "#{server_url}/users/#{self.user.id}/web_requests/#{self.id}/#{secret}" | |
| 77 | + def post_url(server_url, secret) | |
| 78 | +      "#{server_url}/users/#{user.id}/web_requests/#{id}/#{secret}" | |
| 79 | 79 | end | 
| 80 | 80 |  | 
| 81 | 81 | def receive_web_request(params, method, format) | 
| @@ -16,6 +16,8 @@ module Agents | ||
| 16 | 16 |  | 
| 17 | 17 | Specify a `url` and select a `mode` for when to create Events based on the scraped data, either `all` or `on_change`. | 
| 18 | 18 |  | 
| 19 | + `url` can be a single url, or an array of urls (for example, for multiple pages with the exact same structure but different content to scrape) | |
| 20 | + | |
| 19 | 21 | The `type` value can be `xml`, `html`, or `json`. | 
| 20 | 22 |  | 
| 21 | 23 | To tell the Agent how to parse the content, specify `extract` as a hash with keys naming the extractions and values of hashes. | 
| @@ -107,85 +109,97 @@ module Agents | ||
| 107 | 109 |        log "Fetching #{options['url']}" | 
| 108 | 110 |        request_opts = { :followlocation => true } | 
| 109 | 111 | request_opts[:userpwd] = options['basic_auth'] if options['basic_auth'].present? | 
| 110 | - request = Typhoeus::Request.new(options['url'], request_opts) | |
| 111 | 112 |  | 
| 112 | - request.on_failure do |response| | |
| 113 | -        error "Failed: #{response.inspect}" | |
| 113 | + requests = [] | |
| 114 | + | |
| 115 | + if options['url'].kind_of?(Array) | |
| 116 | + options['url'].each do |url| | |
| 117 | + requests.push(Typhoeus::Request.new(url, request_opts)) | |
| 118 | + end | |
| 119 | + else | |
| 120 | + requests.push(Typhoeus::Request.new(options['url'], request_opts)) | |
| 114 | 121 | end | 
| 115 | 122 |  | 
| 116 | - request.on_success do |response| | |
| 117 | - body = response.body | |
| 118 | - if (encoding = options['force_encoding']).present? | |
| 119 | - body = body.encode(Encoding::UTF_8, encoding) | |
| 123 | + requests.each do |request| | |
| 124 | + request.on_failure do |response| | |
| 125 | +          error "Failed: #{response.inspect}" | |
| 120 | 126 | end | 
| 121 | - doc = parse(body) | |
| 122 | 127 |  | 
| 123 | - if extract_full_json? | |
| 124 | - if store_payload!(previous_payloads(1), doc) | |
| 125 | -            log "Storing new result for '#{name}': #{doc.inspect}" | |
| 126 | - create_event :payload => doc | |
| 128 | + request.on_success do |response| | |
| 129 | + body = response.body | |
| 130 | + if (encoding = options['force_encoding']).present? | |
| 131 | + body = body.encode(Encoding::UTF_8, encoding) | |
| 127 | 132 | end | 
| 128 | - else | |
| 129 | -          output = {} | |
| 130 | - options['extract'].each do |name, extraction_details| | |
| 131 | - if extraction_type == "json" | |
| 132 | - result = Utils.values_at(doc, extraction_details['path']) | |
| 133 | -              log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}" | |
| 134 | - else | |
| 135 | - case | |
| 136 | - when css = extraction_details['css'] | |
| 137 | - nodes = doc.css(css) | |
| 138 | - when xpath = extraction_details['xpath'] | |
| 139 | - nodes = doc.xpath(xpath) | |
| 133 | + doc = parse(body) | |
| 134 | + | |
| 135 | + if extract_full_json? | |
| 136 | + if store_payload!(previous_payloads(1), doc) | |
| 137 | +              log "Storing new result for '#{name}': #{doc.inspect}" | |
| 138 | + create_event :payload => doc | |
| 139 | + end | |
| 140 | + else | |
| 141 | +            output = {} | |
| 142 | + options['extract'].each do |name, extraction_details| | |
| 143 | + if extraction_type == "json" | |
| 144 | + result = Utils.values_at(doc, extraction_details['path']) | |
| 145 | +                log "Extracting #{extraction_type} at #{extraction_details['path']}: #{result}" | |
| 140 | 146 | else | 
| 141 | - error "'css' or 'xpath' is required for HTML or XML extraction" | |
| 142 | - return | |
| 143 | - end | |
| 144 | - unless Nokogiri::XML::NodeSet === nodes | |
| 145 | - error "The result of HTML/XML extraction was not a NodeSet" | |
| 146 | - return | |
| 147 | - end | |
| 148 | -              result = nodes.map { |node| | |
| 149 | - if extraction_details['attr'] | |
| 150 | - node.attr(extraction_details['attr']) | |
| 151 | - elsif extraction_details['text'] | |
| 152 | - node.text() | |
| 147 | + case | |
| 148 | + when css = extraction_details['css'] | |
| 149 | + nodes = doc.css(css) | |
| 150 | + when xpath = extraction_details['xpath'] | |
| 151 | + nodes = doc.xpath(xpath) | |
| 153 | 152 | else | 
| 154 | - error "'attr' or 'text' is required on HTML or XML extraction patterns" | |
| 153 | + error "'css' or 'xpath' is required for HTML or XML extraction" | |
| 155 | 154 | return | 
| 156 | 155 | end | 
| 157 | - } | |
| 158 | -              log "Extracting #{extraction_type} at #{xpath || css}: #{result}" | |
| 156 | + unless Nokogiri::XML::NodeSet === nodes | |
| 157 | + error "The result of HTML/XML extraction was not a NodeSet" | |
| 158 | + return | |
| 159 | + end | |
| 160 | +                result = nodes.map { |node| | |
| 161 | + if extraction_details['attr'] | |
| 162 | + node.attr(extraction_details['attr']) | |
| 163 | + elsif extraction_details['text'] | |
| 164 | + node.text() | |
| 165 | + else | |
| 166 | + error "'attr' or 'text' is required on HTML or XML extraction patterns" | |
| 167 | + return | |
| 168 | + end | |
| 169 | + } | |
| 170 | +                log "Extracting #{extraction_type} at #{xpath || css}: #{result}" | |
| 171 | + end | |
| 172 | + output[name] = result | |
| 159 | 173 | end | 
| 160 | - output[name] = result | |
| 161 | - end | |
| 162 | 174 |  | 
| 163 | -          num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq | |
| 175 | +            num_unique_lengths = options['extract'].keys.map { |name| output[name].length }.uniq | |
| 164 | 176 |  | 
| 165 | - if num_unique_lengths.length != 1 | |
| 166 | -            error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}" | |
| 167 | - return | |
| 168 | - end | |
| 169 | - | |
| 170 | - old_events = previous_payloads num_unique_lengths.first | |
| 171 | - num_unique_lengths.first.times do |index| | |
| 172 | -            result = {} | |
| 173 | - options['extract'].keys.each do |name| | |
| 174 | - result[name] = output[name][index] | |
| 175 | - if name.to_s == 'url' | |
| 176 | - result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? | |
| 177 | - end | |
| 177 | + if num_unique_lengths.length != 1 | |
| 178 | +              error "Got an uneven number of matches for #{options['name']}: #{options['extract'].inspect}" | |
| 179 | + return | |
| 178 | 180 | end | 
| 181 | + | |
| 182 | + old_events = previous_payloads num_unique_lengths.first | |
| 183 | + num_unique_lengths.first.times do |index| | |
| 184 | +              result = {} | |
| 185 | + options['extract'].keys.each do |name| | |
| 186 | + result[name] = output[name][index] | |
| 187 | + if name.to_s == 'url' | |
| 188 | + result[name] = URI.join(options['url'], result[name]).to_s if (result[name] =~ URI::DEFAULT_PARSER.regexp[:ABS_URI]).nil? | |
| 189 | + end | |
| 190 | + end | |
| 179 | 191 |  | 
| 180 | - if store_payload!(old_events, result) | |
| 181 | -              log "Storing new parsed result for '#{name}': #{result.inspect}" | |
| 182 | - create_event :payload => result | |
| 192 | + if store_payload!(old_events, result) | |
| 193 | +                log "Storing new parsed result for '#{name}': #{result.inspect}" | |
| 194 | + create_event :payload => result | |
| 195 | + end | |
| 183 | 196 | end | 
| 184 | 197 | end | 
| 185 | 198 | end | 
| 199 | + | |
| 200 | + hydra.queue request | |
| 201 | + hydra.run | |
| 186 | 202 | end | 
| 187 | - hydra.queue request | |
| 188 | - hydra.run | |
| 189 | 203 | end | 
| 190 | 204 |  | 
| 191 | 205 | private | 
| @@ -16,7 +16,7 @@ group "huginn" do | ||
| 16 | 16 | action :create | 
| 17 | 17 | end | 
| 18 | 18 |  | 
| 19 | -%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl").each do |pkg| | |
| 19 | +%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libmysqlclient-dev").each do |pkg| | |
| 20 | 20 | package pkg do | 
| 21 | 21 | action :install | 
| 22 | 22 | end | 
| @@ -49,9 +49,9 @@ bash "huginn dependencies" do | ||
| 49 | 49 | export LC_ALL="en_US.UTF-8" | 
| 50 | 50 | sudo bundle install | 
| 51 | 51 | sed s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env.example > .env | 
| 52 | - sudo rake db:create | |
| 53 | - sudo rake db:migrate | |
| 54 | - sudo rake db:seed | |
| 52 | + sudo bundle exec rake db:create | |
| 53 | + sudo bundle exec rake db:migrate | |
| 54 | + sudo bundle exec rake db:seed | |
| 55 | 55 | EOH | 
| 56 | 56 | end | 
| 57 | 57 |  | 
| @@ -14,7 +14,7 @@ group "huginn" do | ||
| 14 | 14 | members ["huginn"] | 
| 15 | 15 | end | 
| 16 | 16 |  | 
| 17 | -%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8").each do |pkg| | |
| 17 | +%w("ruby1.9.1" "ruby1.9.1-dev" "libxslt-dev" "libxml2-dev" "curl" "libshadow-ruby1.8" "libmysqlclient-dev").each do |pkg| | |
| 18 | 18 |    package("#{pkg}") | 
| 19 | 19 | end | 
| 20 | 20 |  | 
| @@ -84,9 +84,9 @@ deploy "/home/huginn" do | ||
| 84 | 84 | sudo cp /home/huginn/shared/config/nginx.conf /etc/nginx/ | 
| 85 | 85 | sudo bundle install | 
| 86 | 86 | sed -i s/REPLACE_ME_NOW\!/$(sudo rake secret)/ .env | 
| 87 | - sudo rake db:create | |
| 88 | - sudo rake db:migrate | |
| 89 | - sudo rake db:seed | |
| 87 | + sudo bundle exec rake db:create | |
| 88 | + sudo bundle exec rake db:migrate | |
| 89 | + sudo bundle exec rake db:seed | |
| 90 | 90 | sudo foreman export upstart /etc/init -a huginn -u huginn -l log | 
| 91 | 91 | sudo start huginn | 
| 92 | 92 | EOH | 
| @@ -56,7 +56,7 @@ module Utils | ||
| 56 | 56 | escape = false | 
| 57 | 57 | end | 
| 58 | 58 |  | 
| 59 | - result = JsonPath.new(path, :allow_eval => false).on(data.is_a?(String) ? data : data.to_json) | |
| 59 | + result = JsonPath.new(path, :allow_eval => ENV['ALLOW_JSONPATH_EVAL'] == "true").on(data.is_a?(String) ? data : data.to_json) | |
| 60 | 60 | if escape | 
| 61 | 61 |        result.map {|r| CGI::escape r } | 
| 62 | 62 | else | 
| @@ -79,4 +79,4 @@ module Utils | ||
| 79 | 79 | def self.pretty_jsonify(thing) | 
| 80 | 80 |      JSON.pretty_generate(thing).gsub('</', '<\/') | 
| 81 | 81 | end | 
| 82 | -end | |
| 82 | +end | 
| @@ -91,6 +91,30 @@ describe Agents::WebsiteAgent do | ||
| 91 | 91 | @checker.check | 
| 92 | 92 | @checker.logs.first.message.should =~ /Got an uneven number of matches/ | 
| 93 | 93 | end | 
| 94 | + | |
| 95 | + it "should accept an array for url" do | |
| 96 | + @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"] | |
| 97 | + @checker.options = @site | |
| 98 | +        lambda { @checker.save! }.should_not raise_error; | |
| 99 | +        lambda { @checker.check }.should_not raise_error; | |
| 100 | + end | |
| 101 | + | |
| 102 | + it "should parse events from all urls in array" do | |
| 103 | +        lambda { | |
| 104 | + @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"] | |
| 105 | + @site['mode'] = 'all' | |
| 106 | + @checker.options = @site | |
| 107 | + @checker.check | |
| 108 | +        }.should change { Event.count }.by(2) | |
| 109 | + end | |
| 110 | + | |
| 111 | + it "should follow unique rules when parsing array of urls" do | |
| 112 | +        lambda { | |
| 113 | + @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"] | |
| 114 | + @checker.options = @site | |
| 115 | + @checker.check | |
| 116 | +        }.should change { Event.count }.by(1) | |
| 117 | + end | |
| 94 | 118 | end | 
| 95 | 119 |  | 
| 96 | 120 | describe 'encoding' do |