| @@ -47,6 +47,8 @@ module Agents | ||
| 47 | 47 |  | 
| 48 | 48 | Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset. | 
| 49 | 49 |  | 
| 50 | +      Set `user_agent` to a custom User-Agent header value if the website does not accept the default value ("Faraday v#{Faraday::VERSION}"). | |
| 51 | + | |
| 50 | 52 | The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. | 
| 51 | 53 | MD | 
| 52 | 54 |  | 
| @@ -105,6 +107,10 @@ module Agents | ||
| 105 | 107 | end | 
| 106 | 108 | end | 
| 107 | 109 |  | 
| 110 | + if options['user_agent'].present? | |
| 111 | + errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String) | |
| 112 | + end | |
| 113 | + | |
| 108 | 114 | begin | 
| 109 | 115 | basic_auth_credentials() | 
| 110 | 116 | rescue => e | 
| @@ -281,6 +287,10 @@ module Agents | ||
| 281 | 287 |  | 
| 282 | 288 | def faraday | 
| 283 | 289 |        @faraday ||= Faraday.new { |builder| | 
| 290 | + if (user_agent = options['user_agent']).present? | |
| 291 | + builder.headers[:user_agent] = user_agent | |
| 292 | + end | |
| 293 | + | |
| 284 | 294 | builder.use FaradayMiddleware::FollowRedirects | 
| 285 | 295 | builder.request :url_encoded | 
| 286 | 296 | if userinfo = basic_auth_credentials() | 
| @@ -376,4 +376,35 @@ describe Agents::WebsiteAgent do | ||
| 376 | 376 | end | 
| 377 | 377 | end | 
| 378 | 378 | end | 
| 379 | + | |
| 380 | + describe "checking with User-Agent" do # covers the 'user_agent' option of Agents::WebsiteAgent | |
| 381 | + before do # stubs HTTP for URLs matching /example/ and saves an agent configured with user_agent "Sushi" | |
| 382 | + stub_request(:any, /example/). | |
| 383 | +        with(headers: { 'User-Agent' => 'Sushi' }). # the stub is restricted to requests carrying this User-Agent header | |
| 384 | +        to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200) | |
| 385 | +      @site = { | |
| 386 | + 'name' => "XKCD", | |
| 387 | + 'expected_update_period_in_days' => 2, | |
| 388 | + 'type' => "html", | |
| 389 | + 'url' => "http://www.example.com", | |
| 390 | + 'mode' => 'on_change', | |
| 391 | +        'extract' => { | |
| 392 | +          'url' => { 'css' => "#comic img", 'attr' => "src" }, | |
| 393 | +          'title' => { 'css' => "#comic img", 'attr' => "alt" }, | |
| 394 | +          'hovertext' => { 'css' => "#comic img", 'attr' => "title" } | |
| 395 | + }, | |
| 396 | + 'user_agent' => "Sushi" # same value the request stub above requires | |
| 397 | + } | |
| 398 | + @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site) | |
| 399 | + @checker.user = users(:bob) | |
| 400 | + @checker.save! | |
| 401 | + end | |
| 402 | + | |
| 403 | + describe "#check" do | |
| 404 | + it "should check for changes" do | |
| 405 | +        lambda { @checker.check }.should change { Event.count }.by(1) # first check is expected to create exactly one Event | |
| 406 | +        lambda { @checker.check }.should_not change { Event.count } # repeat check is expected to create none; presumably because mode is 'on_change' and the stubbed body is unchanged | |
| 407 | + end | |
| 408 | + end | |
| 409 | + end | |
| 379 | 410 | end |