@@ -47,6 +47,8 @@ module Agents |
||
| 47 | 47 |
|
| 48 | 48 |
Set `force_encoding` to an encoding name if the website does not return a Content-Type header with a proper charset. |
| 49 | 49 |
|
| 50 |
+ Set `user_agent` to a custom User-Agent name if the website does not like the default value ("Faraday v#{Faraday::VERSION}").
|
|
| 51 |
+ |
|
| 50 | 52 |
The WebsiteAgent can also scrape based on incoming events. It will scrape the url contained in the `url` key of the incoming event payload. |
| 51 | 53 |
MD |
| 52 | 54 |
|
@@ -105,6 +107,10 @@ module Agents |
||
| 105 | 107 |
end |
| 106 | 108 |
end |
| 107 | 109 |
|
| 110 |
+ if options['user_agent'].present? |
|
| 111 |
+ errors.add(:base, "user_agent must be a string") unless options['user_agent'].is_a?(String) |
|
| 112 |
+ end |
|
| 113 |
+ |
|
| 108 | 114 |
begin |
| 109 | 115 |
basic_auth_credentials() |
| 110 | 116 |
rescue => e |
@@ -281,6 +287,10 @@ module Agents |
||
| 281 | 287 |
|
| 282 | 288 |
def faraday |
| 283 | 289 |
@faraday ||= Faraday.new { |builder|
|
| 290 |
+ if (user_agent = options['user_agent']).present? |
|
| 291 |
+ builder.headers[:user_agent] = user_agent |
|
| 292 |
+ end |
|
| 293 |
+ |
|
| 284 | 294 |
builder.use FaradayMiddleware::FollowRedirects |
| 285 | 295 |
builder.request :url_encoded |
| 286 | 296 |
if userinfo = basic_auth_credentials() |
@@ -376,4 +376,35 @@ describe Agents::WebsiteAgent do |
||
| 376 | 376 |
end |
| 377 | 377 |
end |
| 378 | 378 |
end |
| 379 |
+ |
|
| 380 |
# Spec group for the `user_agent` option: builds a WebsiteAgent configured with
# 'user_agent' => "Sushi" and runs #check against a stub that is keyed on that
# same User-Agent request header.
+ describe "checking with User-Agent" do |
|
| 381 |
+ before do |
|
| 382 |
# The stub below is restricted (via `with(headers: ...)`) to requests whose
# User-Agent header is 'Sushi', and answers with the xkcd.html fixture.
# NOTE(review): the test presumably relies on WebMock refusing requests that
# match no stub, so a wrong/missing header would fail the check - confirm in
# the spec helper configuration.
+ stub_request(:any, /example/). |
|
| 383 |
+ with(headers: { 'User-Agent' => 'Sushi' }).
|
|
| 384 |
+ to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
|
|
| 385 |
+ @site = {
|
|
| 386 |
+ 'name' => "XKCD", |
|
| 387 |
+ 'expected_update_period_in_days' => 2, |
|
| 388 |
+ 'type' => "html", |
|
| 389 |
+ 'url' => "http://www.example.com", |
|
| 390 |
+ 'mode' => 'on_change', |
|
| 391 |
+ 'extract' => {
|
|
| 392 |
+ 'url' => { 'css' => "#comic img", 'attr' => "src" },
|
|
| 393 |
+ 'title' => { 'css' => "#comic img", 'attr' => "alt" },
|
|
| 394 |
+ 'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
|
|
| 395 |
+ }, |
|
| 396 |
# Option under test; its value matches the User-Agent header required by the stub above.
+ 'user_agent' => "Sushi" |
|
| 397 |
+ } |
|
| 398 |
+ @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @site) |
|
| 399 |
+ @checker.user = users(:bob) |
|
| 400 |
+ @checker.save! |
|
| 401 |
+ end |
|
| 402 |
+ |
|
| 403 |
+ describe "#check" do |
|
| 404 |
+ it "should check for changes" do |
|
| 405 |
# The first check is expected to create exactly one Event; a second check
# against the same stubbed response is expected to create none.
+ lambda { @checker.check }.should change { Event.count }.by(1)
|
|
| 406 |
+ lambda { @checker.check }.should_not change { Event.count }
|
|
| 407 |
+ end |
|
| 408 |
+ end |
|
| 409 |
+ end |
|
| 379 | 410 |
end |