# website_agent_spec.rb (16KB)

  1. require 'spec_helper'
  2. describe Agents::WebsiteAgent do
  3. describe "checking without basic auth" do
  # Shared setup for the "checking without basic auth" examples: any request
  # whose URL matches /xkcd/ is answered with the stored XKCD fixture page, and
  # a saved WebsiteAgent owned by the :bob user fixture is exposed as @checker
  # (with the options it was built from kept in @valid_options).
  before do
    fixture_html = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))
    stub_request(:any, /xkcd/).to_return(body: fixture_html, status: 200)

    # All three extracted fields are attributes of the "#comic img" match.
    @valid_options = {
      'name' => "XKCD",
      'expected_update_period_in_days' => "2",
      'type' => "html",
      'url' => "http://xkcd.com",
      'mode' => 'on_change',
      'extract' => {
        'url'       => { 'css' => "#comic img", 'value' => "@src" },
        'title'     => { 'css' => "#comic img", 'value' => "@alt" },
        'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
      }
    }

    @checker = Agents::WebsiteAgent.new(name: "xkcd", options: @valid_options, keep_events_for: 2)
    @checker.user = users(:bob)
    @checker.save!
  end

  # Run the shared examples registered for WebRequestConcern in this context.
  it_behaves_like WebRequestConcern
  # Option validation: each example flips one option on the already-saved
  # @checker and asserts whether the agent is still valid.
  describe "validations" do
    # Start every example from a valid agent so that a failing validity check
    # can be attributed to the option changed inside that example.
    before { @checker.should be_valid }

    it "should validate the integer fields" do
      period_key = 'expected_update_period_in_days'

      @checker.options[period_key] = "2"
      @checker.should be_valid

      @checker.options[period_key] = "nonsense"
      @checker.should_not be_valid
    end

    it "should validate uniqueness_look_back" do
      look_back_key = 'uniqueness_look_back'

      @checker.options[look_back_key] = "nonsense"
      @checker.should_not be_valid

      @checker.options[look_back_key] = "2"
      @checker.should be_valid
    end

    it "should validate mode" do
      @checker.options['mode'] = "nonsense"
      @checker.should_not be_valid

      # Both known modes and the empty string are accepted.
      ["on_change", "all", ""].each do |accepted_mode|
        @checker.options['mode'] = accepted_mode
        @checker.should be_valid
      end
    end

    it "should validate the force_encoding option" do
      ['', 'UTF-8'].each do |accepted_encoding|
        @checker.options['force_encoding'] = accepted_encoding
        @checker.should be_valid
      end

      # Neither a non-String value nor an unknown encoding name is accepted.
      [['UTF-8'], 'UTF-42'].each do |rejected_encoding|
        @checker.options['force_encoding'] = rejected_encoding
        @checker.should_not be_valid
      end
    end
  end
  60. describe "#check" do
  # First check creates one event; a second check of the unchanged page creates
  # none, but the latest event's expires_at is expected to differ afterwards.
  it "should check for changes (and update Event.expires_at)" do
    -> { @checker.check }.should change { Event.count }.by(1)
    original_event = Event.last

    # NOTE(review): the pause presumably lets wall-clock time advance so the
    # recomputed expires_at cannot equal the first one — confirm against the agent.
    sleep 2

    -> { @checker.check }.should_not change { Event.count }
    refreshed_event = Event.last
    refreshed_event.expires_at.should_not == original_event.expires_at
  end
  # In 'all' mode two consecutive checks of the same page yield two events.
  it "should always save events when in :all mode" do
    check_twice_in_all_mode = lambda do
      @valid_options['mode'] = 'all'
      @checker.options = @valid_options
      2.times { @checker.check }
    end

    check_twice_in_all_mode.should change { Event.count }.by(2)
  end
  # Creates two identical events in 'all' mode, blanks the payload of the most
  # recent one, then re-checks in 'on_change' mode with two look-back depths:
  # depth 2 adds no event, depth 1 adds exactly one.
  it "should take uniqueness_look_back into account during deduplication" do
    @valid_options['mode'] = 'all'
    @checker.options = @valid_options
    2.times { @checker.check }

    # Make the newest stored event differ from what the page produces.
    newest_event = Event.last
    newest_event.payload = "{}"
    newest_event.save

    recheck_on_change = lambda do |look_back|
      @valid_options['mode'] = 'on_change'
      @valid_options['uniqueness_look_back'] = look_back
      @checker.options = @valid_options
      @checker.check
    end

    -> { recheck_on_change.call(2) }.should_not change { Event.count }
    -> { recheck_on_change.call(1) }.should change { Event.count }.by(1)
  end
  # Pointing only the 'url' extractor at "div" while the other extractors keep
  # "#comic img" is expected to produce an "uneven number of matches" log entry.
  it "should log an error if the number of results for a set of extraction patterns differs" do
    @valid_options['extract']['url']['css'] = "div"
    @checker.options = @valid_options
    @checker.check

    first_log = @checker.logs.first
    first_log.message.should =~ /Got an uneven number of matches/
  end
  # An Array of URLs must be accepted both when saving and when checking.
  it "should accept an array for url" do
    @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
    @checker.options = @valid_options

    -> { @checker.save! }.should_not raise_error
    -> { @checker.check }.should_not raise_error
  end
  # With the same URL listed twice and mode 'all', one check yields two events.
  it "should parse events from all urls in array" do
    check_both_urls = lambda do
      @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
      @valid_options['mode'] = 'all'
      @checker.options = @valid_options
      @checker.check
    end

    check_both_urls.should change { Event.count }.by(2)
  end
  # With the same URL listed twice and the setup's 'on_change' mode left in
  # place, one check yields a single event.
  it "should follow unique rules when parsing array of urls" do
    check_duplicate_urls = lambda do
      @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
      @checker.options = @valid_options
      @checker.check
    end

    check_duplicate_urls.should change { Event.count }.by(1)
  end
  125. end
  # Both examples serve a JSON body encoded as EUC-JP and expect the
  # 'force_encoding' option to make the extracted value equal the original
  # string again.
  describe 'encoding' do
    it 'should be forced with force_encoding option' do
      huginn = "\u{601d}\u{8003}"

      # The response's Content-Type names no charset at all.
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP)
      stub_request(:any, /no-encoding/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json' },
        status: 200
      )

      site = {
        'name' => "Some JSON Response",
        'expected_update_period_in_days' => "2",
        'type' => "json",
        'url' => "http://no-encoding.example.com",
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        },
        'force_encoding' => 'EUC-JP'
      }

      checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site)
      checker.user = users(:bob)
      checker.save!
      checker.check

      Event.last.payload['value'].should == huginn
    end

    it 'should be overridden with force_encoding option' do
      huginn = "\u{601d}\u{8003}"

      # Here the Content-Type mentions UTF-8 although the bytes are EUC-JP.
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP)
      stub_request(:any, /wrong-encoding/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json; UTF-8' },
        status: 200
      )

      site = {
        'name' => "Some JSON Response",
        'expected_update_period_in_days' => "2",
        'type' => "json",
        'url' => "http://wrong-encoding.example.com",
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        },
        'force_encoding' => 'EUC-JP'
      }

      checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site)
      checker.user = users(:bob)
      checker.save!
      checker.check

      Event.last.payload['value'].should == huginn
    end
  end
  # Walks @checker through a timeline and asserts #working? at each step.
  describe '#working?' do
    it 'checks if events have been received within the expected receive period' do
      # Time.now is stubbed with a block that reads this local, so every
      # reassignment below changes what Time.now returns from then on.
      current_time = Time.now
      stub(Time).now { current_time }

      # No events created
      @checker.should_not be_working

      # Just created events
      @checker.check
      @checker.reload.should be_working

      # There is a recent error
      @checker.error "oh no!"
      @checker.reload.should_not be_working

      # There is a newer event now
      current_time = 20.minutes.from_now
      @checker.events.delete_all
      @checker.check
      @checker.reload.should be_working

      # Two days have passed without a new event having been created
      current_time = 2.days.from_now
      @checker.reload.should_not be_working
    end
  end
  195. describe "parsing" do
  # The CSS extractors from the shared setup must yield the comic's image URL,
  # alt text and title text from the fixture page.
  it "parses CSS" do
    @checker.check

    payload = Event.last.payload
    payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
    payload['title'].should == "Evolving"
    payload['hovertext'].should =~ /^Biologists play reverse/
  end
  # Same expectations as the CSS example, but every extractor has its 'css'
  # selector swapped for an equivalent 'xpath' expression first.
  it "parses XPath" do
    @valid_options['extract'].each_value do |extractor|
      extractor.delete('css')
      extractor['xpath'] = "//*[@id='comic']//img"
    end
    @checker.options = @valid_options
    @checker.check

    payload = Event.last.payload
    payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
    payload['title'].should == "Evolving"
    payload['hovertext'].should =~ /^Biologists play reverse/
  end
  # Extracts the href of "#topLeft a" and expects the stored 'url' to be the
  # absolute http://xkcd.com/about.
  it "should turn relative urls to absolute" do
    link_options = {
      'name' => "XKCD",
      'expected_update_period_in_days' => "2",
      'type' => "html",
      'url' => "http://xkcd.com",
      'mode' => "on_change",
      'extract' => {
        'url' => { 'css' => "#topLeft a", 'value' => "@href" }
      }
    }

    link_agent = Agents::WebsiteAgent.new(name: "xkcd", options: link_options)
    link_agent.user = users(:bob)
    link_agent.save!
    link_agent.check

    Event.last.payload['url'].should == "http://xkcd.com/about"
  end
  # Uses the XPath function count(./a) as the extractor value on "#comicLinks".
  # Note that the stored result is compared against the String "9", not the
  # Integer 9, despite the wording of the example name.
  it "should return an integer value if XPath evaluates to one" do
    count_options = {
      'name' => "XKCD",
      'expected_update_period_in_days' => 2,
      'type' => "html",
      'url' => "http://xkcd.com",
      'mode' => "on_change",
      'extract' => {
        'num_links' => { 'css' => "#comicLinks", 'value' => "count(./a)" }
      }
    }

    count_agent = Agents::WebsiteAgent.new(name: "xkcd", options: count_options)
    count_agent.user = users(:bob)
    count_agent.save!
    count_agent.check

    Event.last.payload['num_links'].should == "9"
  end
  # Selects every text node below "#slogan" via .//text() and expects them to
  # be stored as one joined string.
  it "should return all texts concatenated if XPath returns many text nodes" do
    slogan_options = {
      'name' => "XKCD",
      'expected_update_period_in_days' => 2,
      'type' => "html",
      'url' => "http://xkcd.com",
      'mode' => "on_change",
      'extract' => {
        'slogan' => { 'css' => "#slogan", 'value' => ".//text()" }
      }
    }

    slogan_agent = Agents::WebsiteAgent.new(name: "xkcd", options: slogan_options)
    slogan_agent.user = users(:bob)
    slogan_agent.save!
    slogan_agent.check

    Event.last.payload['slogan'].should == "A webcomic of romance, sarcasm, math, and language."
  end
  269. describe "JSON" do
  # Dotted paths into a JSON response pull out individual values, which keep
  # their JSON types (Integer 2, String "hello!").
  it "works with paths" do
    response_body = {
      'response' => {
        'version' => 2,
        'title' => "hello!"
      }
    }.to_json
    stub_request(:any, /json-site/).to_return(body: response_body, status: 200)

    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://json-site.com",
      'mode' => 'on_change',
      'extract' => {
        'version' => { 'path' => "response.version" },
        'title' => { 'path' => "response.title" }
      }
    }

    checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
    checker.user = users(:bob)
    checker.save!
    checker.check

    payload = Event.last.payload
    payload['version'].should == 2
    payload['title'].should == "hello!"
  end
  # A [*] wildcard path over a JSON array yields one event per array element,
  # created in array order.
  it "can handle arrays" do
    json = {
      'response' => {
        'data' => [
          {'title' => "first", 'version' => 2},
          {'title' => "second", 'version' => 2.5}
        ]
      }
    }
    stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)

    # The extract keys are Symbols here while the payload is read back with
    # String keys below.
    # NOTE(review): presumably deliberate coverage of symbol keys — confirm.
    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://json-site.com",
      'mode' => 'on_change',
      'extract' => {
        :title => {'path' => "response.data[*].title"},
        :version => {'path' => "response.data[*].version"}
      }
    }
    checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
    checker.user = users(:bob)
    checker.save!

    lambda {
      checker.check
    }.should change { Event.count }.by(2)

    # Fetch the two newest events with one primary-key-ordered query instead of
    # indexing into Event.all twice: that loaded every event row each time and
    # relied on the row order of an unordered SELECT.
    # NOTE(review): assumes Event defines no default ordering scope — confirm.
    first_event, second_event = Event.last(2)

    second_event.payload['version'].should == 2.5
    second_event.payload['title'].should == "second"

    first_event.payload['version'].should == 2
    first_event.payload['title'].should == "first"
  end
  # Without an 'extract' option the whole parsed JSON document becomes the
  # event payload.
  it "stores the whole object if :extract is not specified" do
    response_body = {
      'response' => {
        'version' => 2,
        'title' => "hello!"
      }
    }.to_json
    stub_request(:any, /json-site/).to_return(body: response_body, status: 200)

    site = {
      'name' => "Some JSON Response",
      'expected_update_period_in_days' => "2",
      'type' => "json",
      'url' => "http://json-site.com",
      'mode' => 'on_change'
    }

    checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site)
    checker.user = users(:bob)
    checker.save!
    checker.check

    stored_response = Event.last.payload['response']
    stored_response['version'].should == 2
    stored_response['title'].should == "hello!"
  end
  354. end
  355. end
  # Feeding the agent an incoming event whose payload carries a 'url' makes it
  # scrape that URL and emit one event.
  describe "#receive" do
    it "should scrape from the url element in incoming event payload" do
      @event = Event.new
      @event.agent = agents(:bob_rain_notifier_agent)
      @event.payload = { 'url' => "http://xkcd.com" }

      receive_incoming = lambda do
        @checker.options = @valid_options
        @checker.receive([@event])
      end

      receive_incoming.should change { Event.count }.by(1)
    end
  end
  367. end
  # The fixture page is only stubbed for requests that carry the Basic
  # Authorization header for "user:pass"; the agent is configured with the
  # matching 'basic_auth' option.
  describe "checking with http basic auth" do
    before do
      # Base64 of the credentials via Array#pack('m'), minus its trailing newline.
      encoded_credentials = ['user:pass'].pack('m').chomp
      fixture_html = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))

      stub_request(:any, /example/)
        .with(headers: { 'Authorization' => "Basic #{encoded_credentials}" })
        .to_return(body: fixture_html, status: 200)

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'extract' => {
          'url'       => { 'css' => "#comic img", 'value' => "@src" },
          'title'     => { 'css' => "#comic img", 'value' => "@alt" },
          'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
        },
        'basic_auth' => "user:pass"
      }

      @checker = Agents::WebsiteAgent.new(name: "auth", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    describe "#check" do
      it "should check for changes" do
        -> { @checker.check }.should change { Event.count }.by(1)
        -> { @checker.check }.should_not change { Event.count }
      end
    end
  end
  # The fixture page is only stubbed for requests that carry the custom
  # 'foo: bar' header (set through the agent's 'headers' option) together with
  # a user agent matching /Faraday/.
  describe "checking with headers" do
    before do
      fixture_html = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))

      stub_request(:any, /example/)
        .with(headers: { 'foo' => 'bar', 'user_agent' => /Faraday/ })
        .to_return(body: fixture_html, status: 200)

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'headers' => { 'foo' => 'bar' },
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" }
        }
      }

      @checker = Agents::WebsiteAgent.new(name: "ua", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    describe "#check" do
      it "should check for changes" do
        -> { @checker.check }.should change { Event.count }.by(1)
      end
    end
  end
  423. end