website_agent_spec.rb 21KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627
  1. require 'spec_helper'
  2. describe Agents::WebsiteAgent do
  3. describe "checking without basic auth" do
  # Shared fixture for every example in this group: any request whose URL
  # matches /xkcd/ is answered with the saved xkcd page, and @checker is a
  # saved WebsiteAgent that extracts the comic image's src, alt and title.
  before do
    stub_request(:any, /xkcd/).to_return(
      body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
      status: 200,
      headers: { 'X-Status-Message' => 'OK' }
    )

    extract_rules = {
      'url' => { 'css' => "#comic img", 'value' => "@src" },
      'title' => { 'css' => "#comic img", 'value' => "@alt" },
      'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
    }

    @valid_options = {
      'name' => "XKCD",
      'expected_update_period_in_days' => "2",
      'type' => "html",
      'url' => "http://xkcd.com",
      'mode' => 'on_change',
      'extract' => extract_rules
    }

    @checker = Agents::WebsiteAgent.new(name: "xkcd", options: @valid_options, keep_events_for: 2)
    @checker.user = users(:bob)
    @checker.save!
  end

  # Run the shared examples registered for WebRequestConcern against this agent.
  it_behaves_like WebRequestConcern
  # Option validation: each example flips one option between accepted and
  # rejected values and checks the agent's validity after every change.
  describe "validations" do
    before do
      # Sanity check: the agent built by the outer setup starts out valid.
      expect(@checker).to be_valid
    end

    it "should validate the integer fields" do
      @checker.options['expected_update_period_in_days'] = "2"
      expect(@checker).to be_valid

      @checker.options['expected_update_period_in_days'] = "nonsense"
      expect(@checker).not_to be_valid
    end

    it "should validate uniqueness_look_back" do
      @checker.options['uniqueness_look_back'] = "nonsense"
      expect(@checker).not_to be_valid

      @checker.options['uniqueness_look_back'] = "2"
      expect(@checker).to be_valid
    end

    it "should validate mode" do
      @checker.options['mode'] = "nonsense"
      expect(@checker).not_to be_valid

      # Both named modes and the empty string are accepted.
      ["on_change", "all", ""].each do |accepted_mode|
        @checker.options['mode'] = accepted_mode
        expect(@checker).to be_valid
      end
    end

    it "should validate the force_encoding option" do
      ['', 'UTF-8'].each do |accepted_encoding|
        @checker.options['force_encoding'] = accepted_encoding
        expect(@checker).to be_valid
      end

      # An array value and an unknown encoding name are both rejected.
      [['UTF-8'], 'UTF-42'].each do |rejected_encoding|
        @checker.options['force_encoding'] = rejected_encoding
        expect(@checker).not_to be_valid
      end
    end
  end
  # Exercises WebsiteAgent#check against the stubbed xkcd page: event creation,
  # the 'all' / 'on_change' modes, uniqueness_look_back, and array-valued urls.
  describe "#check" do
    it "should check for changes (and update Event.expires_at)" do
      expect { @checker.check }.to change { Event.count }.by(1)
      event = Event.last

      # Let wall-clock time advance so a refreshed expires_at can differ from
      # the value recorded by the first check.
      sleep 2

      expect { @checker.check }.not_to change { Event.count }
      update_event = Event.last
      expect(update_event.expires_at).not_to eq(event.expires_at)
    end

    it "should always save events when in :all mode" do
      expect {
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
        @checker.check
      }.to change { Event.count }.by(2)
    end

    it "should take uniqueness_look_back into account during deduplication" do
      # Create two events with identical payloads.
      @valid_options['mode'] = 'all'
      @checker.options = @valid_options
      @checker.check
      @checker.check

      # Overwrite the newest event's payload. save! (rather than save) makes a
      # failed write abort the example instead of silently skewing the checks below.
      event = Event.last
      event.payload = "{}"
      event.save!

      # Looking back two events, no new event is created.
      expect {
        @valid_options['mode'] = 'on_change'
        @valid_options['uniqueness_look_back'] = 2
        @checker.options = @valid_options
        @checker.check
      }.not_to change { Event.count }

      # Looking back a single event, a new event is created.
      expect {
        @valid_options['mode'] = 'on_change'
        @valid_options['uniqueness_look_back'] = 1
        @checker.options = @valid_options
        @checker.check
      }.to change { Event.count }.by(1)
    end

    it "should log an error if the number of results for a set of extraction patterns differs" do
      # Point one extractor at "div" while the others still target "#comic img".
      @valid_options['extract']['url']['css'] = "div"
      @checker.options = @valid_options
      @checker.check
      expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
    end

    it "should accept an array for url" do
      @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
      @checker.options = @valid_options
      expect { @checker.save! }.not_to raise_error
      expect { @checker.check }.not_to raise_error
    end

    it "should parse events from all urls in array" do
      expect {
        @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
      }.to change { Event.count }.by(2)
    end

    it "should follow unique rules when parsing array of urls" do
      # Same url twice in the default 'on_change' mode yields a single event.
      expect {
        @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
        @checker.options = @valid_options
        @checker.check
      }.to change { Event.count }.by(1)
    end
  end
  # Verifies that a gzip-compressed JSON response is decompressed before
  # extraction when the 'unzip' option is set to 'gzip'.
  describe 'unzipping' do
    it 'should unzip with unzip option' do
      json = {
        'response' => {
          'version' => 2,
          'title' => "hello!"
        }
      }
      zipped = ActiveSupport::Gzip.compress(json.to_json)
      stub_request(:any, /gzip/).to_return(:body => zipped, :status => 200)

      site = {
        'name' => "Some JSON Response",
        'expected_update_period_in_days' => "2",
        'type' => "json",
        'url' => "http://gzip.com",
        'mode' => 'on_change',
        'extract' => {
          'version' => { 'path' => 'response.version' },
        },
        'unzip' => 'gzip',
      }
      checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
      checker.user = users(:bob)
      checker.save!

      checker.check
      event = Event.last
      # No debug printing here: the spec should not write to stdout.
      expect(event.payload['version']).to eq(2)
    end
  end
  # Both examples serve an EUC-JP encoded JSON body and expect the extracted
  # value to equal the original string when 'force_encoding' is 'EUC-JP'.
  describe 'encoding' do
    it 'should be forced with force_encoding option' do
      huginn = "\u{601d}\u{8003}"
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP)

      # The Content-Type header carries no charset at all.
      stub_request(:any, /no-encoding/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json' },
        status: 200
      )

      site = {
        'name' => "Some JSON Response",
        'expected_update_period_in_days' => "2",
        'type' => "json",
        'url' => "http://no-encoding.example.com",
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        },
        'force_encoding' => 'EUC-JP'
      }

      checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site)
      checker.user = users(:bob)
      checker.save!
      checker.check

      expect(Event.last.payload['value']).to eq(huginn)
    end

    it 'should be overridden with force_encoding option' do
      huginn = "\u{601d}\u{8003}"
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP)

      # The Content-Type header mentions UTF-8 although the body is EUC-JP.
      stub_request(:any, /wrong-encoding/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json; UTF-8' },
        status: 200
      )

      site = {
        'name' => "Some JSON Response",
        'expected_update_period_in_days' => "2",
        'type' => "json",
        'url' => "http://wrong-encoding.example.com",
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        },
        'force_encoding' => 'EUC-JP'
      }

      checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site)
      checker.user = users(:bob)
      checker.save!
      checker.check

      expect(Event.last.payload['value']).to eq(huginn)
    end
  end
  # Walks the agent through a sequence of states under a stubbed clock and
  # checks #working? after each step.
  describe '#working?' do
    it 'checks if events have been received within the expected receive period' do
      # The stub's block closes over stubbed_time, so reassigning the local
      # below is how this example moves the clock (assuming the stub evaluates
      # its block on each Time.now call).
      stubbed_time = Time.now
      stub(Time).now { stubbed_time }

      # No events created
      expect(@checker).not_to be_working

      # Just created events
      @checker.check
      expect(@checker.reload).to be_working

      # There is a recent error
      @checker.error "oh no!"
      expect(@checker.reload).not_to be_working

      # There is a newer event now
      stubbed_time = 20.minutes.from_now
      @checker.events.delete_all
      @checker.check
      expect(@checker.reload).to be_working

      # Two days have passed without a new event having been created
      stubbed_time = 2.days.from_now
      expect(@checker.reload).not_to be_working
    end
  end
  # Extraction behaviour for HTML (CSS / XPath), JSON paths and plain-text
  # regexps. HTML examples use the xkcd fixture stubbed by the outer setup;
  # JSON and text examples stub their own responses.
  describe "parsing" do
    it "parses CSS" do
      @checker.check
      event = Event.last
      expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
      expect(event.payload['title']).to eq("Evolving")
      expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
    end

    it "parses XPath" do
      # Replace every CSS selector with an XPath expression for the same image.
      @valid_options['extract'].each_value do |extractor|
        extractor.delete('css')
        extractor['xpath'] = "//*[@id='comic']//img"
      end
      @checker.options = @valid_options
      @checker.check
      event = Event.last
      expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
      expect(event.payload['title']).to eq("Evolving")
      expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
    end

    it "should turn relative urls to absolute" do
      rel_site = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://xkcd.com",
        'mode' => "on_change",
        'extract' => {
          'url' => {'css' => "#topLeft a", 'value' => "@href"},
        }
      }
      rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
      rel.user = users(:bob)
      rel.save!
      rel.check
      event = Event.last
      expect(event.payload['url']).to eq("http://xkcd.com/about")
    end

    # NOTE(review): the title says "integer", but the payload is compared
    # against the string "9" - confirm which type is actually intended.
    it "should return an integer value if XPath evaluates to one" do
      rel_site = {
        'name' => "XKCD",
        'expected_update_period_in_days' => 2,
        'type' => "html",
        'url' => "http://xkcd.com",
        'mode' => "on_change",
        'extract' => {
          'num_links' => {'css' => "#comicLinks", 'value' => "count(./a)"}
        }
      }
      rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
      rel.user = users(:bob)
      rel.save!
      rel.check
      event = Event.last
      expect(event.payload['num_links']).to eq("9")
    end

    it "should return all texts concatenated if XPath returns many text nodes" do
      rel_site = {
        'name' => "XKCD",
        'expected_update_period_in_days' => 2,
        'type' => "html",
        'url' => "http://xkcd.com",
        'mode' => "on_change",
        'extract' => {
          'slogan' => {'css' => "#slogan", 'value' => ".//text()"}
        }
      }
      rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
      rel.user = users(:bob)
      rel.save!
      rel.check
      event = Event.last
      expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, and language.")
    end

    it "should interpolate _response_" do
      # Reuse the 'url' extractor's selector but compute the value from the
      # response status and the X-Status-Message header set by the stub.
      @valid_options['extract']['response_info'] =
        @valid_options['extract']['url'].merge(
          'value' => '"{{ "The reponse was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." }}"'
        )
      @checker.options = @valid_options
      @checker.check
      event = Event.last
      expect(event.payload['response_info']).to eq('The reponse was 200 OK.')
    end

    describe "JSON" do
      it "works with paths" do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://json-site.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => {'path' => "response.version"},
            'title' => {'path' => "response.title"}
          }
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!
        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
        expect(event.payload['title']).to eq("hello!")
      end

      it "can handle arrays" do
        json = {
          'response' => {
            'data' => [
              {'title' => "first", 'version' => 2},
              {'title' => "second", 'version' => 2.5}
            ]
          }
        }
        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://json-site.com",
          'mode' => 'on_change',
          'extract' => {
            :title => {'path' => "response.data[*].title"},
            :version => {'path' => "response.data[*].version"}
          }
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!

        expect {
          checker.check
        }.to change { Event.count }.by(2)

        # Event.last(2) returns the two newest events, older one first.
        first_event, second_event = Event.last(2)
        expect(first_event.payload['version']).to eq(2)
        expect(first_event.payload['title']).to eq("first")
        expect(second_event.payload['version']).to eq(2.5)
        expect(second_event.payload['title']).to eq("second")
      end

      it "stores the whole object if :extract is not specified" do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://json-site.com",
          'mode' => 'on_change'
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!
        checker.check
        event = Event.last
        expect(event.payload['response']['version']).to eq(2)
        expect(event.payload['response']['title']).to eq("hello!")
      end
    end

    describe "text parsing" do
      before do
        # The body is a plain string literal so its lines carry no leading
        # whitespace; the '^(.+?): (.+)$' patterns below rely on that.
        stub_request(:any, /text-site/).to_return(body: "water: wet\nfire: hot\n", status: 200)
        site = {
          'name' => 'Some Text Response',
          'expected_update_period_in_days' => '2',
          'type' => 'text',
          'url' => 'http://text-site.com',
          'mode' => 'on_change',
          'extract' => {
            'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
            'property' => { 'regexp' => '^(.+?): (.+)$', index: 2 },
          }
        }
        @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
        @checker.user = users(:bob)
        @checker.save!
      end

      # Overrides the default extractors with named groups selected by name.
      it "works with regexp with named capture" do
        @checker.options = @checker.options.merge('extract' => {
          'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
          'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
        })

        expect {
          @checker.check
        }.to change { Event.count }.by(2)

        event1, event2 = Event.last(2)
        expect(event1.payload['word']).to eq('water')
        expect(event1.payload['property']).to eq('wet')
        expect(event2.payload['word']).to eq('fire')
        expect(event2.payload['property']).to eq('hot')
      end

      # Uses the numeric-index extractors configured in the before block.
      it "works with regexp" do
        expect {
          @checker.check
        }.to change { Event.count }.by(2)

        event1, event2 = Event.last(2)
        expect(event1.payload['word']).to eq('water')
        expect(event1.payload['property']).to eq('wet')
        expect(event2.payload['word']).to eq('fire')
        expect(event2.payload['property']).to eq('hot')
      end
    end
  end
  # Exercises WebsiteAgent#receive: the agent is handed an incoming event
  # whose payload supplies the url to scrape and values for interpolation.
  describe "#receive" do
    before do
      # Unsaved incoming event attributed to a fixture agent.
      @event = Event.new
      @event.agent = agents(:bob_rain_notifier_agent)
      @event.payload = {
        'url' => 'http://xkcd.com',
        'link' => 'Random',
      }
    end

    it "should scrape from the url element in incoming event payload" do
      expect {
        @checker.options = @valid_options
        @checker.receive([@event])
      }.to change { Event.count }.by(1)
    end

    it "should interpolate values from incoming event payload" do
      expect {
        @valid_options['extract'] = {
          'from' => {
            'xpath' => '*[1]',
            'value' => '{{url | to_xpath}}'
          },
          'to' => {
            'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
            'value' => '@href'
          },
        }
        @checker.options = @valid_options
        @checker.receive([@event])
      }.to change { Event.count }.by(1)

      expect(Event.last.payload).to eq({
        'from' => 'http://xkcd.com',
        'to' => 'http://dynamic.xkcd.com/random/comic/',
      })
    end

    it "should interpolate values from incoming event payload and _response_" do
      @event.payload['title'] = 'XKCD'

      expect {
        # The right-hand side reads the existing 'url' extractor before the
        # whole 'extract' hash is replaced.
        @valid_options['extract'] = {
          'response_info' => @valid_options['extract']['url'].merge(
            'value' => '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
          )
        }
        @checker.options = @valid_options
        @checker.receive([@event])
      }.to change { Event.count }.by(1)

      expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
    end

    it "should support merging of events" do
      expect {
        @checker.options = @valid_options
        # String key, matching every other option access in this spec.
        @checker.options['mode'] = "merge"
        @checker.receive([@event])
      }.to change { Event.count }.by(1)

      # 'link' comes from the incoming event, not from the extractors.
      last_payload = Event.last.payload
      expect(last_payload['link']).to eq('Random')
    end
  end
  503. end
  # Same xkcd extraction as the unauthenticated group, but the stub only
  # answers requests that carry the expected HTTP Basic Authorization header.
  describe "checking with http basic auth" do
    before do
      # pack('m') Base64-encodes the credentials; chomp drops its trailing newline.
      encoded_credentials = ['user:pass'].pack('m').chomp

      stub_request(:any, /example/).
        with(headers: { 'Authorization' => "Basic #{encoded_credentials}" }).
        to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)

      extract_rules = {
        'url' => { 'css' => "#comic img", 'value' => "@src" },
        'title' => { 'css' => "#comic img", 'value' => "@alt" },
        'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
      }

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'extract' => extract_rules,
        'basic_auth' => "user:pass"
      }

      @checker = Agents::WebsiteAgent.new(name: "auth", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    describe "#check" do
      it "should check for changes" do
        # First check creates an event; an identical second check does not.
        expect { @checker.check }.to change { Event.count }.by(1)
        expect { @checker.check }.not_to change { Event.count }
      end
    end
  end
  # The stub only answers requests that include the custom 'foo: bar' header,
  # which the agent is configured to send through its 'headers' option.
  describe "checking with headers" do
    before do
      custom_headers = { 'foo' => 'bar' }

      stub_request(:any, /example/).
        with(headers: custom_headers).
        to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'headers' => { 'foo' => 'bar' },
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" }
        }
      }

      @checker = Agents::WebsiteAgent.new(name: "ua", options: @valid_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    describe "#check" do
      it "should check for changes" do
        expect { @checker.check }.to change { Event.count }.by(1)
      end
    end
  end
  559. end