website_agent_spec.rb 38KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084
  1. require 'rails_helper'
  2. describe Agents::WebsiteAgent do
  3. describe "checking without basic auth" do
  # Shared fixture: every request whose URL matches /xkcd/ is answered with the
  # stored xkcd.html page, and a saved HTML-type WebsiteAgent is exposed as
  # @checker (its options hash is kept in @valid_options).
  before do
    xkcd_page = File.read(Rails.root.join('spec/data_fixtures/xkcd.html'))
    stub_request(:any, /xkcd/).to_return(
      body: xkcd_page,
      status: 200,
      headers: { 'X-Status-Message' => 'OK' }
    )

    # The three extractors select the same element and differ only in the
    # attribute they read.
    @valid_options = {
      'name' => 'XKCD',
      'expected_update_period_in_days' => '2',
      'type' => 'html',
      'url' => 'http://xkcd.com',
      'mode' => 'on_change',
      'extract' => {
        'url' => { 'css' => '#comic img', 'value' => '@src' },
        'title' => { 'css' => '#comic img', 'value' => '@alt' },
        'hovertext' => { 'css' => '#comic img', 'value' => '@title' }
      }
    }

    @checker = Agents::WebsiteAgent.new(name: 'xkcd', options: @valid_options, keep_events_for: 2.days)
    @checker.user = users(:bob)
    @checker.save!
  end

  it_behaves_like WebRequestConcern
  # Option validation. Each example mutates @checker.options in place and
  # re-checks validity; the agent is asserted valid before every example.
  describe 'validations' do
    before { expect(@checker).to be_valid }

    it 'should validate the integer fields' do
      @checker.options['expected_update_period_in_days'] = '2'
      expect(@checker).to be_valid

      @checker.options['expected_update_period_in_days'] = 'nonsense'
      expect(@checker).not_to be_valid
    end

    it 'should validate uniqueness_look_back' do
      @checker.options['uniqueness_look_back'] = 'nonsense'
      expect(@checker).not_to be_valid

      @checker.options['uniqueness_look_back'] = '2'
      expect(@checker).to be_valid
    end

    it 'should validate mode' do
      @checker.options['mode'] = 'nonsense'
      expect(@checker).not_to be_valid

      # A blank mode is accepted alongside the named ones.
      ['on_change', 'all', ''].each do |accepted_mode|
        @checker.options['mode'] = accepted_mode
        expect(@checker).to be_valid
      end
    end

    it 'should validate the force_encoding option' do
      ['', 'UTF-8'].each do |accepted_encoding|
        @checker.options['force_encoding'] = accepted_encoding
        expect(@checker).to be_valid
      end

      # Neither an array value nor an unknown encoding name is accepted.
      [['UTF-8'], 'UTF-42'].each do |rejected_encoding|
        @checker.options['force_encoding'] = rejected_encoding
        expect(@checker).not_to be_valid
      end
    end

    context "in 'json' type" do
      it "should ensure that all extractions have a 'path'" do
        @checker.options['type'] = 'json'
        @checker.options['extract'] = { 'url' => { 'foo' => 'bar' } }
        expect(@checker).not_to be_valid
        expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)

        @checker.options['type'] = 'json'
        @checker.options['extract'] = { 'url' => { 'path' => 'bar' } }
        expect(@checker).to be_valid
      end
    end
  end
  # Behaviour of #check against the stubbed xkcd page: event creation,
  # de-duplication in 'on_change' mode, and handling of multiple URLs.
  describe "#check" do
    it "should check for changes (and update Event.expires_at)" do
      # Drive the clock through the Time.now stub instead of sleeping for two
      # real seconds; the second check then runs at a later "now".
      current_time = Time.now
      stub(Time).now { current_time }

      expect { @checker.check }.to change { Event.count }.by(1)
      event = Event.last

      current_time += 2
      expect { @checker.check }.not_to change { Event.count }
      update_event = Event.last
      expect(update_event.expires_at).not_to eq(event.expires_at)
    end

    it "should always save events when in :all mode" do
      expect {
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
        @checker.check
      }.to change { Event.count }.by(2)
    end

    it "should take uniqueness_look_back into account during deduplication" do
      # Create two identical events, then blank out the payload of the newest one.
      @valid_options['mode'] = 'all'
      @checker.options = @valid_options
      @checker.check
      @checker.check
      event = Event.last
      event.payload = "{}"
      event.save

      # Looking back two events still finds the untouched duplicate.
      expect {
        @valid_options['mode'] = 'on_change'
        @valid_options['uniqueness_look_back'] = 2
        @checker.options = @valid_options
        @checker.check
      }.not_to change { Event.count }

      # Looking back a single event only sees the blanked one, so a new event is stored.
      expect {
        @valid_options['mode'] = 'on_change'
        @valid_options['uniqueness_look_back'] = 1
        @checker.options = @valid_options
        @checker.check
      }.to change { Event.count }.by(1)
    end

    it "should log an error if the number of results for a set of extraction patterns differs" do
      @valid_options['extract']['url']['css'] = "div"
      @checker.options = @valid_options
      @checker.check
      expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
    end

    it "should accept an array for url" do
      @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
      @checker.options = @valid_options
      expect { @checker.save! }.not_to raise_error
      expect { @checker.check }.not_to raise_error
    end

    it "should parse events from all urls in array" do
      expect {
        @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
      }.to change { Event.count }.by(2)
    end

    it "should follow unique rules when parsing array of urls" do
      # Default 'on_change' mode: the same page fetched twice yields one event.
      expect {
        @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
        @checker.options = @valid_options
        @checker.check
      }.to change { Event.count }.by(1)
    end
  end
  # Decompression of response bodies, driven either by the Content-Encoding
  # response header or by the agent's 'unzip' option.
  describe 'unzipping' do
    it 'should unzip automatically if the response has Content-Encoding: gzip' do
      payload = { 'response' => { 'version' => 2, 'title' => 'hello!' } }
      gzipped_body = ActiveSupport::Gzip.compress(payload.to_json)
      stub_request(:any, /gzip/).to_return(
        body: gzipped_body,
        headers: { 'Content-Encoding' => 'gzip' },
        status: 200
      )

      # Deliberately no 'unzip' option here.
      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://gzip.com',
        'mode' => 'on_change',
        'extract' => {
          'version' => { 'path' => 'response.version' }
        }
      }
      agent = Agents::WebsiteAgent.new(name: 'Weather Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      agent.check
      expect(Event.last.payload['version']).to eq(2)
    end

    it 'should unzip with unzip option' do
      payload = { 'response' => { 'version' => 2, 'title' => 'hello!' } }
      gzipped_body = ActiveSupport::Gzip.compress(payload.to_json)
      # No Content-Encoding header this time; only the option signals gzip.
      stub_request(:any, /gzip/).to_return(body: gzipped_body, status: 200)

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://gzip.com',
        'mode' => 'on_change',
        'extract' => {
          'version' => { 'path' => 'response.version' }
        },
        'unzip' => 'gzip'
      }
      agent = Agents::WebsiteAgent.new(name: 'Weather Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      agent.check
      expect(Event.last.payload['version']).to eq(2)
    end

    it 'should either avoid or support a raw deflate stream (#1018)' do
      # Requests whose Accept-Encoding does not mention deflate get plain text...
      stub_request(:any, /deflate/)
        .with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ })
        .to_return(body: 'hello', status: 200)
      # ...while requests that advertise deflate get a deflate-encoded body.
      stub_request(:any, /deflate/)
        .with(headers: { 'Accept-Encoding' => /deflate/ })
        .to_return(
          body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
          headers: { 'Content-Encoding' => 'deflate' },
          status: 200
        )

      agent_options = {
        'name' => 'Some Response',
        'expected_update_period_in_days' => '2',
        'type' => 'text',
        'url' => 'http://deflate',
        'mode' => 'on_change',
        'extract' => {
          'content' => { 'regexp' => '.+', 'index' => 0 }
        }
      }
      agent = Agents::WebsiteAgent.new(name: 'Deflate Test', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      expect { agent.check }.to change(Event, :count).by(1)
      expect(Event.last.payload['content']).to eq('hello')
    end
  end
  # Character-encoding handling: the 'force_encoding' option, the charset
  # declared in Content-Type, and the fallback for an unknown charset.
  describe 'encoding' do
    it 'should be forced with force_encoding option' do
      huginn = "\u{601d}\u{8003}"
      # EUC-JP bytes tagged as binary; the Content-Type declares no charset.
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP).b
      stub_request(:any, /no-encoding/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json' },
        status: 200
      )

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://no-encoding.example.com',
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        },
        'force_encoding' => 'EUC-JP'
      }
      agent = Agents::WebsiteAgent.new(name: 'No Encoding Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      expect { agent.check }.to change(Event, :count).by(1)
      expect(Event.last.payload['value']).to eq(huginn)
    end

    it 'should be overridden with force_encoding option' do
      huginn = "\u{601d}\u{8003}"
      # The Content-Type mentions UTF-8 although the bytes are EUC-JP.
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP).b
      stub_request(:any, /wrong-encoding/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json; UTF-8' },
        status: 200
      )

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://wrong-encoding.example.com',
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        },
        'force_encoding' => 'EUC-JP'
      }
      agent = Agents::WebsiteAgent.new(name: 'Wrong Encoding Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      expect { agent.check }.to change(Event, :count).by(1)
      expect(Event.last.payload['value']).to eq(huginn)
    end

    it 'should be determined by charset in Content-Type' do
      huginn = "\u{601d}\u{8003}"
      # No 'force_encoding' here: the charset parameter is the only hint.
      euc_jp_body = { value: huginn }.to_json.encode(Encoding::EUC_JP)
      stub_request(:any, /charset-euc-jp/).to_return(
        body: euc_jp_body,
        headers: { 'Content-Type' => 'application/json; charset=EUC-JP' },
        status: 200
      )

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://charset-euc-jp.example.com',
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        }
      }
      agent = Agents::WebsiteAgent.new(name: 'Charset reader', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      expect { agent.check }.to change(Event, :count).by(1)
      expect(Event.last.payload['value']).to eq(huginn)
    end

    it 'should default to UTF-8 when unknown charset is found' do
      huginn = "\u{601d}\u{8003}"
      # The JSON bytes are left un-transcoded and only tagged as binary.
      raw_body = { value: huginn }.to_json.b
      stub_request(:any, /charset-unknown/).to_return(
        body: raw_body,
        headers: { 'Content-Type' => 'application/json; charset=unicode' },
        status: 200
      )

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://charset-unknown.example.com',
        'mode' => 'on_change',
        'extract' => {
          'value' => { 'path' => 'value' }
        }
      }
      agent = Agents::WebsiteAgent.new(name: 'Charset reader', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      expect { agent.check }.to change(Event, :count).by(1)
      expect(Event.last.payload['value']).to eq(huginn)
    end
  end
  # #working? as a function of recent events, recent errors and elapsed time.
  # Time.now is stubbed so the example can move the clock forward.
  describe '#working?' do
    it 'checks if events have been received within the expected receive period' do
      frozen_now = Time.now
      stub(Time).now { frozen_now }

      # No events created
      expect(@checker).not_to be_working

      # Just created events
      @checker.check
      expect(@checker.reload).to be_working

      # There is a recent error
      @checker.error 'oh no!'
      expect(@checker.reload).not_to be_working

      # There is a newer event now
      frozen_now = 20.minutes.from_now
      @checker.events.delete_all
      @checker.check
      expect(@checker.reload).to be_working

      # Two days have passed without a new event having been created
      frozen_now = 2.days.from_now
      expect(@checker.reload).not_to be_working
    end
  end
  345. describe "parsing" do
  # Uses the CSS extractors from the shared @valid_options unchanged.
  it 'parses CSS' do
    @checker.check

    payload = Event.last.payload
    expect(payload['url']).to eq('http://imgs.xkcd.com/comics/evolving.png')
    expect(payload['title']).to eq('Evolving')
    expect(payload['hovertext']).to match(/^Biologists play reverse/)
  end
  # Same expectations as the CSS example, with every extractor switched from a
  # 'css' selector to an equivalent 'xpath' expression.
  it 'parses XPath' do
    @valid_options['extract'].each_value do |extractor|
      extractor.delete('css')
      extractor['xpath'] = "//*[@id='comic']//img"
    end
    @checker.options = @valid_options
    @checker.check

    payload = Event.last.payload
    expect(payload['url']).to eq('http://imgs.xkcd.com/comics/evolving.png')
    expect(payload['title']).to eq('Evolving')
    expect(payload['hovertext']).to match(/^Biologists play reverse/)
  end
  # An @href extracted from the page is expected back as an absolute URL on
  # the agent's host.
  it 'should turn relative urls to absolute' do
    agent_options = {
      'name' => 'XKCD',
      'expected_update_period_in_days' => '2',
      'type' => 'html',
      'url' => 'http://xkcd.com',
      'mode' => 'on_change',
      'extract' => {
        'url' => { 'css' => '#topLeft a', 'value' => '@href' }
      }
    }
    agent = Agents::WebsiteAgent.new(name: 'xkcd', options: agent_options)
    agent.user = users(:bob)
    agent.save!

    agent.check
    expect(Event.last.payload['url']).to eq('http://xkcd.com/about')
  end
  # The 'value' is an XPath count() expression; the payload is asserted to
  # carry the count as the string "9".
  it 'should return an integer value if XPath evaluates to one' do
    agent_options = {
      'name' => 'XKCD',
      'expected_update_period_in_days' => 2,
      'type' => 'html',
      'url' => 'http://xkcd.com',
      'mode' => 'on_change',
      'extract' => {
        'num_links' => { 'css' => '#comicLinks', 'value' => 'count(./a)' }
      }
    }
    agent = Agents::WebsiteAgent.new(name: 'xkcd', options: agent_options)
    agent.user = users(:bob)
    agent.save!

    agent.check
    expect(Event.last.payload['num_links']).to eq('9')
  end
  # './/text()' selects every text node under #slogan; the payload is expected
  # to hold them joined into one string.
  it 'should return all texts concatenated if XPath returns many text nodes' do
    agent_options = {
      'name' => 'XKCD',
      'expected_update_period_in_days' => 2,
      'type' => 'html',
      'url' => 'http://xkcd.com',
      'mode' => 'on_change',
      'extract' => {
        'slogan' => { 'css' => '#slogan', 'value' => './/text()' }
      }
    }
    agent = Agents::WebsiteAgent.new(name: 'xkcd', options: agent_options)
    agent.user = users(:bob)
    agent.save!

    agent.check
    expect(Event.last.payload['slogan']).to eq('A webcomic of romance, sarcasm, math, and language.')
  end
  # Adds an extractor whose 'value' is a Liquid template reading the response
  # status and the X-Status-Message header via `_response_`.
  it 'should interpolate _response_' do
    response_template =
      '"{{ "The reponse was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." }}"'
    @valid_options['extract']['response_info'] =
      @valid_options['extract']['url'].merge('value' => response_template)
    @checker.options = @valid_options

    @checker.check
    expect(Event.last.payload['response_info']).to eq('The reponse was 200 OK.')
  end
  # XML parsing against the github_rss.atom fixture, with and without
  # namespace prefixes in the extraction expressions.
  describe 'XML' do
    before do
      stub_request(:any, /github_rss/).to_return(
        body: File.read(Rails.root.join('spec/data_fixtures/github_rss.atom')),
        status: 200
      )

      feed_options = {
        'name' => 'GitHub',
        'expected_update_period_in_days' => '2',
        'type' => 'xml',
        'url' => 'http://example.com/github_rss.atom',
        'mode' => 'on_change',
        'extract' => {
          'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
          'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
          'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' }
        }
      }
      @checker = Agents::WebsiteAgent.new(name: 'github', options: feed_options, keep_events_for: 2.days)
      @checker.user = users(:bob)
      @checker.save!
    end

    it 'works with XPath' do
      expect { @checker.check }.to change(Event, :count).by(20)

      payload = Event.last.payload
      expect(payload['title']).to eq('Shift to dev group')
      expect(payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
      expect(payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
    end

    it 'works with XPath with namespaces unstripped' do
      # With namespaces kept, the prefix-less expressions from `before` produce no events.
      @checker.options['use_namespaces'] = 'true'
      @checker.save!
      expect { @checker.check }.to change(Event, :count).by(0)

      @checker.options['extract'] = {
        'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
        'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
        'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' }
      }
      @checker.save!
      expect { @checker.check }.to change(Event, :count).by(20)

      payload = Event.last.payload
      expect(payload['title']).to eq('Shift to dev group')
      expect(payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
      expect(payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
    end

    it 'works with CSS selectors' do
      # Prefix-less value expressions: events are created, but title and thumbnail are empty.
      @checker.options['extract'] = {
        'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
        'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
        'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' }
      }
      @checker.save!
      expect { @checker.check }.to change(Event, :count).by(20)

      payload = Event.last.payload
      expect(payload['title']).to be_empty
      expect(payload['thumbnail']).to be_empty

      # Namespace-qualified value expressions yield the real values.
      @checker.options['extract'] = {
        'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
        'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
        'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' }
      }
      @checker.save!
      expect { @checker.check }.to change(Event, :count).by(20)

      payload = Event.last.payload
      expect(payload['title']).to eq('Shift to dev group')
      expect(payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
      expect(payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
    end

    it 'works with CSS selectors with namespaces stripped' do
      @checker.options['extract'] = {
        'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
        'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
        'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' }
      }
      @checker.options['use_namespaces'] = 'false'
      @checker.save!
      expect { @checker.check }.to change(Event, :count).by(20)

      payload = Event.last.payload
      expect(payload['title']).to eq('Shift to dev group')
      expect(payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
      expect(payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
    end
  end
  # XML parsing against the cdata_rss.atom fixture; each extractor reads the
  # text nodes below its element.
  describe 'XML with cdata' do
    before do
      stub_request(:any, /cdata_rss/).to_return(
        body: File.read(Rails.root.join('spec/data_fixtures/cdata_rss.atom')),
        status: 200
      )

      feed_options = {
        'name' => 'CDATA',
        'expected_update_period_in_days' => '2',
        'type' => 'xml',
        'url' => 'http://example.com/cdata_rss.atom',
        'mode' => 'on_change',
        'extract' => {
          'author' => { 'xpath' => '/feed/entry/author/name', 'value' => './/text()' },
          'title' => { 'xpath' => '/feed/entry/title', 'value' => './/text()' },
          'content' => { 'xpath' => '/feed/entry/content', 'value' => './/text()' }
        }
      }
      @checker = Agents::WebsiteAgent.new(name: 'cdata', options: feed_options, keep_events_for: 2.days)
      @checker.user = users(:bob)
      @checker.save!
    end

    it 'works with XPath' do
      expect { @checker.check }.to change(Event, :count).by(10)

      payload = Event.last.payload
      expect(payload['author']).to eq('bill98')
      expect(payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
      expect(payload['content']).to start_with('Can I ')
    end
  end
  # JSON parsing: single-value paths, wildcard paths over arrays, and the
  # no-'extract' case where the whole document becomes the payload.
  describe 'JSON' do
    it 'works with paths' do
      document = { 'response' => { 'version' => 2, 'title' => 'hello!' } }
      stub_request(:any, /json-site/).to_return(body: document.to_json, status: 200)

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://json-site.com',
        'mode' => 'on_change',
        'extract' => {
          'version' => { 'path' => 'response.version' },
          'title' => { 'path' => 'response.title' }
        }
      }
      agent = Agents::WebsiteAgent.new(name: 'Weather Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      agent.check
      payload = Event.last.payload
      expect(payload['version']).to eq(2)
      expect(payload['title']).to eq('hello!')
    end

    it 'can handle arrays' do
      document = {
        'response' => {
          'data' => [
            { 'title' => 'first', 'version' => 2 },
            { 'title' => 'second', 'version' => 2.5 }
          ]
        }
      }
      stub_request(:any, /json-site/).to_return(body: document.to_json, status: 200)

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://json-site.com',
        'mode' => 'on_change',
        # Symbol keys on purpose, unlike the string keys used elsewhere.
        'extract' => {
          title: { 'path' => 'response.data[*].title' },
          version: { 'path' => 'response.data[*].version' }
        }
      }
      agent = Agents::WebsiteAgent.new(name: 'Weather Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      expect { agent.check }.to change(Event, :count).by(2)

      # Event.last(2) is oldest-first: one event per array element, in order.
      earlier_event, later_event = Event.last(2)
      expect(later_event.payload['version']).to eq(2.5)
      expect(later_event.payload['title']).to eq('second')
      expect(earlier_event.payload['version']).to eq(2)
      expect(earlier_event.payload['title']).to eq('first')
    end

    it 'stores the whole object if :extract is not specified' do
      document = { 'response' => { 'version' => 2, 'title' => 'hello!' } }
      stub_request(:any, /json-site/).to_return(body: document.to_json, status: 200)

      agent_options = {
        'name' => 'Some JSON Response',
        'expected_update_period_in_days' => '2',
        'type' => 'json',
        'url' => 'http://json-site.com',
        'mode' => 'on_change'
      }
      agent = Agents::WebsiteAgent.new(name: 'Weather Site', options: agent_options)
      agent.user = users(:bob)
      agent.save!

      agent.check
      stored_response = Event.last.payload['response']
      expect(stored_response['version']).to eq(2)
      expect(stored_response['title']).to eq('hello!')
    end
  end
  # Plain-text parsing with regular expressions. The stubbed body is two
  # "key: value" lines, and each line is expected to become one event.
  describe 'text parsing' do
    before do
      stub_request(:any, /text-site/).to_return(body: "water: wet\nfire: hot\n", status: 200)

      # 'index' picks the capture group; it is given once as an Integer and once as a String.
      agent_options = {
        'name' => 'Some Text Response',
        'expected_update_period_in_days' => '2',
        'type' => 'text',
        'url' => 'http://text-site.com',
        'mode' => 'on_change',
        'extract' => {
          'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
          'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' }
        }
      }
      @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: agent_options)
      @checker.user = users(:bob)
      @checker.save!
    end

    it 'works with regexp with named capture' do
      named_extractors = {
        'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
        'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' }
      }
      @checker.options = @checker.options.merge('extract' => named_extractors)

      expect { @checker.check }.to change(Event, :count).by(2)

      water_event, fire_event = Event.last(2)
      expect(water_event.payload['word']).to eq('water')
      expect(water_event.payload['property']).to eq('wet')
      expect(fire_event.payload['word']).to eq('fire')
      expect(fire_event.payload['property']).to eq('hot')
    end

    it 'works with regexp' do
      expect { @checker.check }.to change(Event, :count).by(2)

      water_event, fire_event = Event.last(2)
      expect(water_event.payload['word']).to eq('water')
      expect(water_event.payload['property']).to eq('wet')
      expect(fire_event.payload['word']).to eq('fire')
      expect(fire_event.payload['property']).to eq('hot')
    end
  end
  685. end
  686. describe "#receive" do
  687. describe "with a url or url_from_event" do
  688. before do
  689. @event = Event.new
  690. @event.agent = agents(:bob_rain_notifier_agent)
  691. @event.payload = {
  692. 'url' => 'http://foo.com',
  693. 'link' => 'Random'
  694. }
  695. end
  696. it "should use url_from_event as the url to scrape" do
  697. stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  698. @checker.options = @valid_options.merge(
  699. 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
  700. )
  701. @checker.receive([@event])
  702. expect(stub).to have_been_requested
  703. end
  704. it "should use the Agent's `url` option if url_from_event is not set" do
  705. expect {
  706. @checker.options = @valid_options
  707. @checker.receive([@event])
  708. }.to change { Event.count }.by(1)
  709. end
  710. it "should allow url_from_event to be an array of urls" do
  711. stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  712. stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Ffoo.com')
  713. @checker.options = @valid_options.merge(
  714. 'url_from_event' => ['http://example.org/?url={{url | uri_escape}}', 'http://google.org/?url={{url | uri_escape}}']
  715. )
  716. @checker.receive([@event])
  717. expect(stub1).to have_been_requested
  718. expect(stub2).to have_been_requested
  719. end
  720. it "should interpolate values from incoming event payload" do
  721. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  722. expect {
  723. @valid_options['url_from_event'] = '{{ url }}'
  724. @valid_options['extract'] = {
  725. 'from' => {
  726. 'xpath' => '*[1]',
  727. 'value' => '{{url | to_xpath}}'
  728. },
  729. 'to' => {
  730. 'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
  731. 'value' => '@href'
  732. },
  733. }
  734. @checker.options = @valid_options
  735. @checker.receive([@event])
  736. }.to change { Event.count }.by(1)
  737. expect(Event.last.payload).to eq({
  738. 'from' => 'http://foo.com',
  739. 'to' => 'http://dynamic.xkcd.com/random/comic/',
  740. })
  741. end
  742. it "should use the options url if no url is in the event payload, and `url_from_event` is not provided" do
  743. @checker.options['mode'] = 'merge'
  744. @event.payload.delete('url')
  745. expect {
  746. @checker.receive([@event])
  747. }.to change { Event.count }.by(1)
  748. expect(Event.last.payload['title']).to eq('Evolving')
  749. expect(Event.last.payload['link']).to eq('Random')
  750. end
  # The extraction template mixes a value from the event (`title`) with
  # fields of the `_response_` object (status and a response header).
  it "should interpolate values from incoming event payload and _response_" do
    @event.payload['title'] = 'XKCD'
    sentence_template = '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'

    expect do
      # Reuse the existing 'url' rule and swap only its value expression.
      url_rule = @valid_options['extract']['url']
      @valid_options['extract'] = {
        'response_info' => url_rule.merge('value' => sentence_template)
      }
      @checker.options = @valid_options
      @checker.receive([@event])
    end.to change { Event.count }.by(1)

    expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
  end
  # Receives one event in "merge" mode and checks the emitted payload's link.
  # NOTE(review): presumably 'link' is carried over from the incoming event's
  # payload; confirm against the enclosing `before` block.
  it "should support merging of events" do
    expect do
      @checker.options = @valid_options
      @checker.options[:mode] = "merge"
      @checker.receive([@event])
    end.to change { Event.count }.by(1)

    expect(Event.last.payload['link']).to eq('Random')
  end
  773. end
  # Exercises the `data_from_event` option: the data to extract from is read
  # out of the incoming event's payload through a Liquid template.
  describe "with a data_from_event" do
    describe "with json data" do
      before do
        @event = Event.new
        @event.agent = agents(:bob_rain_notifier_agent)
        @event.payload = {
          'something' => 'some value',
          # The nested document is stored as a JSON string, not as a hash.
          'some_object' => { 'some_data' => { hello: 'world' }.to_json }
        }
        @event.save!

        json_overrides = {
          'type' => 'json',
          'data_from_event' => '{{ some_object.some_data }}',
          'extract' => { 'value' => { 'path' => 'hello' } }
        }
        @checker.options = @valid_options.merge(json_overrides)
      end

      it "should extract from the event data in the incoming event payload" do
        expect { @checker.receive([@event]) }.to change { Event.count }.by(1)

        expect(@checker.events.last.payload).to eq({ 'value' => 'world' })
      end

      it "should support merge mode" do
        @checker.options['mode'] = "merge"

        expect { @checker.receive([@event]) }.to change { Event.count }.by(1)

        merged_payload = @event.payload.merge('value' => 'world')
        expect(@checker.events.last.payload).to eq(merged_payload)
      end

      it "should output an error when nothing can be found at the path" do
        # Point the template at a key the payload does not contain.
        @checker.options = @checker.options.merge('data_from_event' => '{{ some_object.mistake }}')

        expect { @checker.receive([@event]) }.to_not change { Event.count }

        expect(@checker.logs.last.message).to match(/No data was found in the Event payload using the template {{ some_object\.mistake }}/)
      end

      it "should output an error when the data cannot be parsed" do
        broken_payload = @event.payload.merge('some_object' => { 'some_data' => '{invalid json' })
        @event.update_attribute :payload, broken_payload

        expect { @checker.receive([@event]) }.to_not change { Event.count }

        expect(@checker.logs.last.message).to match(/Error when handling event data:/)
      end
    end

    describe "with HTML data" do
      before do
        @event = Event.new
        @event.agent = agents(:bob_rain_notifier_agent)
        @event.payload = {
          'url' => 'http://xkcd.com',
          'some_object' => {
            'some_data' => "<div><span class='title'>Title!</span><span class='body'>Body!</span></div>"
          }
        }
        @event.save!

        html_overrides = {
          'type' => 'html',
          'data_from_event' => '{{ some_object.some_data }}',
          'extract' => {
            'title' => { 'css' => ".title", 'value' => ".//text()" },
            'body' => { 'css' => "div span.body", 'value' => ".//text()" }
          }
        }
        @checker.options = @valid_options.merge(html_overrides)
      end

      it "should extract from the event data in the incoming event payload" do
        expect { @checker.receive([@event]) }.to change { Event.count }.by(1)

        expect(@checker.events.last.payload).to eq({ 'title' => 'Title!', 'body' => 'Body!' })
      end
    end
  end
  852. end
  853. end
  # The stub only matches requests carrying the Basic Authorization header
  # built from "user:pass", the same value given in the 'basic_auth' option.
  describe "checking with http basic auth" do
    before do
      xkcd_page = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))
      stub_request(:any, /example/)
        .with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" })
        .to_return(body: xkcd_page, status: 200)

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" },
          'title' => { 'css' => "#comic img", 'value' => "@alt" },
          'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
        },
        'basic_auth' => "user:pass"
      }

      @checker = Agents::WebsiteAgent.new(name: "auth", options: @valid_options).tap do |agent|
        agent.user = users(:bob)
        agent.save!
      end
    end

    describe "#check" do
      it "should check for changes" do
        # First check emits one event; the second check emits nothing new.
        expect { @checker.check }.to change { Event.count }.by(1)
        expect { @checker.check }.not_to change { Event.count }
      end
    end
  end
  # The stub only matches requests that carry the 'foo: bar' header, the same
  # pair configured in the agent's 'headers' option.
  describe "checking with headers" do
    before do
      xkcd_page = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))
      stub_request(:any, /example/)
        .with(headers: { 'foo' => 'bar' })
        .to_return(body: xkcd_page, status: 200)

      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'on_change',
        'headers' => { 'foo' => 'bar' },
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" }
        }
      }

      @checker = Agents::WebsiteAgent.new(name: "ua", options: @valid_options).tap do |agent|
        agent.user = users(:bob)
        agent.save!
      end
    end

    describe "#check" do
      it "should check for changes" do
        expect { @checker.check }.to change { Event.count }.by(1)
      end
    end
  end
  # Scrapes every link href from the urlTest fixture and checks the URL in
  # each of the seven resulting events, in order.
  describe "checking urls" do
    before do
      url_test_page = File.read(Rails.root.join("spec/data_fixtures/urlTest.html"))
      stub_request(:any, /example/).to_return(body: url_test_page, status: 200)

      @valid_options = {
        'name' => "Url Test",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://www.example.com",
        'mode' => 'all',
        'extract' => {
          'url' => { 'css' => "a", 'value' => "@href" }
        }
      }

      @checker = Agents::WebsiteAgent.new(name: "ua", options: @valid_options).tap do |agent|
        agent.user = users(:bob)
        agent.save!
      end
    end

    describe "#check" do
      before do
        expect { @checker.check }.to change { Event.count }.by(7)
        @events = Event.last(7)
      end

      # One row per example: [description, expected url]. The row's position
      # is the index of the event it checks.
      [
        ["should check hostname",
         "http://google.com"],
        ["should check unescaped query",
         "https://www.google.ca/search?q=some%20query"],
        ["should check properly escaped query",
         "https://www.google.ca/search?q=some%20query"],
        ["should check unescaped unicode url",
         "http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8"],
        ["should check unescaped unicode query",
         "https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8"],
        ["should check properly escaped unicode url",
         "http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8"],
        ["should check properly escaped unicode query",
         "https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8"]
      ].each_with_index do |(description, expected_url), position|
        it description do
          event = @events[position]
          expect(event.payload['url']).to eq(expected_url)
        end
      end
    end
  end
  962. end