
website_agent_spec.rb

require 'rails_helper'

describe Agents::WebsiteAgent do
  describe "checking without basic auth" do
    before do
      stub_request(:any, /xkcd/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
                                           status: 200,
                                           headers: {
                                             'X-Status-Message' => 'OK'
                                           })
      @valid_options = {
        'name' => "XKCD",
        'expected_update_period_in_days' => "2",
        'type' => "html",
        'url' => "http://xkcd.com",
        'mode' => 'on_change',
        'extract' => {
          'url' => { 'css' => "#comic img", 'value' => "@src" },
          'title' => { 'css' => "#comic img", 'value' => "@alt" },
          'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
        }
      }
      @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2.days)
      @checker.user = users(:bob)
      @checker.save!
    end
    it_behaves_like WebRequestConcern

    describe "validations" do
      before do
        expect(@checker).to be_valid
      end

      it "should validate the integer fields" do
        @checker.options['expected_update_period_in_days'] = "2"
        expect(@checker).to be_valid

        @checker.options['expected_update_period_in_days'] = "nonsense"
        expect(@checker).not_to be_valid
      end

      it "should validate uniqueness_look_back" do
        @checker.options['uniqueness_look_back'] = "nonsense"
        expect(@checker).not_to be_valid

        @checker.options['uniqueness_look_back'] = "2"
        expect(@checker).to be_valid
      end

      it "should validate mode" do
        @checker.options['mode'] = "nonsense"
        expect(@checker).not_to be_valid

        @checker.options['mode'] = "on_change"
        expect(@checker).to be_valid

        @checker.options['mode'] = "all"
        expect(@checker).to be_valid

        @checker.options['mode'] = ""
        expect(@checker).to be_valid
      end

      it "should validate the force_encoding option" do
        @checker.options['force_encoding'] = ''
        expect(@checker).to be_valid

        @checker.options['force_encoding'] = 'UTF-8'
        expect(@checker).to be_valid

        @checker.options['force_encoding'] = ['UTF-8']
        expect(@checker).not_to be_valid

        @checker.options['force_encoding'] = 'UTF-42'
        expect(@checker).not_to be_valid
      end

      context "in 'json' type" do
        it "should ensure that all extractions have a 'path'" do
          @checker.options['type'] = 'json'
          @checker.options['extract'] = {
            'url' => { 'foo' => 'bar' },
          }
          expect(@checker).to_not be_valid
          expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)

          @checker.options['type'] = 'json'
          @checker.options['extract'] = {
            'url' => { 'path' => 'bar' },
          }
          expect(@checker).to be_valid
        end
      end
    end
    describe "#check" do
      it "should check for changes (and update Event.expires_at)" do
        expect { @checker.check }.to change { Event.count }.by(1)
        event = Event.last
        sleep 2
        expect { @checker.check }.not_to change { Event.count }
        update_event = Event.last
        expect(update_event.expires_at).not_to eq(event.expires_at)
      end

      it "should always save events when in :all mode" do
        expect {
          @valid_options['mode'] = 'all'
          @checker.options = @valid_options
          @checker.check
          @checker.check
        }.to change { Event.count }.by(2)
      end

      it "should take uniqueness_look_back into account during deduplication" do
        @valid_options['mode'] = 'all'
        @checker.options = @valid_options
        @checker.check
        @checker.check
        event = Event.last
        event.payload = "{}"
        event.save

        expect {
          @valid_options['mode'] = 'on_change'
          @valid_options['uniqueness_look_back'] = 2
          @checker.options = @valid_options
          @checker.check
        }.not_to change { Event.count }

        expect {
          @valid_options['mode'] = 'on_change'
          @valid_options['uniqueness_look_back'] = 1
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(1)
      end

      it "should log an error if the number of results for a set of extraction patterns differs" do
        @valid_options['extract']['url']['css'] = "div"
        @checker.options = @valid_options
        @checker.check
        expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
      end

      it "should accept an array for url" do
        @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
        @checker.options = @valid_options
        expect { @checker.save! }.not_to raise_error
        expect { @checker.check }.not_to raise_error
      end

      it "should parse events from all urls in array" do
        expect {
          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @valid_options['mode'] = 'all'
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(2)
      end

      it "should follow unique rules when parsing array of urls" do
        expect {
          @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @checker.options = @valid_options
          @checker.check
        }.to change { Event.count }.by(1)
      end
    end
    describe 'unzipping' do
      it 'should unzip automatically if the response has Content-Encoding: gzip' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          # no unzip option
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!

        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end

      it 'should unzip with unzip option' do
        json = {
          'response' => {
            'version' => 2,
            'title' => "hello!"
          }
        }
        zipped = ActiveSupport::Gzip.compress(json.to_json)
        stub_request(:any, /gzip/).to_return(body: zipped, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://gzip.com",
          'mode' => 'on_change',
          'extract' => {
            'version' => { 'path' => 'response.version' },
          },
          'unzip' => 'gzip',
        }
        checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
        checker.user = users(:bob)
        checker.save!

        checker.check
        event = Event.last
        expect(event.payload['version']).to eq(2)
      end

      it 'should either avoid or support a raw deflate stream (#1018)' do
        stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ }).
          to_return(body: 'hello',
                    status: 200)
        stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /deflate/ }).
          to_return(body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
                    headers: { 'Content-Encoding' => 'deflate' },
                    status: 200)
        site = {
          'name' => 'Some Response',
          'expected_update_period_in_days' => '2',
          'type' => 'text',
          'url' => 'http://deflate',
          'mode' => 'on_change',
          'extract' => {
            'content' => { 'regexp' => '.+', 'index' => 0 }
          }
        }
        checker = Agents::WebsiteAgent.new(name: "Deflate Test", options: site)
        checker.user = users(:bob)
        checker.save!

        expect {
          checker.check
        }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['content']).to eq('hello')
      end
    end
    describe 'encoding' do
      it 'should be forced with force_encoding option' do
        huginn = "\u{601d}\u{8003}"
        stub_request(:any, /no-encoding/).to_return(body: {
          value: huginn,
        }.to_json.encode(Encoding::EUC_JP).b, headers: {
          'Content-Type' => 'application/json',
        }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://no-encoding.example.com",
          'mode' => 'on_change',
          'extract' => {
            'value' => { 'path' => 'value' },
          },
          'force_encoding' => 'EUC-JP',
        }
        checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site)
        checker.user = users(:bob)
        checker.save!

        expect { checker.check }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['value']).to eq(huginn)
      end

      it 'should be overridden with force_encoding option' do
        huginn = "\u{601d}\u{8003}"
        stub_request(:any, /wrong-encoding/).to_return(body: {
          value: huginn,
        }.to_json.encode(Encoding::EUC_JP).b, headers: {
          'Content-Type' => 'application/json; UTF-8',
        }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://wrong-encoding.example.com",
          'mode' => 'on_change',
          'extract' => {
            'value' => { 'path' => 'value' },
          },
          'force_encoding' => 'EUC-JP',
        }
        checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site)
        checker.user = users(:bob)
        checker.save!

        expect { checker.check }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['value']).to eq(huginn)
      end

      it 'should be determined by charset in Content-Type' do
        huginn = "\u{601d}\u{8003}"
        stub_request(:any, /charset-euc-jp/).to_return(body: {
          value: huginn,
        }.to_json.encode(Encoding::EUC_JP), headers: {
          'Content-Type' => 'application/json; charset=EUC-JP',
        }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://charset-euc-jp.example.com",
          'mode' => 'on_change',
          'extract' => {
            'value' => { 'path' => 'value' },
          },
        }
        checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
        checker.user = users(:bob)
        checker.save!

        expect { checker.check }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['value']).to eq(huginn)
      end

      it 'should default to UTF-8 when unknown charset is found' do
        huginn = "\u{601d}\u{8003}"
        stub_request(:any, /charset-unknown/).to_return(body: {
          value: huginn,
        }.to_json.b, headers: {
          'Content-Type' => 'application/json; charset=unicode',
        }, status: 200)
        site = {
          'name' => "Some JSON Response",
          'expected_update_period_in_days' => "2",
          'type' => "json",
          'url' => "http://charset-unknown.example.com",
          'mode' => 'on_change',
          'extract' => {
            'value' => { 'path' => 'value' },
          },
        }
        checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
        checker.user = users(:bob)
        checker.save!

        expect { checker.check }.to change { Event.count }.by(1)
        event = Event.last
        expect(event.payload['value']).to eq(huginn)
      end
    end
    describe '#working?' do
      it 'checks if events have been received within the expected receive period' do
        stubbed_time = Time.now
        stub(Time).now { stubbed_time }

        expect(@checker).not_to be_working # No events created
        @checker.check
        expect(@checker.reload).to be_working # Just created events

        @checker.error "oh no!"
        expect(@checker.reload).not_to be_working # There is a recent error

        stubbed_time = 20.minutes.from_now
        @checker.events.delete_all
        @checker.check
        expect(@checker.reload).to be_working # There is a newer event now

        stubbed_time = 2.days.from_now
        expect(@checker.reload).not_to be_working # Two days have passed without a new event having been created
      end
    end
  345. describe "parsing" do
  346. it "parses CSS" do
  347. @checker.check
  348. event = Event.last
  349. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  350. expect(event.payload['title']).to eq("Evolving")
  351. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  352. end
  353. it "parses XPath" do
  354. @valid_options['extract'].each { |key, value|
  355. value.delete('css')
  356. value['xpath'] = "//*[@id='comic']//img"
  357. }
  358. @checker.options = @valid_options
  359. @checker.check
  360. event = Event.last
  361. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  362. expect(event.payload['title']).to eq("Evolving")
  363. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  364. end
  365. it "should turn relative urls to absolute" do
  366. rel_site = {
  367. 'name' => "XKCD",
  368. 'expected_update_period_in_days' => "2",
  369. 'type' => "html",
  370. 'url' => "http://xkcd.com",
  371. 'mode' => "on_change",
  372. 'extract' => {
  373. 'url' => {'css' => "#topLeft a", 'value' => "@href"},
  374. }
  375. }
  376. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  377. rel.user = users(:bob)
  378. rel.save!
  379. rel.check
  380. event = Event.last
  381. expect(event.payload['url']).to eq("http://xkcd.com/about")
  382. end
  383. it "should return an integer value if XPath evaluates to one" do
  384. rel_site = {
  385. 'name' => "XKCD",
  386. 'expected_update_period_in_days' => 2,
  387. 'type' => "html",
  388. 'url' => "http://xkcd.com",
  389. 'mode' => "on_change",
  390. 'extract' => {
  391. 'num_links' => {'css' => "#comicLinks", 'value' => "count(./a)"}
  392. }
  393. }
  394. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  395. rel.user = users(:bob)
  396. rel.save!
  397. rel.check
  398. event = Event.last
  399. expect(event.payload['num_links']).to eq("9")
  400. end
  401. it "should return all texts concatenated if XPath returns many text nodes" do
  402. rel_site = {
  403. 'name' => "XKCD",
  404. 'expected_update_period_in_days' => 2,
  405. 'type' => "html",
  406. 'url' => "http://xkcd.com",
  407. 'mode' => "on_change",
  408. 'extract' => {
  409. 'slogan' => {'css' => "#slogan", 'value' => ".//text()"}
  410. }
  411. }
  412. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  413. rel.user = users(:bob)
  414. rel.save!
  415. rel.check
  416. event = Event.last
  417. expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, and language.")
  418. end
  419. it "should interpolate _response_" do
  420. @valid_options['extract']['response_info'] =
  421. @valid_options['extract']['url'].merge(
  422. 'value' => '"{{ "The reponse was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." }}"'
  423. )
  424. @checker.options = @valid_options
  425. @checker.check
  426. event = Event.last
  427. expect(event.payload['response_info']).to eq('The reponse was 200 OK.')
  428. end
  429. describe "XML" do
  430. before do
  431. stub_request(:any, /github_rss/).to_return(
  432. body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")),
  433. status: 200
  434. )
  435. @checker = Agents::WebsiteAgent.new(name: 'github', options: {
  436. 'name' => 'GitHub',
  437. 'expected_update_period_in_days' => '2',
  438. 'type' => 'xml',
  439. 'url' => 'http://example.com/github_rss.atom',
  440. 'mode' => 'on_change',
  441. 'extract' => {
  442. 'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
  443. 'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
  444. 'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
  445. }
  446. }, keep_events_for: 2.days)
  447. @checker.user = users(:bob)
  448. @checker.save!
  449. end
  450. it "works with XPath" do
  451. expect {
  452. @checker.check
  453. }.to change { Event.count }.by(20)
  454. event = Event.last
  455. expect(event.payload['title']).to eq('Shift to dev group')
  456. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  457. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  458. end
  459. it "works with XPath with namespaces unstripped" do
  460. @checker.options['use_namespaces'] = 'true'
  461. @checker.save!
  462. expect {
  463. @checker.check
  464. }.to change { Event.count }.by(0)
  465. @checker.options['extract'] = {
  466. 'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
  467. 'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
  468. 'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' },
  469. }
  470. @checker.save!
  471. expect {
  472. @checker.check
  473. }.to change { Event.count }.by(20)
  474. event = Event.last
  475. expect(event.payload['title']).to eq('Shift to dev group')
  476. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  477. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  478. end
  479. it "works with CSS selectors" do
  480. @checker.options['extract'] = {
  481. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  482. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  483. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  484. }
  485. @checker.save!
  486. expect {
  487. @checker.check
  488. }.to change { Event.count }.by(20)
  489. event = Event.last
  490. expect(event.payload['title']).to be_empty
  491. expect(event.payload['thumbnail']).to be_empty
  492. @checker.options['extract'] = {
  493. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
  494. 'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
  495. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' },
  496. }
  497. @checker.save!
  498. expect {
  499. @checker.check
  500. }.to change { Event.count }.by(20)
  501. event = Event.last
  502. expect(event.payload['title']).to eq('Shift to dev group')
  503. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  504. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  505. end
  506. it "works with CSS selectors with namespaces stripped" do
  507. @checker.options['extract'] = {
  508. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  509. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  510. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  511. }
  512. @checker.options['use_namespaces'] = 'false'
  513. @checker.save!
  514. expect {
  515. @checker.check
  516. }.to change { Event.count }.by(20)
  517. event = Event.last
  518. expect(event.payload['title']).to eq('Shift to dev group')
  519. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  520. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  521. end
  522. end
  523. describe "XML with cdata" do
  524. before do
  525. stub_request(:any, /cdata_rss/).to_return(
  526. body: File.read(Rails.root.join("spec/data_fixtures/cdata_rss.atom")),
  527. status: 200
  528. )
  529. @checker = Agents::WebsiteAgent.new(name: 'cdata', options: {
  530. 'name' => 'CDATA',
  531. 'expected_update_period_in_days' => '2',
  532. 'type' => 'xml',
  533. 'url' => 'http://example.com/cdata_rss.atom',
  534. 'mode' => 'on_change',
  535. 'extract' => {
  536. 'author' => { 'xpath' => '/feed/entry/author/name', 'value' => './/text()'},
  537. 'title' => { 'xpath' => '/feed/entry/title', 'value' => './/text()' },
  538. 'content' => { 'xpath' => '/feed/entry/content', 'value' => './/text()' },
  539. }
  540. }, keep_events_for: 2.days)
  541. @checker.user = users(:bob)
  542. @checker.save!
  543. end
  544. it "works with XPath" do
  545. expect {
  546. @checker.check
  547. }.to change { Event.count }.by(10)
  548. event = Event.last
  549. expect(event.payload['author']).to eq('bill98')
  550. expect(event.payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
  551. expect(event.payload['content']).to start_with('Can I ')
  552. end
  553. end
  554. describe "JSON" do
  555. it "works with paths" do
  556. json = {
  557. 'response' => {
  558. 'version' => 2,
  559. 'title' => "hello!"
  560. }
  561. }
  562. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  563. site = {
  564. 'name' => "Some JSON Response",
  565. 'expected_update_period_in_days' => "2",
  566. 'type' => "json",
  567. 'url' => "http://json-site.com",
  568. 'mode' => 'on_change',
  569. 'extract' => {
  570. 'version' => {'path' => "response.version"},
  571. 'title' => {'path' => "response.title"}
  572. }
  573. }
  574. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  575. checker.user = users(:bob)
  576. checker.save!
  577. checker.check
  578. event = Event.last
  579. expect(event.payload['version']).to eq(2)
  580. expect(event.payload['title']).to eq("hello!")
  581. end
  582. it "can handle arrays" do
  583. json = {
  584. 'response' => {
  585. 'data' => [
  586. {'title' => "first", 'version' => 2},
  587. {'title' => "second", 'version' => 2.5}
  588. ]
  589. }
  590. }
  591. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  592. site = {
  593. 'name' => "Some JSON Response",
  594. 'expected_update_period_in_days' => "2",
  595. 'type' => "json",
  596. 'url' => "http://json-site.com",
  597. 'mode' => 'on_change',
  598. 'extract' => {
  599. :title => {'path' => "response.data[*].title"},
  600. :version => {'path' => "response.data[*].version"}
  601. }
  602. }
  603. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  604. checker.user = users(:bob)
  605. checker.save!
  606. expect {
  607. checker.check
  608. }.to change { Event.count }.by(2)
  609. (event2, event1) = Event.last(2)
  610. expect(event1.payload['version']).to eq(2.5)
  611. expect(event1.payload['title']).to eq("second")
  612. expect(event2.payload['version']).to eq(2)
  613. expect(event2.payload['title']).to eq("first")
  614. end
  615. it "stores the whole object if :extract is not specified" do
  616. json = {
  617. 'response' => {
  618. 'version' => 2,
  619. 'title' => "hello!"
  620. }
  621. }
  622. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  623. site = {
  624. 'name' => "Some JSON Response",
  625. 'expected_update_period_in_days' => "2",
  626. 'type' => "json",
  627. 'url' => "http://json-site.com",
  628. 'mode' => 'on_change'
  629. }
  630. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  631. checker.user = users(:bob)
  632. checker.save!
  633. checker.check
  634. event = Event.last
  635. expect(event.payload['response']['version']).to eq(2)
  636. expect(event.payload['response']['title']).to eq("hello!")
  637. end
  638. end
  639. describe "text parsing" do
  640. before do
  641. stub_request(:any, /text-site/).to_return(body: <<-EOF, status: 200)
  642. water: wet
  643. fire: hot
  644. EOF
  645. site = {
  646. 'name' => 'Some Text Response',
  647. 'expected_update_period_in_days' => '2',
  648. 'type' => 'text',
  649. 'url' => 'http://text-site.com',
  650. 'mode' => 'on_change',
  651. 'extract' => {
  652. 'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
  653. 'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' },
  654. }
  655. }
  656. @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
  657. @checker.user = users(:bob)
  658. @checker.save!
  659. end
  660. it "works with regexp with named capture" do
  661. @checker.options = @checker.options.merge('extract' => {
  662. 'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
  663. 'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
  664. })
  665. expect {
  666. @checker.check
  667. }.to change { Event.count }.by(2)
  668. event1, event2 = Event.last(2)
  669. expect(event1.payload['word']).to eq('water')
  670. expect(event1.payload['property']).to eq('wet')
  671. expect(event2.payload['word']).to eq('fire')
  672. expect(event2.payload['property']).to eq('hot')
  673. end
  674. it "works with regexp" do
  675. expect {
  676. @checker.check
  677. }.to change { Event.count }.by(2)
  678. event1, event2 = Event.last(2)
  679. expect(event1.payload['word']).to eq('water')
  680. expect(event1.payload['property']).to eq('wet')
  681. expect(event2.payload['word']).to eq('fire')
  682. expect(event2.payload['property']).to eq('hot')
  683. end
  684. end
  685. end
  686. describe "#receive" do
  687. before do
  688. @event = Event.new
  689. @event.agent = agents(:bob_rain_notifier_agent)
  690. @event.payload = {
  691. 'url' => 'http://xkcd.com',
  692. 'link' => 'Random',
  693. }
  694. end
  695. it "should scrape from the url element in incoming event payload" do
  696. expect {
  697. @checker.options = @valid_options
  698. @checker.receive([@event])
  699. }.to change { Event.count }.by(1)
  700. end
  701. it "should use url_from_event as url to scrape if it exists when receiving an event" do
  702. stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
  703. @checker.options = @valid_options.merge(
  704. 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
  705. )
  706. @checker.receive([@event])
  707. expect(stub).to have_been_requested
  708. end
  709. it "should allow url_from_event to be an array of urls" do
  710. stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Fxkcd.com')
  711. stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Fxkcd.com')
  712. @checker.options = @valid_options.merge(
  713. 'url_from_event' => ['http://example.org/?url={{url | uri_escape}}', 'http://google.org/?url={{url | uri_escape}}']
  714. )
  715. @checker.receive([@event])
  716. expect(stub1).to have_been_requested
  717. expect(stub2).to have_been_requested
  718. end
  719. it "should interpolate values from incoming event payload" do
  720. expect {
  721. @valid_options['extract'] = {
  722. 'from' => {
  723. 'xpath' => '*[1]',
  724. 'value' => '{{url | to_xpath}}'
  725. },
  726. 'to' => {
  727. 'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
  728. 'value' => '@href'
  729. },
  730. }
  731. @checker.options = @valid_options
  732. @checker.receive([@event])
  733. }.to change { Event.count }.by(1)
  734. expect(Event.last.payload).to eq({
  735. 'from' => 'http://xkcd.com',
  736. 'to' => 'http://dynamic.xkcd.com/random/comic/',
  737. })
  738. end
  739. it "should interpolate values from incoming event payload and _response_" do
  740. @event.payload['title'] = 'XKCD'
  741. expect {
  742. @valid_options['extract'] = {
  743. 'response_info' => @valid_options['extract']['url'].merge(
  744. 'value' => '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
  745. )
  746. }
  747. @checker.options = @valid_options
  748. @checker.receive([@event])
  749. }.to change { Event.count }.by(1)
  750. expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
  751. end
  752. it "should support merging of events" do
  753. expect {
  754. @checker.options = @valid_options
  755. @checker.options[:mode] = "merge"
  756. @checker.receive([@event])
  757. }.to change { Event.count }.by(1)
  758. last_payload = Event.last.payload
  759. expect(last_payload['link']).to eq('Random')
  760. end
  761. end
  762. end
  763. describe "checking with http basic auth" do
  764. before do
  765. stub_request(:any, /example/).
  766. with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
  767. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  768. @valid_options = {
  769. 'name' => "XKCD",
  770. 'expected_update_period_in_days' => "2",
  771. 'type' => "html",
  772. 'url' => "http://www.example.com",
  773. 'mode' => 'on_change',
  774. 'extract' => {
  775. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  776. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  777. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  778. },
  779. 'basic_auth' => "user:pass"
  780. }
  781. @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
  782. @checker.user = users(:bob)
  783. @checker.save!
  784. end
  785. describe "#check" do
  786. it "should check for changes" do
  787. expect { @checker.check }.to change { Event.count }.by(1)
  788. expect { @checker.check }.not_to change { Event.count }
  789. end
  790. end
  791. end
  792. describe "checking with headers" do
  793. before do
  794. stub_request(:any, /example/).
  795. with(headers: { 'foo' => 'bar' }).
  796. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  797. @valid_options = {
  798. 'name' => "XKCD",
  799. 'expected_update_period_in_days' => "2",
  800. 'type' => "html",
  801. 'url' => "http://www.example.com",
  802. 'mode' => 'on_change',
  803. 'headers' => { 'foo' => 'bar' },
  804. 'extract' => {
  805. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  806. }
  807. }
  808. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  809. @checker.user = users(:bob)
  810. @checker.save!
  811. end
  812. describe "#check" do
  813. it "should check for changes" do
  814. expect { @checker.check }.to change { Event.count }.by(1)
  815. end
  816. end
  817. end
  818. describe "checking urls" do
  819. before do
  820. stub_request(:any, /example/).
  821. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/urlTest.html")), :status => 200)
  822. @valid_options = {
  823. 'name' => "Url Test",
  824. 'expected_update_period_in_days' => "2",
  825. 'type' => "html",
  826. 'url' => "http://www.example.com",
  827. 'mode' => 'all',
  828. 'extract' => {
  829. 'url' => { 'css' => "a", 'value' => "@href" },
  830. }
  831. }
  832. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  833. @checker.user = users(:bob)
  834. @checker.save!
  835. end
  836. describe "#check" do
  837. before do
  838. expect { @checker.check }.to change { Event.count }.by(7)
  839. @events = Event.last(7)
  840. end
  841. it "should check hostname" do
  842. event = @events[0]
  843. expect(event.payload['url']).to eq("http://google.com")
  844. end
  845. it "should check unescaped query" do
  846. event = @events[1]
  847. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  848. end
  849. it "should check properly escaped query" do
  850. event = @events[2]
  851. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  852. end
  853. it "should check unescaped unicode url" do
  854. event = @events[3]
  855. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  856. end
  857. it "should check unescaped unicode query" do
  858. event = @events[4]
  859. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  860. end
  861. it "should check properly escaped unicode url" do
  862. event = @events[5]
  863. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  864. end
  865. it "should check properly escaped unicode query" do
  866. event = @events[6]
  867. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  868. end
  869. end
  870. end
  871. end