# website_agent_spec.rb

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133
require 'rails_helper'
  2. describe Agents::WebsiteAgent do
  3. describe "checking without basic auth" do
  4. before do
  5. stub_request(:any, /xkcd/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
  6. status: 200,
  7. headers: {
  8. 'X-Status-Message' => 'OK'
  9. })
  10. @valid_options = {
  11. 'name' => "XKCD",
  12. 'expected_update_period_in_days' => "2",
  13. 'type' => "html",
  14. 'url' => "http://xkcd.com",
  15. 'mode' => 'on_change',
  16. 'extract' => {
  17. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  18. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  19. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  20. }
  21. }
  22. @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2.days)
  23. @checker.user = users(:bob)
  24. @checker.save!
  25. end
  26. it_behaves_like WebRequestConcern
  27. describe "validations" do
  28. before do
  29. expect(@checker).to be_valid
  30. end
  31. it "should validate the integer fields" do
  32. @checker.options['expected_update_period_in_days'] = "2"
  33. expect(@checker).to be_valid
  34. @checker.options['expected_update_period_in_days'] = "nonsense"
  35. expect(@checker).not_to be_valid
  36. end
  37. it 'should validate the consider_http_error_success fields' do
  38. @checker.options['consider_http_error_success'] = [404]
  39. expect(@checker).to be_valid
  40. @checker.options['consider_http_error_success'] = [404, 404]
  41. expect(@checker).not_to be_valid
  42. @checker.options['consider_http_error_success'] = [404.0]
  43. expect(@checker).not_to be_valid
  44. @checker.options['consider_http_error_success'] = ["not_a_code"]
  45. expect(@checker).not_to be_valid
  46. @checker.options['consider_http_error_success'] = []
  47. expect(@checker).not_to be_valid
  48. end
  49. it "should validate uniqueness_look_back" do
  50. @checker.options['uniqueness_look_back'] = "nonsense"
  51. expect(@checker).not_to be_valid
  52. @checker.options['uniqueness_look_back'] = "2"
  53. expect(@checker).to be_valid
  54. end
  55. it "should validate mode" do
  56. @checker.options['mode'] = "nonsense"
  57. expect(@checker).not_to be_valid
  58. @checker.options['mode'] = "on_change"
  59. expect(@checker).to be_valid
  60. @checker.options['mode'] = "all"
  61. expect(@checker).to be_valid
  62. @checker.options['mode'] = ""
  63. expect(@checker).to be_valid
  64. end
  65. it "should validate the force_encoding option" do
  66. @checker.options['force_encoding'] = ''
  67. expect(@checker).to be_valid
  68. @checker.options['force_encoding'] = 'UTF-8'
  69. expect(@checker).to be_valid
  70. @checker.options['force_encoding'] = ['UTF-8']
  71. expect(@checker).not_to be_valid
  72. @checker.options['force_encoding'] = 'UTF-42'
  73. expect(@checker).not_to be_valid
  74. end
  75. context "in 'json' type" do
  76. it "should ensure that all extractions have a 'path'" do
  77. @checker.options['type'] = 'json'
  78. @checker.options['extract'] = {
  79. 'url' => { 'foo' => 'bar' },
  80. }
  81. expect(@checker).to_not be_valid
  82. expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)
  83. @checker.options['type'] = 'json'
  84. @checker.options['extract'] = {
  85. 'url' => { 'path' => 'bar' },
  86. }
  87. expect(@checker).to be_valid
  88. end
  89. end
  90. end
  91. describe "#check" do
  92. it "should check for changes (and update Event.expires_at)" do
  93. expect { @checker.check }.to change { Event.count }.by(1)
  94. event = Event.last
  95. sleep 2
  96. expect { @checker.check }.not_to change { Event.count }
  97. update_event = Event.last
  98. expect(update_event.expires_at).not_to eq(event.expires_at)
  99. end
  100. it "should always save events when in :all mode" do
  101. expect {
  102. @valid_options['mode'] = 'all'
  103. @checker.options = @valid_options
  104. @checker.check
  105. @checker.check
  106. }.to change { Event.count }.by(2)
  107. end
  108. it "should take uniqueness_look_back into account during deduplication" do
  109. @valid_options['mode'] = 'all'
  110. @checker.options = @valid_options
  111. @checker.check
  112. @checker.check
  113. event = Event.last
  114. event.payload = "{}"
  115. event.save
  116. expect {
  117. @valid_options['mode'] = 'on_change'
  118. @valid_options['uniqueness_look_back'] = 2
  119. @checker.options = @valid_options
  120. @checker.check
  121. }.not_to change { Event.count }
  122. expect {
  123. @valid_options['mode'] = 'on_change'
  124. @valid_options['uniqueness_look_back'] = 1
  125. @checker.options = @valid_options
  126. @checker.check
  127. }.to change { Event.count }.by(1)
  128. end
  129. it "should log an error if the number of results for a set of extraction patterns differs" do
  130. @valid_options['extract']['url']['css'] = "div"
  131. @checker.options = @valid_options
  132. @checker.check
  133. expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
  134. end
  135. it "should accept an array for url" do
  136. @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
  137. @checker.options = @valid_options
  138. expect { @checker.save! }.not_to raise_error;
  139. expect { @checker.check }.not_to raise_error;
  140. end
  141. it "should parse events from all urls in array" do
  142. expect {
  143. @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
  144. @valid_options['mode'] = 'all'
  145. @checker.options = @valid_options
  146. @checker.check
  147. }.to change { Event.count }.by(2)
  148. end
  149. it "should follow unique rules when parsing array of urls" do
  150. expect {
  151. @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
  152. @checker.options = @valid_options
  153. @checker.check
  154. }.to change { Event.count }.by(1)
  155. end
  156. end
  157. describe 'consider_http_error_success' do
  158. it 'should allow scraping from a 404 result' do
  159. json = {
  160. 'response' => {
  161. 'version' => 2,
  162. 'title' => "hello!"
  163. }
  164. }
  165. zipped = ActiveSupport::Gzip.compress(json.to_json)
  166. stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
  167. site = {
  168. 'name' => "Some JSON Response",
  169. 'expected_update_period_in_days' => "2",
  170. 'type' => "json",
  171. 'url' => "http://gzip.com",
  172. 'mode' => 'on_change',
  173. 'consider_http_error_success' => [404],
  174. 'extract' => {
  175. 'version' => { 'path' => 'response.version' },
  176. },
  177. # no unzip option
  178. }
  179. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  180. checker.user = users(:bob)
  181. checker.save!
  182. checker.check
  183. event = Event.last
  184. expect(event.payload['version']).to eq(2)
  185. end
  186. end
  187. describe 'unzipping' do
  188. it 'should unzip automatically if the response has Content-Encoding: gzip' do
  189. json = {
  190. 'response' => {
  191. 'version' => 2,
  192. 'title' => "hello!"
  193. }
  194. }
  195. zipped = ActiveSupport::Gzip.compress(json.to_json)
  196. stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 200)
  197. site = {
  198. 'name' => "Some JSON Response",
  199. 'expected_update_period_in_days' => "2",
  200. 'type' => "json",
  201. 'url' => "http://gzip.com",
  202. 'mode' => 'on_change',
  203. 'extract' => {
  204. 'version' => { 'path' => 'response.version' },
  205. },
  206. # no unzip option
  207. }
  208. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  209. checker.user = users(:bob)
  210. checker.save!
  211. checker.check
  212. event = Event.last
  213. expect(event.payload['version']).to eq(2)
  214. end
  215. it 'should unzip with unzip option' do
  216. json = {
  217. 'response' => {
  218. 'version' => 2,
  219. 'title' => "hello!"
  220. }
  221. }
  222. zipped = ActiveSupport::Gzip.compress(json.to_json)
  223. stub_request(:any, /gzip/).to_return(body: zipped, status: 200)
  224. site = {
  225. 'name' => "Some JSON Response",
  226. 'expected_update_period_in_days' => "2",
  227. 'type' => "json",
  228. 'url' => "http://gzip.com",
  229. 'mode' => 'on_change',
  230. 'extract' => {
  231. 'version' => { 'path' => 'response.version' },
  232. },
  233. 'unzip' => 'gzip',
  234. }
  235. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  236. checker.user = users(:bob)
  237. checker.save!
  238. checker.check
  239. event = Event.last
  240. expect(event.payload['version']).to eq(2)
  241. end
  242. it 'should either avoid or support a raw deflate stream (#1018)' do
  243. stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ }).
  244. to_return(body: 'hello',
  245. status: 200)
  246. stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /deflate/ }).
  247. to_return(body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
  248. headers: { 'Content-Encoding' => 'deflate' },
  249. status: 200)
  250. site = {
  251. 'name' => 'Some Response',
  252. 'expected_update_period_in_days' => '2',
  253. 'type' => 'text',
  254. 'url' => 'http://deflate',
  255. 'mode' => 'on_change',
  256. 'extract' => {
  257. 'content' => { 'regexp' => '.+', 'index' => 0 }
  258. }
  259. }
  260. checker = Agents::WebsiteAgent.new(name: "Deflate Test", options: site)
  261. checker.user = users(:bob)
  262. checker.save!
  263. expect {
  264. checker.check
  265. }.to change { Event.count }.by(1)
  266. event = Event.last
  267. expect(event.payload['content']).to eq('hello')
  268. end
  269. end
  270. describe 'encoding' do
  271. it 'should be forced with force_encoding option' do
  272. huginn = "\u{601d}\u{8003}"
  273. stub_request(:any, /no-encoding/).to_return(body: {
  274. value: huginn,
  275. }.to_json.encode(Encoding::EUC_JP).b, headers: {
  276. 'Content-Type' => 'application/json',
  277. }, status: 200)
  278. site = {
  279. 'name' => "Some JSON Response",
  280. 'expected_update_period_in_days' => "2",
  281. 'type' => "json",
  282. 'url' => "http://no-encoding.example.com",
  283. 'mode' => 'on_change',
  284. 'extract' => {
  285. 'value' => { 'path' => 'value' },
  286. },
  287. 'force_encoding' => 'EUC-JP',
  288. }
  289. checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site)
  290. checker.user = users(:bob)
  291. checker.save!
  292. expect { checker.check }.to change { Event.count }.by(1)
  293. event = Event.last
  294. expect(event.payload['value']).to eq(huginn)
  295. end
  296. it 'should be overridden with force_encoding option' do
  297. huginn = "\u{601d}\u{8003}"
  298. stub_request(:any, /wrong-encoding/).to_return(body: {
  299. value: huginn,
  300. }.to_json.encode(Encoding::EUC_JP).b, headers: {
  301. 'Content-Type' => 'application/json; UTF-8',
  302. }, status: 200)
  303. site = {
  304. 'name' => "Some JSON Response",
  305. 'expected_update_period_in_days' => "2",
  306. 'type' => "json",
  307. 'url' => "http://wrong-encoding.example.com",
  308. 'mode' => 'on_change',
  309. 'extract' => {
  310. 'value' => { 'path' => 'value' },
  311. },
  312. 'force_encoding' => 'EUC-JP',
  313. }
  314. checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site)
  315. checker.user = users(:bob)
  316. checker.save!
  317. expect { checker.check }.to change { Event.count }.by(1)
  318. event = Event.last
  319. expect(event.payload['value']).to eq(huginn)
  320. end
  321. it 'should be determined by charset in Content-Type' do
  322. huginn = "\u{601d}\u{8003}"
  323. stub_request(:any, /charset-euc-jp/).to_return(body: {
  324. value: huginn,
  325. }.to_json.encode(Encoding::EUC_JP), headers: {
  326. 'Content-Type' => 'application/json; charset=EUC-JP',
  327. }, status: 200)
  328. site = {
  329. 'name' => "Some JSON Response",
  330. 'expected_update_period_in_days' => "2",
  331. 'type' => "json",
  332. 'url' => "http://charset-euc-jp.example.com",
  333. 'mode' => 'on_change',
  334. 'extract' => {
  335. 'value' => { 'path' => 'value' },
  336. },
  337. }
  338. checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
  339. checker.user = users(:bob)
  340. checker.save!
  341. expect { checker.check }.to change { Event.count }.by(1)
  342. event = Event.last
  343. expect(event.payload['value']).to eq(huginn)
  344. end
  345. it 'should default to UTF-8 when unknown charset is found' do
  346. huginn = "\u{601d}\u{8003}"
  347. stub_request(:any, /charset-unknown/).to_return(body: {
  348. value: huginn,
  349. }.to_json.b, headers: {
  350. 'Content-Type' => 'application/json; charset=unicode',
  351. }, status: 200)
  352. site = {
  353. 'name' => "Some JSON Response",
  354. 'expected_update_period_in_days' => "2",
  355. 'type' => "json",
  356. 'url' => "http://charset-unknown.example.com",
  357. 'mode' => 'on_change',
  358. 'extract' => {
  359. 'value' => { 'path' => 'value' },
  360. },
  361. }
  362. checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
  363. checker.user = users(:bob)
  364. checker.save!
  365. expect { checker.check }.to change { Event.count }.by(1)
  366. event = Event.last
  367. expect(event.payload['value']).to eq(huginn)
  368. end
  369. end
  370. describe '#working?' do
  371. it 'checks if events have been received within the expected receive period' do
  372. stubbed_time = Time.now
  373. stub(Time).now { stubbed_time }
  374. expect(@checker).not_to be_working # No events created
  375. @checker.check
  376. expect(@checker.reload).to be_working # Just created events
  377. @checker.error "oh no!"
  378. expect(@checker.reload).not_to be_working # There is a recent error
  379. stubbed_time = 20.minutes.from_now
  380. @checker.events.delete_all
  381. @checker.check
  382. expect(@checker.reload).to be_working # There is a newer event now
  383. stubbed_time = 2.days.from_now
  384. expect(@checker.reload).not_to be_working # Two days have passed without a new event having been created
  385. end
  386. end
  387. describe "parsing" do
  388. it "parses CSS" do
  389. @checker.check
  390. event = Event.last
  391. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  392. expect(event.payload['title']).to eq("Evolving")
  393. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  394. end
  395. it "parses XPath" do
  396. @valid_options['extract'].each { |key, value|
  397. value.delete('css')
  398. value['xpath'] = "//*[@id='comic']//img"
  399. }
  400. @checker.options = @valid_options
  401. @checker.check
  402. event = Event.last
  403. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  404. expect(event.payload['title']).to eq("Evolving")
  405. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  406. end
  407. it "should turn relative urls to absolute" do
  408. rel_site = {
  409. 'name' => "XKCD",
  410. 'expected_update_period_in_days' => "2",
  411. 'type' => "html",
  412. 'url' => "http://xkcd.com",
  413. 'mode' => "on_change",
  414. 'extract' => {
  415. 'url' => {'css' => "#topLeft a", 'value' => "@href"},
  416. }
  417. }
  418. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  419. rel.user = users(:bob)
  420. rel.save!
  421. rel.check
  422. event = Event.last
  423. expect(event.payload['url']).to eq("http://xkcd.com/about")
  424. end
  425. it "should return an integer value if XPath evaluates to one" do
  426. rel_site = {
  427. 'name' => "XKCD",
  428. 'expected_update_period_in_days' => 2,
  429. 'type' => "html",
  430. 'url' => "http://xkcd.com",
  431. 'mode' => "on_change",
  432. 'extract' => {
  433. 'num_links' => {'css' => "#comicLinks", 'value' => "count(./a)"}
  434. }
  435. }
  436. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  437. rel.user = users(:bob)
  438. rel.save!
  439. rel.check
  440. event = Event.last
  441. expect(event.payload['num_links']).to eq("9")
  442. end
  443. it "should return all texts concatenated if XPath returns many text nodes" do
  444. rel_site = {
  445. 'name' => "XKCD",
  446. 'expected_update_period_in_days' => 2,
  447. 'type' => "html",
  448. 'url' => "http://xkcd.com",
  449. 'mode' => "on_change",
  450. 'extract' => {
  451. 'slogan' => {'css' => "#slogan", 'value' => ".//text()"}
  452. }
  453. }
  454. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  455. rel.user = users(:bob)
  456. rel.save!
  457. rel.check
  458. event = Event.last
  459. expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, and language.")
  460. end
  461. it "should interpolate _response_" do
  462. @valid_options['extract']['response_info'] =
  463. @valid_options['extract']['url'].merge(
  464. 'value' => '"{{ "The reponse was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." }}"'
  465. )
  466. @checker.options = @valid_options
  467. @checker.check
  468. event = Event.last
  469. expect(event.payload['response_info']).to eq('The reponse was 200 OK.')
  470. end
  471. describe "XML" do
  472. before do
  473. stub_request(:any, /github_rss/).to_return(
  474. body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")),
  475. status: 200
  476. )
  477. @checker = Agents::WebsiteAgent.new(name: 'github', options: {
  478. 'name' => 'GitHub',
  479. 'expected_update_period_in_days' => '2',
  480. 'type' => 'xml',
  481. 'url' => 'http://example.com/github_rss.atom',
  482. 'mode' => 'on_change',
  483. 'extract' => {
  484. 'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
  485. 'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
  486. 'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
  487. }
  488. }, keep_events_for: 2.days)
  489. @checker.user = users(:bob)
  490. @checker.save!
  491. end
  492. it "works with XPath" do
  493. expect {
  494. @checker.check
  495. }.to change { Event.count }.by(20)
  496. event = Event.last
  497. expect(event.payload['title']).to eq('Shift to dev group')
  498. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  499. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  500. end
  501. it "works with XPath with namespaces unstripped" do
  502. @checker.options['use_namespaces'] = 'true'
  503. @checker.save!
  504. expect {
  505. @checker.check
  506. }.to change { Event.count }.by(0)
  507. @checker.options['extract'] = {
  508. 'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
  509. 'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
  510. 'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' },
  511. }
  512. @checker.save!
  513. expect {
  514. @checker.check
  515. }.to change { Event.count }.by(20)
  516. event = Event.last
  517. expect(event.payload['title']).to eq('Shift to dev group')
  518. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  519. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  520. end
  521. it "works with CSS selectors" do
  522. @checker.options['extract'] = {
  523. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  524. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  525. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  526. }
  527. @checker.save!
  528. expect {
  529. @checker.check
  530. }.to change { Event.count }.by(20)
  531. event = Event.last
  532. expect(event.payload['title']).to be_empty
  533. expect(event.payload['thumbnail']).to be_empty
  534. @checker.options['extract'] = {
  535. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
  536. 'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
  537. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' },
  538. }
  539. @checker.save!
  540. expect {
  541. @checker.check
  542. }.to change { Event.count }.by(20)
  543. event = Event.last
  544. expect(event.payload['title']).to eq('Shift to dev group')
  545. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  546. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  547. end
  548. it "works with CSS selectors with namespaces stripped" do
  549. @checker.options['extract'] = {
  550. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  551. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  552. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  553. }
  554. @checker.options['use_namespaces'] = 'false'
  555. @checker.save!
  556. expect {
  557. @checker.check
  558. }.to change { Event.count }.by(20)
  559. event = Event.last
  560. expect(event.payload['title']).to eq('Shift to dev group')
  561. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  562. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  563. end
  564. end
  565. describe "XML with cdata" do
  566. before do
  567. stub_request(:any, /cdata_rss/).to_return(
  568. body: File.read(Rails.root.join("spec/data_fixtures/cdata_rss.atom")),
  569. status: 200
  570. )
  571. @checker = Agents::WebsiteAgent.new(name: 'cdata', options: {
  572. 'name' => 'CDATA',
  573. 'expected_update_period_in_days' => '2',
  574. 'type' => 'xml',
  575. 'url' => 'http://example.com/cdata_rss.atom',
  576. 'mode' => 'on_change',
  577. 'extract' => {
  578. 'author' => { 'xpath' => '/feed/entry/author/name', 'value' => './/text()'},
  579. 'title' => { 'xpath' => '/feed/entry/title', 'value' => './/text()' },
  580. 'content' => { 'xpath' => '/feed/entry/content', 'value' => './/text()' },
  581. }
  582. }, keep_events_for: 2.days)
  583. @checker.user = users(:bob)
  584. @checker.save!
  585. end
  586. it "works with XPath" do
  587. expect {
  588. @checker.check
  589. }.to change { Event.count }.by(10)
  590. event = Event.last
  591. expect(event.payload['author']).to eq('bill98')
  592. expect(event.payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
  593. expect(event.payload['content']).to start_with('Can I ')
  594. end
  595. end
  596. describe "JSON" do
  597. it "works with paths" do
  598. json = {
  599. 'response' => {
  600. 'version' => 2,
  601. 'title' => "hello!"
  602. }
  603. }
  604. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  605. site = {
  606. 'name' => "Some JSON Response",
  607. 'expected_update_period_in_days' => "2",
  608. 'type' => "json",
  609. 'url' => "http://json-site.com",
  610. 'mode' => 'on_change',
  611. 'extract' => {
  612. 'version' => {'path' => "response.version"},
  613. 'title' => {'path' => "response.title"}
  614. }
  615. }
  616. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  617. checker.user = users(:bob)
  618. checker.save!
  619. checker.check
  620. event = Event.last
  621. expect(event.payload['version']).to eq(2)
  622. expect(event.payload['title']).to eq("hello!")
  623. end
  624. it "can handle arrays" do
  625. json = {
  626. 'response' => {
  627. 'data' => [
  628. {'title' => "first", 'version' => 2},
  629. {'title' => "second", 'version' => 2.5}
  630. ]
  631. }
  632. }
  633. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  634. site = {
  635. 'name' => "Some JSON Response",
  636. 'expected_update_period_in_days' => "2",
  637. 'type' => "json",
  638. 'url' => "http://json-site.com",
  639. 'mode' => 'on_change',
  640. 'extract' => {
  641. :title => {'path' => "response.data[*].title"},
  642. :version => {'path' => "response.data[*].version"}
  643. }
  644. }
  645. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  646. checker.user = users(:bob)
  647. checker.save!
  648. expect {
  649. checker.check
  650. }.to change { Event.count }.by(2)
  651. (event2, event1) = Event.last(2)
  652. expect(event1.payload['version']).to eq(2.5)
  653. expect(event1.payload['title']).to eq("second")
  654. expect(event2.payload['version']).to eq(2)
  655. expect(event2.payload['title']).to eq("first")
  656. end
  657. it "stores the whole object if :extract is not specified" do
  658. json = {
  659. 'response' => {
  660. 'version' => 2,
  661. 'title' => "hello!"
  662. }
  663. }
  664. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  665. site = {
  666. 'name' => "Some JSON Response",
  667. 'expected_update_period_in_days' => "2",
  668. 'type' => "json",
  669. 'url' => "http://json-site.com",
  670. 'mode' => 'on_change'
  671. }
  672. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  673. checker.user = users(:bob)
  674. checker.save!
  675. checker.check
  676. event = Event.last
  677. expect(event.payload['response']['version']).to eq(2)
  678. expect(event.payload['response']['title']).to eq("hello!")
  679. end
  680. end
  681. describe "text parsing" do
  682. before do
  683. stub_request(:any, /text-site/).to_return(body: <<-EOF, status: 200)
  684. water: wet
  685. fire: hot
  686. EOF
  687. site = {
  688. 'name' => 'Some Text Response',
  689. 'expected_update_period_in_days' => '2',
  690. 'type' => 'text',
  691. 'url' => 'http://text-site.com',
  692. 'mode' => 'on_change',
  693. 'extract' => {
  694. 'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
  695. 'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' },
  696. }
  697. }
  698. @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
  699. @checker.user = users(:bob)
  700. @checker.save!
  701. end
  702. it "works with regexp with named capture" do
  703. @checker.options = @checker.options.merge('extract' => {
  704. 'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
  705. 'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
  706. })
  707. expect {
  708. @checker.check
  709. }.to change { Event.count }.by(2)
  710. event1, event2 = Event.last(2)
  711. expect(event1.payload['word']).to eq('water')
  712. expect(event1.payload['property']).to eq('wet')
  713. expect(event2.payload['word']).to eq('fire')
  714. expect(event2.payload['property']).to eq('hot')
  715. end
  716. it "works with regexp" do
  717. expect {
  718. @checker.check
  719. }.to change { Event.count }.by(2)
  720. event1, event2 = Event.last(2)
  721. expect(event1.payload['word']).to eq('water')
  722. expect(event1.payload['property']).to eq('wet')
  723. expect(event2.payload['word']).to eq('fire')
  724. expect(event2.payload['property']).to eq('hot')
  725. end
  726. end
  727. end
  728. describe "#receive" do
  729. describe "with a url or url_from_event" do
  730. before do
  731. @event = Event.new
  732. @event.agent = agents(:bob_rain_notifier_agent)
  733. @event.payload = {
  734. 'url' => 'http://foo.com',
  735. 'link' => 'Random'
  736. }
  737. end
  738. it "should use url_from_event as the url to scrape" do
  739. stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  740. @checker.options = @valid_options.merge(
  741. 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
  742. )
  743. @checker.receive([@event])
  744. expect(stub).to have_been_requested
  745. end
  746. it "should use the Agent's `url` option if url_from_event is not set" do
  747. expect {
  748. @checker.options = @valid_options
  749. @checker.receive([@event])
  750. }.to change { Event.count }.by(1)
  751. end
  752. it "should allow url_from_event to be an array of urls" do
  753. stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  754. stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Ffoo.com')
  755. @checker.options = @valid_options.merge(
  756. 'url_from_event' => ['http://example.org/?url={{url | uri_escape}}', 'http://google.org/?url={{url | uri_escape}}']
  757. )
  758. @checker.receive([@event])
  759. expect(stub1).to have_been_requested
  760. expect(stub2).to have_been_requested
  761. end
  762. it "should interpolate values from incoming event payload" do
  763. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  764. expect {
  765. @valid_options['url_from_event'] = '{{ url }}'
  766. @valid_options['extract'] = {
  767. 'from' => {
  768. 'xpath' => '*[1]',
  769. 'value' => '{{url | to_xpath}}'
  770. },
  771. 'to' => {
  772. 'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
  773. 'value' => '@href'
  774. },
  775. }
  776. @checker.options = @valid_options
  777. @checker.receive([@event])
  778. }.to change { Event.count }.by(1)
  779. expect(Event.last.payload).to eq({
  780. 'from' => 'http://foo.com',
  781. 'to' => 'http://dynamic.xkcd.com/random/comic/',
  782. })
  783. end
  784. it "should use the options url if no url is in the event payload, and `url_from_event` is not provided" do
  785. @checker.options['mode'] = 'merge'
  786. @event.payload.delete('url')
  787. expect {
  788. @checker.receive([@event])
  789. }.to change { Event.count }.by(1)
  790. expect(Event.last.payload['title']).to eq('Evolving')
  791. expect(Event.last.payload['link']).to eq('Random')
  792. end
  793. it "should interpolate values from incoming event payload and _response_" do
  794. @event.payload['title'] = 'XKCD'
  795. expect {
  796. @valid_options['extract'] = {
  797. 'response_info' => @valid_options['extract']['url'].merge(
  798. 'value' => '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
  799. )
  800. }
  801. @checker.options = @valid_options
  802. @checker.receive([@event])
  803. }.to change { Event.count }.by(1)
  804. expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
  805. end
  806. it "should support merging of events" do
  807. expect {
  808. @checker.options = @valid_options
  809. @checker.options[:mode] = "merge"
  810. @checker.receive([@event])
  811. }.to change { Event.count }.by(1)
  812. last_payload = Event.last.payload
  813. expect(last_payload['link']).to eq('Random')
  814. end
  815. end
  816. describe "with a data_from_event" do
  817. describe "with json data" do
  818. before do
  819. @event = Event.new
  820. @event.agent = agents(:bob_rain_notifier_agent)
  821. @event.payload = {
  822. 'something' => 'some value',
  823. 'some_object' => {
  824. 'some_data' => { hello: 'world' }.to_json
  825. }
  826. }
  827. @event.save!
  828. @checker.options = @valid_options.merge(
  829. 'type' => 'json',
  830. 'data_from_event' => '{{ some_object.some_data }}',
  831. 'extract' => {
  832. 'value' => { 'path' => 'hello' }
  833. }
  834. )
  835. end
  836. it "should extract from the event data in the incoming event payload" do
  837. expect {
  838. @checker.receive([@event])
  839. }.to change { Event.count }.by(1)
  840. expect(@checker.events.last.payload).to eq({ 'value' => 'world' })
  841. end
  842. it "should support merge mode" do
  843. @checker.options['mode'] = "merge"
  844. expect {
  845. @checker.receive([@event])
  846. }.to change { Event.count }.by(1)
  847. expect(@checker.events.last.payload).to eq(@event.payload.merge('value' => 'world'))
  848. end
  849. it "should output an error when nothing can be found at the path" do
  850. @checker.options = @checker.options.merge(
  851. 'data_from_event' => '{{ some_object.mistake }}'
  852. )
  853. expect {
  854. @checker.receive([@event])
  855. }.to_not change { Event.count }
  856. expect(@checker.logs.last.message).to match(/No data was found in the Event payload using the template {{ some_object\.mistake }}/)
  857. end
  858. it "should output an error when the data cannot be parsed" do
  859. @event.update_attribute :payload, @event.payload.merge('some_object' => { 'some_data' => '{invalid json' })
  860. expect {
  861. @checker.receive([@event])
  862. }.to_not change { Event.count }
  863. expect(@checker.logs.last.message).to match(/Error when handling event data:/)
  864. end
  865. end
  866. describe "with HTML data" do
  867. before do
  868. @event = Event.new
  869. @event.agent = agents(:bob_rain_notifier_agent)
  870. @event.payload = {
  871. 'url' => 'http://xkcd.com',
  872. 'some_object' => {
  873. 'some_data' => "<div><span class='title'>Title!</span><span class='body'>Body!</span></div>"
  874. }
  875. }
  876. @event.save!
  877. @checker.options = @valid_options.merge(
  878. 'type' => 'html',
  879. 'data_from_event' => '{{ some_object.some_data }}',
  880. 'extract' => {
  881. 'title' => { 'css' => ".title", 'value' => ".//text()" },
  882. 'body' => { 'css' => "div span.body", 'value' => ".//text()" }
  883. }
  884. )
  885. end
  886. it "should extract from the event data in the incoming event payload" do
  887. expect {
  888. @checker.receive([@event])
  889. }.to change { Event.count }.by(1)
  890. expect(@checker.events.last.payload).to eq({ 'title' => 'Title!', 'body' => 'Body!' })
  891. end
  892. end
  893. end
  894. end
  895. end
  896. describe "checking with http basic auth" do
  897. before do
  898. stub_request(:any, /example/).
  899. with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
  900. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  901. @valid_options = {
  902. 'name' => "XKCD",
  903. 'expected_update_period_in_days' => "2",
  904. 'type' => "html",
  905. 'url' => "http://www.example.com",
  906. 'mode' => 'on_change',
  907. 'extract' => {
  908. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  909. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  910. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  911. },
  912. 'basic_auth' => "user:pass"
  913. }
  914. @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
  915. @checker.user = users(:bob)
  916. @checker.save!
  917. end
  918. describe "#check" do
  919. it "should check for changes" do
  920. expect { @checker.check }.to change { Event.count }.by(1)
  921. expect { @checker.check }.not_to change { Event.count }
  922. end
  923. end
  924. end
  925. describe "checking with headers" do
  926. before do
  927. stub_request(:any, /example/).
  928. with(headers: { 'foo' => 'bar' }).
  929. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  930. @valid_options = {
  931. 'name' => "XKCD",
  932. 'expected_update_period_in_days' => "2",
  933. 'type' => "html",
  934. 'url' => "http://www.example.com",
  935. 'mode' => 'on_change',
  936. 'headers' => { 'foo' => 'bar' },
  937. 'extract' => {
  938. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  939. }
  940. }
  941. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  942. @checker.user = users(:bob)
  943. @checker.save!
  944. end
  945. describe "#check" do
  946. it "should check for changes" do
  947. expect { @checker.check }.to change { Event.count }.by(1)
  948. end
  949. end
  950. end
  951. describe "checking urls" do
  952. before do
  953. stub_request(:any, /example/).
  954. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/urlTest.html")), :status => 200)
  955. @valid_options = {
  956. 'name' => "Url Test",
  957. 'expected_update_period_in_days' => "2",
  958. 'type' => "html",
  959. 'url' => "http://www.example.com",
  960. 'mode' => 'all',
  961. 'extract' => {
  962. 'url' => { 'css' => "a", 'value' => "@href" },
  963. }
  964. }
  965. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  966. @checker.user = users(:bob)
  967. @checker.save!
  968. end
  969. describe "#check" do
  970. before do
  971. expect { @checker.check }.to change { Event.count }.by(7)
  972. @events = Event.last(7)
  973. end
  974. it "should check hostname" do
  975. event = @events[0]
  976. expect(event.payload['url']).to eq("http://google.com")
  977. end
  978. it "should check unescaped query" do
  979. event = @events[1]
  980. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  981. end
  982. it "should check properly escaped query" do
  983. event = @events[2]
  984. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  985. end
  986. it "should check unescaped unicode url" do
  987. event = @events[3]
  988. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  989. end
  990. it "should check unescaped unicode query" do
  991. event = @events[4]
  992. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  993. end
  994. it "should check properly escaped unicode url" do
  995. event = @events[5]
  996. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  997. end
  998. it "should check properly escaped unicode query" do
  999. event = @events[6]
  1000. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1001. end
  1002. end
  1003. end
  1004. end