# spec/models/agents/website_agent_spec.rb
  1. require 'rails_helper'
  2. describe Agents::WebsiteAgent do
  3. describe "checking without basic auth" do
  4. before do
  5. stub_request(:any, /xkcd/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")),
  6. status: 200,
  7. headers: {
  8. 'X-Status-Message' => 'OK'
  9. })
  10. @valid_options = {
  11. 'name' => "XKCD",
  12. 'expected_update_period_in_days' => "2",
  13. 'type' => "html",
  14. 'url' => "http://xkcd.com",
  15. 'mode' => 'on_change',
  16. 'extract' => {
  17. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  18. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  19. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  20. }
  21. }
  22. @checker = Agents::WebsiteAgent.new(:name => "xkcd", :options => @valid_options, :keep_events_for => 2.days)
  23. @checker.user = users(:bob)
  24. @checker.save!
  25. end
  26. it_behaves_like WebRequestConcern
  27. describe "validations" do
  28. before do
  29. expect(@checker).to be_valid
  30. end
  31. it "should validate the integer fields" do
  32. @checker.options['expected_update_period_in_days'] = "2"
  33. expect(@checker).to be_valid
  34. @checker.options['expected_update_period_in_days'] = "nonsense"
  35. expect(@checker).not_to be_valid
  36. end
  37. it 'should validate the http_success_codes fields' do
  38. @checker.options['http_success_codes'] = [404]
  39. expect(@checker).to be_valid
  40. @checker.options['http_success_codes'] = [404, 404]
  41. expect(@checker).not_to be_valid
  42. @checker.options['http_success_codes'] = [404, "422"]
  43. expect(@checker).to be_valid
  44. @checker.options['http_success_codes'] = [404.0]
  45. expect(@checker).not_to be_valid
  46. @checker.options['http_success_codes'] = ["not_a_code"]
  47. expect(@checker).not_to be_valid
  48. @checker.options['http_success_codes'] = []
  49. expect(@checker).not_to be_valid
  50. end
  51. it "should validate uniqueness_look_back" do
  52. @checker.options['uniqueness_look_back'] = "nonsense"
  53. expect(@checker).not_to be_valid
  54. @checker.options['uniqueness_look_back'] = "2"
  55. expect(@checker).to be_valid
  56. end
  57. it "should validate mode" do
  58. @checker.options['mode'] = "nonsense"
  59. expect(@checker).not_to be_valid
  60. @checker.options['mode'] = "on_change"
  61. expect(@checker).to be_valid
  62. @checker.options['mode'] = "all"
  63. expect(@checker).to be_valid
  64. @checker.options['mode'] = ""
  65. expect(@checker).to be_valid
  66. end
  67. it "should validate the force_encoding option" do
  68. @checker.options['force_encoding'] = ''
  69. expect(@checker).to be_valid
  70. @checker.options['force_encoding'] = 'UTF-8'
  71. expect(@checker).to be_valid
  72. @checker.options['force_encoding'] = ['UTF-8']
  73. expect(@checker).not_to be_valid
  74. @checker.options['force_encoding'] = 'UTF-42'
  75. expect(@checker).not_to be_valid
  76. end
  77. context "in 'json' type" do
  78. it "should ensure that all extractions have a 'path'" do
  79. @checker.options['type'] = 'json'
  80. @checker.options['extract'] = {
  81. 'url' => { 'foo' => 'bar' },
  82. }
  83. expect(@checker).to_not be_valid
  84. expect(@checker.errors_on(:base)).to include(/When type is json, all extractions must have a path attribute/)
  85. @checker.options['type'] = 'json'
  86. @checker.options['extract'] = {
  87. 'url' => { 'path' => 'bar' },
  88. }
  89. expect(@checker).to be_valid
  90. end
  91. end
  92. end
  93. describe "#check" do
  94. it "should check for changes (and update Event.expires_at)" do
  95. expect { @checker.check }.to change { Event.count }.by(1)
  96. event = Event.last
  97. sleep 2
  98. expect { @checker.check }.not_to change { Event.count }
  99. update_event = Event.last
  100. expect(update_event.expires_at).not_to eq(event.expires_at)
  101. end
  102. it "should always save events when in :all mode" do
  103. expect {
  104. @valid_options['mode'] = 'all'
  105. @checker.options = @valid_options
  106. @checker.check
  107. @checker.check
  108. }.to change { Event.count }.by(2)
  109. end
  110. it "should take uniqueness_look_back into account during deduplication" do
  111. @valid_options['mode'] = 'all'
  112. @checker.options = @valid_options
  113. @checker.check
  114. @checker.check
  115. event = Event.last
  116. event.payload = "{}"
  117. event.save
  118. expect {
  119. @valid_options['mode'] = 'on_change'
  120. @valid_options['uniqueness_look_back'] = 2
  121. @checker.options = @valid_options
  122. @checker.check
  123. }.not_to change { Event.count }
  124. expect {
  125. @valid_options['mode'] = 'on_change'
  126. @valid_options['uniqueness_look_back'] = 1
  127. @checker.options = @valid_options
  128. @checker.check
  129. }.to change { Event.count }.by(1)
  130. end
  131. it "should log an error if the number of results for a set of extraction patterns differs" do
  132. @valid_options['extract']['url']['css'] = "div"
  133. @checker.options = @valid_options
  134. @checker.check
  135. expect(@checker.logs.first.message).to match(/Got an uneven number of matches/)
  136. end
  137. it "should accept an array for url" do
  138. @valid_options['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
  139. @checker.options = @valid_options
  140. expect { @checker.save! }.not_to raise_error;
  141. expect { @checker.check }.not_to raise_error;
  142. end
  143. it "should parse events from all urls in array" do
  144. expect {
  145. @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
  146. @valid_options['mode'] = 'all'
  147. @checker.options = @valid_options
  148. @checker.check
  149. }.to change { Event.count }.by(2)
  150. end
  151. it "should follow unique rules when parsing array of urls" do
  152. expect {
  153. @valid_options['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
  154. @checker.options = @valid_options
  155. @checker.check
  156. }.to change { Event.count }.by(1)
  157. end
  158. end
  159. describe 'http_success_codes' do
  160. it 'should allow scraping from a 404 result' do
  161. json = {
  162. 'response' => {
  163. 'version' => 2,
  164. 'title' => "hello!"
  165. }
  166. }
  167. zipped = ActiveSupport::Gzip.compress(json.to_json)
  168. stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 404)
  169. site = {
  170. 'name' => "Some JSON Response",
  171. 'expected_update_period_in_days' => "2",
  172. 'type' => "json",
  173. 'url' => "http://gzip.com",
  174. 'mode' => 'on_change',
  175. 'http_success_codes' => [404],
  176. 'extract' => {
  177. 'version' => { 'path' => 'response.version' },
  178. },
  179. # no unzip option
  180. }
  181. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  182. checker.user = users(:bob)
  183. checker.save!
  184. checker.check
  185. event = Event.last
  186. expect(event.payload['version']).to eq(2)
  187. end
  188. end
  189. describe 'unzipping' do
  190. it 'should unzip automatically if the response has Content-Encoding: gzip' do
  191. json = {
  192. 'response' => {
  193. 'version' => 2,
  194. 'title' => "hello!"
  195. }
  196. }
  197. zipped = ActiveSupport::Gzip.compress(json.to_json)
  198. stub_request(:any, /gzip/).to_return(body: zipped, headers: { 'Content-Encoding' => 'gzip' }, status: 200)
  199. site = {
  200. 'name' => "Some JSON Response",
  201. 'expected_update_period_in_days' => "2",
  202. 'type' => "json",
  203. 'url' => "http://gzip.com",
  204. 'mode' => 'on_change',
  205. 'extract' => {
  206. 'version' => { 'path' => 'response.version' },
  207. },
  208. # no unzip option
  209. }
  210. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  211. checker.user = users(:bob)
  212. checker.save!
  213. checker.check
  214. event = Event.last
  215. expect(event.payload['version']).to eq(2)
  216. end
  217. it 'should unzip with unzip option' do
  218. json = {
  219. 'response' => {
  220. 'version' => 2,
  221. 'title' => "hello!"
  222. }
  223. }
  224. zipped = ActiveSupport::Gzip.compress(json.to_json)
  225. stub_request(:any, /gzip/).to_return(body: zipped, status: 200)
  226. site = {
  227. 'name' => "Some JSON Response",
  228. 'expected_update_period_in_days' => "2",
  229. 'type' => "json",
  230. 'url' => "http://gzip.com",
  231. 'mode' => 'on_change',
  232. 'extract' => {
  233. 'version' => { 'path' => 'response.version' },
  234. },
  235. 'unzip' => 'gzip',
  236. }
  237. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  238. checker.user = users(:bob)
  239. checker.save!
  240. checker.check
  241. event = Event.last
  242. expect(event.payload['version']).to eq(2)
  243. end
  244. it 'should either avoid or support a raw deflate stream (#1018)' do
  245. stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /\A(?!.*deflate)/ }).
  246. to_return(body: 'hello',
  247. status: 200)
  248. stub_request(:any, /deflate/).with(headers: { 'Accept-Encoding' => /deflate/ }).
  249. to_return(body: "\xcb\x48\xcd\xc9\xc9\x07\x00\x06\x2c".b,
  250. headers: { 'Content-Encoding' => 'deflate' },
  251. status: 200)
  252. site = {
  253. 'name' => 'Some Response',
  254. 'expected_update_period_in_days' => '2',
  255. 'type' => 'text',
  256. 'url' => 'http://deflate',
  257. 'mode' => 'on_change',
  258. 'extract' => {
  259. 'content' => { 'regexp' => '.+', 'index' => 0 }
  260. }
  261. }
  262. checker = Agents::WebsiteAgent.new(name: "Deflate Test", options: site)
  263. checker.user = users(:bob)
  264. checker.save!
  265. expect {
  266. checker.check
  267. }.to change { Event.count }.by(1)
  268. event = Event.last
  269. expect(event.payload['content']).to eq('hello')
  270. end
  271. end
  272. describe 'encoding' do
  273. it 'should be forced with force_encoding option' do
  274. huginn = "\u{601d}\u{8003}"
  275. stub_request(:any, /no-encoding/).to_return(body: {
  276. value: huginn,
  277. }.to_json.encode(Encoding::EUC_JP).b, headers: {
  278. 'Content-Type' => 'application/json',
  279. }, status: 200)
  280. site = {
  281. 'name' => "Some JSON Response",
  282. 'expected_update_period_in_days' => "2",
  283. 'type' => "json",
  284. 'url' => "http://no-encoding.example.com",
  285. 'mode' => 'on_change',
  286. 'extract' => {
  287. 'value' => { 'path' => 'value' },
  288. },
  289. 'force_encoding' => 'EUC-JP',
  290. }
  291. checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site)
  292. checker.user = users(:bob)
  293. checker.save!
  294. expect { checker.check }.to change { Event.count }.by(1)
  295. event = Event.last
  296. expect(event.payload['value']).to eq(huginn)
  297. end
  298. it 'should be overridden with force_encoding option' do
  299. huginn = "\u{601d}\u{8003}"
  300. stub_request(:any, /wrong-encoding/).to_return(body: {
  301. value: huginn,
  302. }.to_json.encode(Encoding::EUC_JP).b, headers: {
  303. 'Content-Type' => 'application/json; UTF-8',
  304. }, status: 200)
  305. site = {
  306. 'name' => "Some JSON Response",
  307. 'expected_update_period_in_days' => "2",
  308. 'type' => "json",
  309. 'url' => "http://wrong-encoding.example.com",
  310. 'mode' => 'on_change',
  311. 'extract' => {
  312. 'value' => { 'path' => 'value' },
  313. },
  314. 'force_encoding' => 'EUC-JP',
  315. }
  316. checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site)
  317. checker.user = users(:bob)
  318. checker.save!
  319. expect { checker.check }.to change { Event.count }.by(1)
  320. event = Event.last
  321. expect(event.payload['value']).to eq(huginn)
  322. end
  323. it 'should be determined by charset in Content-Type' do
  324. huginn = "\u{601d}\u{8003}"
  325. stub_request(:any, /charset-euc-jp/).to_return(body: {
  326. value: huginn,
  327. }.to_json.encode(Encoding::EUC_JP), headers: {
  328. 'Content-Type' => 'application/json; charset=EUC-JP',
  329. }, status: 200)
  330. site = {
  331. 'name' => "Some JSON Response",
  332. 'expected_update_period_in_days' => "2",
  333. 'type' => "json",
  334. 'url' => "http://charset-euc-jp.example.com",
  335. 'mode' => 'on_change',
  336. 'extract' => {
  337. 'value' => { 'path' => 'value' },
  338. },
  339. }
  340. checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
  341. checker.user = users(:bob)
  342. checker.save!
  343. expect { checker.check }.to change { Event.count }.by(1)
  344. event = Event.last
  345. expect(event.payload['value']).to eq(huginn)
  346. end
  347. it 'should default to UTF-8 when unknown charset is found' do
  348. huginn = "\u{601d}\u{8003}"
  349. stub_request(:any, /charset-unknown/).to_return(body: {
  350. value: huginn,
  351. }.to_json.b, headers: {
  352. 'Content-Type' => 'application/json; charset=unicode',
  353. }, status: 200)
  354. site = {
  355. 'name' => "Some JSON Response",
  356. 'expected_update_period_in_days' => "2",
  357. 'type' => "json",
  358. 'url' => "http://charset-unknown.example.com",
  359. 'mode' => 'on_change',
  360. 'extract' => {
  361. 'value' => { 'path' => 'value' },
  362. },
  363. }
  364. checker = Agents::WebsiteAgent.new(name: "Charset reader", options: site)
  365. checker.user = users(:bob)
  366. checker.save!
  367. expect { checker.check }.to change { Event.count }.by(1)
  368. event = Event.last
  369. expect(event.payload['value']).to eq(huginn)
  370. end
  371. end
    describe '#working?' do
      it 'checks if events have been received within the expected receive period' do
        # The stub block closes over the local, so reassigning stubbed_time
        # below moves the clock without re-stubbing. Statement order matters.
        stubbed_time = Time.now
        stub(Time).now { stubbed_time }

        expect(@checker).not_to be_working # No events created
        @checker.check
        expect(@checker.reload).to be_working # Just created events

        @checker.error "oh no!"
        expect(@checker.reload).not_to be_working # There is a recent error

        # Advance the clock past the error, delete the old events, and produce a fresh one.
        stubbed_time = 20.minutes.from_now
        @checker.events.delete_all
        @checker.check
        expect(@checker.reload).to be_working # There is a newer event now

        # expected_update_period_in_days is "2", so two quiet days means not working.
        stubbed_time = 2.days.from_now
        expect(@checker.reload).not_to be_working # Two days have passed without a new event having been created
      end
    end
  389. describe "parsing" do
  390. it "parses CSS" do
  391. @checker.check
  392. event = Event.last
  393. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  394. expect(event.payload['title']).to eq("Evolving")
  395. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  396. end
  397. it "parses XPath" do
  398. @valid_options['extract'].each { |key, value|
  399. value.delete('css')
  400. value['xpath'] = "//*[@id='comic']//img"
  401. }
  402. @checker.options = @valid_options
  403. @checker.check
  404. event = Event.last
  405. expect(event.payload['url']).to eq("http://imgs.xkcd.com/comics/evolving.png")
  406. expect(event.payload['title']).to eq("Evolving")
  407. expect(event.payload['hovertext']).to match(/^Biologists play reverse/)
  408. end
  409. it "should turn relative urls to absolute" do
  410. rel_site = {
  411. 'name' => "XKCD",
  412. 'expected_update_period_in_days' => "2",
  413. 'type' => "html",
  414. 'url' => "http://xkcd.com",
  415. 'mode' => "on_change",
  416. 'extract' => {
  417. 'url' => {'css' => "#topLeft a", 'value' => "@href"},
  418. }
  419. }
  420. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  421. rel.user = users(:bob)
  422. rel.save!
  423. rel.check
  424. event = Event.last
  425. expect(event.payload['url']).to eq("http://xkcd.com/about")
  426. end
  427. it "should return an integer value if XPath evaluates to one" do
  428. rel_site = {
  429. 'name' => "XKCD",
  430. 'expected_update_period_in_days' => 2,
  431. 'type' => "html",
  432. 'url' => "http://xkcd.com",
  433. 'mode' => "on_change",
  434. 'extract' => {
  435. 'num_links' => {'css' => "#comicLinks", 'value' => "count(./a)"}
  436. }
  437. }
  438. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  439. rel.user = users(:bob)
  440. rel.save!
  441. rel.check
  442. event = Event.last
  443. expect(event.payload['num_links']).to eq("9")
  444. end
  445. it "should return all texts concatenated if XPath returns many text nodes" do
  446. rel_site = {
  447. 'name' => "XKCD",
  448. 'expected_update_period_in_days' => 2,
  449. 'type' => "html",
  450. 'url' => "http://xkcd.com",
  451. 'mode' => "on_change",
  452. 'extract' => {
  453. 'slogan' => {'css' => "#slogan", 'value' => ".//text()"}
  454. }
  455. }
  456. rel = Agents::WebsiteAgent.new(:name => "xkcd", :options => rel_site)
  457. rel.user = users(:bob)
  458. rel.save!
  459. rel.check
  460. event = Event.last
  461. expect(event.payload['slogan']).to eq("A webcomic of romance, sarcasm, math, and language.")
  462. end
  463. it "should interpolate _response_" do
  464. @valid_options['extract']['response_info'] =
  465. @valid_options['extract']['url'].merge(
  466. 'value' => '"{{ "The reponse was " | append:_response_.status | append:" " | append:_response_.headers.X-Status-Message | append:"." }}"'
  467. )
  468. @checker.options = @valid_options
  469. @checker.check
  470. event = Event.last
  471. expect(event.payload['response_info']).to eq('The reponse was 200 OK.')
  472. end
  473. describe "XML" do
  474. before do
  475. stub_request(:any, /github_rss/).to_return(
  476. body: File.read(Rails.root.join("spec/data_fixtures/github_rss.atom")),
  477. status: 200
  478. )
  479. @checker = Agents::WebsiteAgent.new(name: 'github', options: {
  480. 'name' => 'GitHub',
  481. 'expected_update_period_in_days' => '2',
  482. 'type' => 'xml',
  483. 'url' => 'http://example.com/github_rss.atom',
  484. 'mode' => 'on_change',
  485. 'extract' => {
  486. 'title' => { 'xpath' => '/feed/entry', 'value' => 'normalize-space(./title)' },
  487. 'url' => { 'xpath' => '/feed/entry', 'value' => './link[1]/@href' },
  488. 'thumbnail' => { 'xpath' => '/feed/entry', 'value' => './thumbnail/@url' },
  489. }
  490. }, keep_events_for: 2.days)
  491. @checker.user = users(:bob)
  492. @checker.save!
  493. end
  494. it "works with XPath" do
  495. expect {
  496. @checker.check
  497. }.to change { Event.count }.by(20)
  498. event = Event.last
  499. expect(event.payload['title']).to eq('Shift to dev group')
  500. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  501. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  502. end
  503. it "works with XPath with namespaces unstripped" do
  504. @checker.options['use_namespaces'] = 'true'
  505. @checker.save!
  506. expect {
  507. @checker.check
  508. }.to change { Event.count }.by(0)
  509. @checker.options['extract'] = {
  510. 'title' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => 'normalize-space(./xmlns:title)' },
  511. 'url' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './xmlns:link[1]/@href' },
  512. 'thumbnail' => { 'xpath' => '/xmlns:feed/xmlns:entry', 'value' => './media:thumbnail/@url' },
  513. }
  514. @checker.save!
  515. expect {
  516. @checker.check
  517. }.to change { Event.count }.by(20)
  518. event = Event.last
  519. expect(event.payload['title']).to eq('Shift to dev group')
  520. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  521. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  522. end
  523. it "works with CSS selectors" do
  524. @checker.options['extract'] = {
  525. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  526. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  527. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  528. }
  529. @checker.save!
  530. expect {
  531. @checker.check
  532. }.to change { Event.count }.by(20)
  533. event = Event.last
  534. expect(event.payload['title']).to be_empty
  535. expect(event.payload['thumbnail']).to be_empty
  536. @checker.options['extract'] = {
  537. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./xmlns:title)' },
  538. 'url' => { 'css' => 'feed > entry', 'value' => './xmlns:link[1]/@href' },
  539. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './media:thumbnail/@url' },
  540. }
  541. @checker.save!
  542. expect {
  543. @checker.check
  544. }.to change { Event.count }.by(20)
  545. event = Event.last
  546. expect(event.payload['title']).to eq('Shift to dev group')
  547. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  548. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  549. end
  550. it "works with CSS selectors with namespaces stripped" do
  551. @checker.options['extract'] = {
  552. 'title' => { 'css' => 'feed > entry', 'value' => 'normalize-space(./title)' },
  553. 'url' => { 'css' => 'feed > entry', 'value' => './link[1]/@href' },
  554. 'thumbnail' => { 'css' => 'feed > entry', 'value' => './thumbnail/@url' },
  555. }
  556. @checker.options['use_namespaces'] = 'false'
  557. @checker.save!
  558. expect {
  559. @checker.check
  560. }.to change { Event.count }.by(20)
  561. event = Event.last
  562. expect(event.payload['title']).to eq('Shift to dev group')
  563. expect(event.payload['url']).to eq('https://github.com/cantino/huginn/commit/d465158f77dcd9078697e6167b50abbfdfa8b1af')
  564. expect(event.payload['thumbnail']).to eq('https://avatars3.githubusercontent.com/u/365751?s=30')
  565. end
  566. end
  567. describe "XML with cdata" do
  568. before do
  569. stub_request(:any, /cdata_rss/).to_return(
  570. body: File.read(Rails.root.join("spec/data_fixtures/cdata_rss.atom")),
  571. status: 200
  572. )
  573. @checker = Agents::WebsiteAgent.new(name: 'cdata', options: {
  574. 'name' => 'CDATA',
  575. 'expected_update_period_in_days' => '2',
  576. 'type' => 'xml',
  577. 'url' => 'http://example.com/cdata_rss.atom',
  578. 'mode' => 'on_change',
  579. 'extract' => {
  580. 'author' => { 'xpath' => '/feed/entry/author/name', 'value' => './/text()'},
  581. 'title' => { 'xpath' => '/feed/entry/title', 'value' => './/text()' },
  582. 'content' => { 'xpath' => '/feed/entry/content', 'value' => './/text()' },
  583. }
  584. }, keep_events_for: 2.days)
  585. @checker.user = users(:bob)
  586. @checker.save!
  587. end
  588. it "works with XPath" do
  589. expect {
  590. @checker.check
  591. }.to change { Event.count }.by(10)
  592. event = Event.last
  593. expect(event.payload['author']).to eq('bill98')
  594. expect(event.payload['title']).to eq('Help: Rainmeter Skins • Test if Today is Between 2 Dates')
  595. expect(event.payload['content']).to start_with('Can I ')
  596. end
  597. end
  598. describe "JSON" do
  599. it "works with paths" do
  600. json = {
  601. 'response' => {
  602. 'version' => 2,
  603. 'title' => "hello!"
  604. }
  605. }
  606. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  607. site = {
  608. 'name' => "Some JSON Response",
  609. 'expected_update_period_in_days' => "2",
  610. 'type' => "json",
  611. 'url' => "http://json-site.com",
  612. 'mode' => 'on_change',
  613. 'extract' => {
  614. 'version' => {'path' => "response.version"},
  615. 'title' => {'path' => "response.title"}
  616. }
  617. }
  618. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  619. checker.user = users(:bob)
  620. checker.save!
  621. checker.check
  622. event = Event.last
  623. expect(event.payload['version']).to eq(2)
  624. expect(event.payload['title']).to eq("hello!")
  625. end
  626. it "can handle arrays" do
  627. json = {
  628. 'response' => {
  629. 'data' => [
  630. {'title' => "first", 'version' => 2},
  631. {'title' => "second", 'version' => 2.5}
  632. ]
  633. }
  634. }
  635. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  636. site = {
  637. 'name' => "Some JSON Response",
  638. 'expected_update_period_in_days' => "2",
  639. 'type' => "json",
  640. 'url' => "http://json-site.com",
  641. 'mode' => 'on_change',
  642. 'extract' => {
  643. :title => {'path' => "response.data[*].title"},
  644. :version => {'path' => "response.data[*].version"}
  645. }
  646. }
  647. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  648. checker.user = users(:bob)
  649. checker.save!
  650. expect {
  651. checker.check
  652. }.to change { Event.count }.by(2)
  653. (event2, event1) = Event.last(2)
  654. expect(event1.payload['version']).to eq(2.5)
  655. expect(event1.payload['title']).to eq("second")
  656. expect(event2.payload['version']).to eq(2)
  657. expect(event2.payload['title']).to eq("first")
  658. end
  659. it "stores the whole object if :extract is not specified" do
  660. json = {
  661. 'response' => {
  662. 'version' => 2,
  663. 'title' => "hello!"
  664. }
  665. }
  666. stub_request(:any, /json-site/).to_return(:body => json.to_json, :status => 200)
  667. site = {
  668. 'name' => "Some JSON Response",
  669. 'expected_update_period_in_days' => "2",
  670. 'type' => "json",
  671. 'url' => "http://json-site.com",
  672. 'mode' => 'on_change'
  673. }
  674. checker = Agents::WebsiteAgent.new(:name => "Weather Site", :options => site)
  675. checker.user = users(:bob)
  676. checker.save!
  677. checker.check
  678. event = Event.last
  679. expect(event.payload['response']['version']).to eq(2)
  680. expect(event.payload['response']['title']).to eq("hello!")
  681. end
  682. end
  683. describe "text parsing" do
  684. before do
  685. stub_request(:any, /text-site/).to_return(body: <<-EOF, status: 200)
  686. water: wet
  687. fire: hot
  688. EOF
  689. site = {
  690. 'name' => 'Some Text Response',
  691. 'expected_update_period_in_days' => '2',
  692. 'type' => 'text',
  693. 'url' => 'http://text-site.com',
  694. 'mode' => 'on_change',
  695. 'extract' => {
  696. 'word' => { 'regexp' => '^(.+?): (.+)$', index: 1 },
  697. 'property' => { 'regexp' => '^(.+?): (.+)$', index: '2' },
  698. }
  699. }
  700. @checker = Agents::WebsiteAgent.new(name: 'Text Site', options: site)
  701. @checker.user = users(:bob)
  702. @checker.save!
  703. end
  704. it "works with regexp with named capture" do
  705. @checker.options = @checker.options.merge('extract' => {
  706. 'word' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'word' },
  707. 'property' => { 'regexp' => '^(?<word>.+?): (?<property>.+)$', index: 'property' },
  708. })
  709. expect {
  710. @checker.check
  711. }.to change { Event.count }.by(2)
  712. event1, event2 = Event.last(2)
  713. expect(event1.payload['word']).to eq('water')
  714. expect(event1.payload['property']).to eq('wet')
  715. expect(event2.payload['word']).to eq('fire')
  716. expect(event2.payload['property']).to eq('hot')
  717. end
  718. it "works with regexp" do
  719. expect {
  720. @checker.check
  721. }.to change { Event.count }.by(2)
  722. event1, event2 = Event.last(2)
  723. expect(event1.payload['word']).to eq('water')
  724. expect(event1.payload['property']).to eq('wet')
  725. expect(event2.payload['word']).to eq('fire')
  726. expect(event2.payload['property']).to eq('hot')
  727. end
  728. end
  729. end
  730. describe "#receive" do
  731. describe "with a url or url_from_event" do
  732. before do
  733. @event = Event.new
  734. @event.agent = agents(:bob_rain_notifier_agent)
  735. @event.payload = {
  736. 'url' => 'http://foo.com',
  737. 'link' => 'Random'
  738. }
  739. end
  740. it "should use url_from_event as the url to scrape" do
  741. stub = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  742. @checker.options = @valid_options.merge(
  743. 'url_from_event' => 'http://example.org/?url={{url | uri_escape}}'
  744. )
  745. @checker.receive([@event])
  746. expect(stub).to have_been_requested
  747. end
  748. it "should use the Agent's `url` option if url_from_event is not set" do
  749. expect {
  750. @checker.options = @valid_options
  751. @checker.receive([@event])
  752. }.to change { Event.count }.by(1)
  753. end
  754. it "should allow url_from_event to be an array of urls" do
  755. stub1 = stub_request(:any, 'http://example.org/?url=http%3A%2F%2Ffoo.com')
  756. stub2 = stub_request(:any, 'http://google.org/?url=http%3A%2F%2Ffoo.com')
  757. @checker.options = @valid_options.merge(
  758. 'url_from_event' => ['http://example.org/?url={{url | uri_escape}}', 'http://google.org/?url={{url | uri_escape}}']
  759. )
  760. @checker.receive([@event])
  761. expect(stub1).to have_been_requested
  762. expect(stub2).to have_been_requested
  763. end
  764. it "should interpolate values from incoming event payload" do
  765. stub_request(:any, /foo/).to_return(body: File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), status: 200)
  766. expect {
  767. @valid_options['url_from_event'] = '{{ url }}'
  768. @valid_options['extract'] = {
  769. 'from' => {
  770. 'xpath' => '*[1]',
  771. 'value' => '{{url | to_xpath}}'
  772. },
  773. 'to' => {
  774. 'xpath' => '(//a[@href and text()={{link | to_xpath}}])[1]',
  775. 'value' => '@href'
  776. },
  777. }
  778. @checker.options = @valid_options
  779. @checker.receive([@event])
  780. }.to change { Event.count }.by(1)
  781. expect(Event.last.payload).to eq({
  782. 'from' => 'http://foo.com',
  783. 'to' => 'http://dynamic.xkcd.com/random/comic/',
  784. })
  785. end
  786. it "should use the options url if no url is in the event payload, and `url_from_event` is not provided" do
  787. @checker.options['mode'] = 'merge'
  788. @event.payload.delete('url')
  789. expect {
  790. @checker.receive([@event])
  791. }.to change { Event.count }.by(1)
  792. expect(Event.last.payload['title']).to eq('Evolving')
  793. expect(Event.last.payload['link']).to eq('Random')
  794. end
  795. it "should interpolate values from incoming event payload and _response_" do
  796. @event.payload['title'] = 'XKCD'
  797. expect {
  798. @valid_options['extract'] = {
  799. 'response_info' => @valid_options['extract']['url'].merge(
  800. 'value' => '{% capture sentence %}The reponse from {{title}} was {{_response_.status}} {{_response_.headers.X-Status-Message}}.{% endcapture %}{{sentence | to_xpath}}'
  801. )
  802. }
  803. @checker.options = @valid_options
  804. @checker.receive([@event])
  805. }.to change { Event.count }.by(1)
  806. expect(Event.last.payload['response_info']).to eq('The reponse from XKCD was 200 OK.')
  807. end
  808. it "should support merging of events" do
  809. expect {
  810. @checker.options = @valid_options
  811. @checker.options[:mode] = "merge"
  812. @checker.receive([@event])
  813. }.to change { Event.count }.by(1)
  814. last_payload = Event.last.payload
  815. expect(last_payload['link']).to eq('Random')
  816. end
  817. end
  818. describe "with a data_from_event" do
  819. describe "with json data" do
  820. before do
  821. @event = Event.new
  822. @event.agent = agents(:bob_rain_notifier_agent)
  823. @event.payload = {
  824. 'something' => 'some value',
  825. 'some_object' => {
  826. 'some_data' => { hello: 'world' }.to_json
  827. }
  828. }
  829. @event.save!
  830. @checker.options = @valid_options.merge(
  831. 'type' => 'json',
  832. 'data_from_event' => '{{ some_object.some_data }}',
  833. 'extract' => {
  834. 'value' => { 'path' => 'hello' }
  835. }
  836. )
  837. end
  838. it "should extract from the event data in the incoming event payload" do
  839. expect {
  840. @checker.receive([@event])
  841. }.to change { Event.count }.by(1)
  842. expect(@checker.events.last.payload).to eq({ 'value' => 'world' })
  843. end
  844. it "should support merge mode" do
  845. @checker.options['mode'] = "merge"
  846. expect {
  847. @checker.receive([@event])
  848. }.to change { Event.count }.by(1)
  849. expect(@checker.events.last.payload).to eq(@event.payload.merge('value' => 'world'))
  850. end
  851. it "should output an error when nothing can be found at the path" do
  852. @checker.options = @checker.options.merge(
  853. 'data_from_event' => '{{ some_object.mistake }}'
  854. )
  855. expect {
  856. @checker.receive([@event])
  857. }.to_not change { Event.count }
  858. expect(@checker.logs.last.message).to match(/No data was found in the Event payload using the template {{ some_object\.mistake }}/)
  859. end
  860. it "should output an error when the data cannot be parsed" do
  861. @event.update_attribute :payload, @event.payload.merge('some_object' => { 'some_data' => '{invalid json' })
  862. expect {
  863. @checker.receive([@event])
  864. }.to_not change { Event.count }
  865. expect(@checker.logs.last.message).to match(/Error when handling event data:/)
  866. end
  867. end
  868. describe "with HTML data" do
  869. before do
  870. @event = Event.new
  871. @event.agent = agents(:bob_rain_notifier_agent)
  872. @event.payload = {
  873. 'url' => 'http://xkcd.com',
  874. 'some_object' => {
  875. 'some_data' => "<div><span class='title'>Title!</span><span class='body'>Body!</span></div>"
  876. }
  877. }
  878. @event.save!
  879. @checker.options = @valid_options.merge(
  880. 'type' => 'html',
  881. 'data_from_event' => '{{ some_object.some_data }}',
  882. 'extract' => {
  883. 'title' => { 'css' => ".title", 'value' => ".//text()" },
  884. 'body' => { 'css' => "div span.body", 'value' => ".//text()" }
  885. }
  886. )
  887. end
  888. it "should extract from the event data in the incoming event payload" do
  889. expect {
  890. @checker.receive([@event])
  891. }.to change { Event.count }.by(1)
  892. expect(@checker.events.last.payload).to eq({ 'title' => 'Title!', 'body' => 'Body!' })
  893. end
  894. end
  895. end
  896. end
  897. end
  898. describe "checking with http basic auth" do
  899. before do
  900. stub_request(:any, /example/).
  901. with(headers: { 'Authorization' => "Basic #{['user:pass'].pack('m').chomp}" }).
  902. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  903. @valid_options = {
  904. 'name' => "XKCD",
  905. 'expected_update_period_in_days' => "2",
  906. 'type' => "html",
  907. 'url' => "http://www.example.com",
  908. 'mode' => 'on_change',
  909. 'extract' => {
  910. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  911. 'title' => { 'css' => "#comic img", 'value' => "@alt" },
  912. 'hovertext' => { 'css' => "#comic img", 'value' => "@title" }
  913. },
  914. 'basic_auth' => "user:pass"
  915. }
  916. @checker = Agents::WebsiteAgent.new(:name => "auth", :options => @valid_options)
  917. @checker.user = users(:bob)
  918. @checker.save!
  919. end
  920. describe "#check" do
  921. it "should check for changes" do
  922. expect { @checker.check }.to change { Event.count }.by(1)
  923. expect { @checker.check }.not_to change { Event.count }
  924. end
  925. end
  926. end
  927. describe "checking with headers" do
  928. before do
  929. stub_request(:any, /example/).
  930. with(headers: { 'foo' => 'bar' }).
  931. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/xkcd.html")), :status => 200)
  932. @valid_options = {
  933. 'name' => "XKCD",
  934. 'expected_update_period_in_days' => "2",
  935. 'type' => "html",
  936. 'url' => "http://www.example.com",
  937. 'mode' => 'on_change',
  938. 'headers' => { 'foo' => 'bar' },
  939. 'extract' => {
  940. 'url' => { 'css' => "#comic img", 'value' => "@src" },
  941. }
  942. }
  943. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  944. @checker.user = users(:bob)
  945. @checker.save!
  946. end
  947. describe "#check" do
  948. it "should check for changes" do
  949. expect { @checker.check }.to change { Event.count }.by(1)
  950. end
  951. end
  952. end
  953. describe "checking urls" do
  954. before do
  955. stub_request(:any, /example/).
  956. to_return(:body => File.read(Rails.root.join("spec/data_fixtures/urlTest.html")), :status => 200)
  957. @valid_options = {
  958. 'name' => "Url Test",
  959. 'expected_update_period_in_days' => "2",
  960. 'type' => "html",
  961. 'url' => "http://www.example.com",
  962. 'mode' => 'all',
  963. 'extract' => {
  964. 'url' => { 'css' => "a", 'value' => "@href" },
  965. }
  966. }
  967. @checker = Agents::WebsiteAgent.new(:name => "ua", :options => @valid_options)
  968. @checker.user = users(:bob)
  969. @checker.save!
  970. end
  971. describe "#check" do
  972. before do
  973. expect { @checker.check }.to change { Event.count }.by(7)
  974. @events = Event.last(7)
  975. end
  976. it "should check hostname" do
  977. event = @events[0]
  978. expect(event.payload['url']).to eq("http://google.com")
  979. end
  980. it "should check unescaped query" do
  981. event = @events[1]
  982. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  983. end
  984. it "should check properly escaped query" do
  985. event = @events[2]
  986. expect(event.payload['url']).to eq("https://www.google.ca/search?q=some%20query")
  987. end
  988. it "should check unescaped unicode url" do
  989. event = @events[3]
  990. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  991. end
  992. it "should check unescaped unicode query" do
  993. event = @events[4]
  994. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  995. end
  996. it "should check properly escaped unicode url" do
  997. event = @events[5]
  998. expect(event.payload['url']).to eq("http://ko.wikipedia.org/wiki/%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  999. end
  1000. it "should check properly escaped unicode query" do
  1001. event = @events[6]
  1002. expect(event.payload['url']).to eq("https://www.google.ca/search?q=%EC%9C%84%ED%82%A4%EB%B0%B1%EA%B3%BC:%EB%8C%80%EB%AC%B8")
  1003. end
  1004. end
  1005. end
  1006. end