# website_agent_spec.rb (14KB)

  require 'spec_helper'

  # Exercises Agents::WebsiteAgent: option validation, #check in its different
  # modes, forced response encodings, #working?, HTML (CSS/XPath) and JSON
  # extraction, #receive, and requests that must carry basic-auth credentials
  # or custom headers. All HTTP traffic is stubbed with stub_request.
  describe Agents::WebsiteAgent do
    describe "checking without basic auth" do
      before do
        # Any request whose URL matches /xkcd/ is answered with the saved page.
        xkcd_page = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))
        stub_request(:any, /xkcd/).to_return(body: xkcd_page, status: 200)

        @site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://xkcd.com",
          'mode' => 'on_change',
          'extract' => {
            'url' => { 'css' => "#comic img", 'attr' => "src" },
            'title' => { 'css' => "#comic img", 'attr' => "alt" },
            'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
          }
        }

        @checker = Agents::WebsiteAgent.new(name: "xkcd", options: @site, keep_events_for: 2)
        @checker.user = users(:bob)
        @checker.save!
      end

      describe "validations" do
        # Each example starts from an agent that is known to be valid.
        before { @checker.should be_valid }

        it "should validate the integer fields" do
          @checker.options['expected_update_period_in_days'] = "2"
          @checker.should be_valid

          @checker.options['expected_update_period_in_days'] = "nonsense"
          @checker.should_not be_valid
        end

        it "should validate uniqueness_look_back" do
          @checker.options['uniqueness_look_back'] = "nonsense"
          @checker.should_not be_valid

          @checker.options['uniqueness_look_back'] = "2"
          @checker.should be_valid
        end

        it "should validate headers" do
          @checker.options['headers'] = "blah"
          @checker.should_not be_valid

          # Blank, empty-hash and populated-hash values are all accepted.
          ["", {}, { 'foo' => 'bar' }].each do |accepted_headers|
            @checker.options['headers'] = accepted_headers
            @checker.should be_valid
          end
        end

        it "should validate mode" do
          @checker.options['mode'] = "nonsense"
          @checker.should_not be_valid

          ["on_change", "all", ""].each do |accepted_mode|
            @checker.options['mode'] = accepted_mode
            @checker.should be_valid
          end
        end

        it "should validate the force_encoding option" do
          ['', 'UTF-8'].each do |accepted_encoding|
            @checker.options['force_encoding'] = accepted_encoding
            @checker.should be_valid
          end

          # A non-string value and an unknown encoding name are both rejected.
          [['UTF-8'], 'UTF-42'].each do |rejected_encoding|
            @checker.options['force_encoding'] = rejected_encoding
            @checker.should_not be_valid
          end
        end
      end

      describe "#check" do
        it "should check for changes (and update Event.expires_at)" do
          lambda { @checker.check }.should change { Event.count }.by(1)
          first_event = Event.last

          # NOTE(review): presumably the pause lets the second check compute a
          # different expires_at than the first one - confirm against the agent.
          sleep 2

          lambda { @checker.check }.should_not change { Event.count }
          refreshed_event = Event.last
          refreshed_event.expires_at.should_not == first_event.expires_at
        end

        it "should always save events when in :all mode" do
          @site['mode'] = 'all'
          @checker.options = @site

          lambda { 2.times { @checker.check } }.should change { Event.count }.by(2)
        end

        it "should take uniqueness_look_back into account during deduplication" do
          @site['mode'] = 'all'
          @checker.options = @site
          2.times { @checker.check }

          # Overwrite the newest event's payload so that only the older event
          # still carries the scraped data.
          newest_event = Event.last
          newest_event.payload = "{}"
          newest_event.save

          @site['mode'] = 'on_change'
          @site['uniqueness_look_back'] = 2
          @checker.options = @site
          lambda { @checker.check }.should_not change { Event.count }

          @site['mode'] = 'on_change'
          @site['uniqueness_look_back'] = 1
          @checker.options = @site
          lambda { @checker.check }.should change { Event.count }.by(1)
        end

        it "should log an error if the number of results for a set of extraction patterns differs" do
          @site['extract']['url']['css'] = "div"
          @checker.options = @site
          @checker.check

          @checker.logs.first.message.should =~ /Got an uneven number of matches/
        end

        it "should accept an array for url" do
          @site['url'] = ["http://xkcd.com/1/", "http://xkcd.com/2/"]
          @checker.options = @site

          lambda { @checker.save! }.should_not raise_error
          lambda { @checker.check }.should_not raise_error
        end

        it "should parse events from all urls in array" do
          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @site['mode'] = 'all'
          @checker.options = @site

          lambda { @checker.check }.should change { Event.count }.by(2)
        end

        it "should follow unique rules when parsing array of urls" do
          @site['url'] = ["http://xkcd.com/", "http://xkcd.com/"]
          @checker.options = @site

          lambda { @checker.check }.should change { Event.count }.by(1)
        end
      end

      describe 'encoding' do
        it 'should be forced with force_encoding option' do
          expected_text = "\u{601d}\u{8003}"

          # The body is EUC-JP encoded while the Content-Type names no charset.
          euc_jp_body = { value: expected_text }.to_json.encode(Encoding::EUC_JP)
          stub_request(:any, /no-encoding/).to_return(
            body: euc_jp_body,
            headers: { 'Content-Type' => 'application/json' },
            status: 200
          )

          site_options = {
            'name' => "Some JSON Response",
            'expected_update_period_in_days' => 2,
            'type' => "json",
            'url' => "http://no-encoding.example.com",
            'mode' => 'on_change',
            'extract' => {
              'value' => { 'path' => 'value' }
            },
            'force_encoding' => 'EUC-JP'
          }
          json_checker = Agents::WebsiteAgent.new(name: "No Encoding Site", options: site_options)
          json_checker.user = users(:bob)
          json_checker.save!
          json_checker.check

          Event.last.payload['value'].should == expected_text
        end

        it 'should be overridden with force_encoding option' do
          expected_text = "\u{601d}\u{8003}"

          # The body is EUC-JP encoded although the Content-Type mentions UTF-8.
          euc_jp_body = { value: expected_text }.to_json.encode(Encoding::EUC_JP)
          stub_request(:any, /wrong-encoding/).to_return(
            body: euc_jp_body,
            headers: { 'Content-Type' => 'application/json; UTF-8' },
            status: 200
          )

          site_options = {
            'name' => "Some JSON Response",
            'expected_update_period_in_days' => 2,
            'type' => "json",
            'url' => "http://wrong-encoding.example.com",
            'mode' => 'on_change',
            'extract' => {
              'value' => { 'path' => 'value' }
            },
            'force_encoding' => 'EUC-JP'
          }
          json_checker = Agents::WebsiteAgent.new(name: "Wrong Encoding Site", options: site_options)
          json_checker.user = users(:bob)
          json_checker.save!
          json_checker.check

          Event.last.payload['value'].should == expected_text
        end
      end

      describe '#working?' do
        it 'checks if events have been received within the expected receive period' do
          # Time.now returns whatever current_time holds, so reassigning the
          # local below moves the clock seen through Time.now.
          current_time = Time.now
          stub(Time).now { current_time }

          @checker.should_not be_working # No events created
          @checker.check
          @checker.reload.should be_working # Just created events

          @checker.error "oh no!"
          @checker.reload.should_not be_working # There is a recent error

          current_time = 20.minutes.from_now
          @checker.events.delete_all
          @checker.check
          @checker.reload.should be_working # There is a newer event now

          current_time = 2.days.from_now
          @checker.reload.should_not be_working # Two days have passed without a new event having been created
        end
      end

      describe "parsing" do
        it "parses CSS" do
          @checker.check

          parsed = Event.last
          parsed.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
          parsed.payload['title'].should == "Evolving"
          parsed.payload['hovertext'].should =~ /^Biologists play reverse/
        end

        it "parses XPath" do
          # Swap every CSS selector for an XPath expression matching the same image.
          @site['extract'].each_value do |rule|
            rule.delete('css')
            rule['xpath'] = "//*[@id='comic']//img"
          end
          @checker.options = @site
          @checker.check

          parsed = Event.last
          parsed.payload['url'].should == "http://imgs.xkcd.com/comics/evolving.png"
          parsed.payload['title'].should == "Evolving"
          parsed.payload['hovertext'].should =~ /^Biologists play reverse/
        end

        it "should turn relative urls to absolute" do
          relative_site = {
            'name' => "XKCD",
            'expected_update_period_in_days' => 2,
            'type' => "html",
            'url' => "http://xkcd.com",
            'mode' => "on_change",
            'extract' => {
              'url' => { 'css' => "#topLeft a", 'attr' => "href" },
              'title' => { 'css' => "#topLeft a", 'text' => "true" }
            }
          }
          relative_checker = Agents::WebsiteAgent.new(name: "xkcd", options: relative_site)
          relative_checker.user = users(:bob)
          relative_checker.save!
          relative_checker.check

          Event.last.payload['url'].should == "http://xkcd.com/about"
        end

        describe "JSON" do
          it "works with paths" do
            response_body = {
              'response' => {
                'version' => 2,
                'title' => "hello!"
              }
            }
            stub_request(:any, /json-site/).to_return(body: response_body.to_json, status: 200)

            site_options = {
              'name' => "Some JSON Response",
              'expected_update_period_in_days' => 2,
              'type' => "json",
              'url' => "http://json-site.com",
              'mode' => 'on_change',
              'extract' => {
                'version' => { 'path' => "response.version" },
                'title' => { 'path' => "response.title" }
              }
            }
            json_checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site_options)
            json_checker.user = users(:bob)
            json_checker.save!
            json_checker.check

            parsed = Event.last
            parsed.payload['version'].should == 2
            parsed.payload['title'].should == "hello!"
          end

          it "can handle arrays" do
            response_body = {
              'response' => {
                'data' => [
                  { 'title' => "first", 'version' => 2 },
                  { 'title' => "second", 'version' => 2.5 }
                ]
              }
            }
            stub_request(:any, /json-site/).to_return(body: response_body.to_json, status: 200)

            # The extract keys are symbols here, unlike the string keys used elsewhere.
            site_options = {
              'name' => "Some JSON Response",
              'expected_update_period_in_days' => 2,
              'type' => "json",
              'url' => "http://json-site.com",
              'mode' => 'on_change',
              'extract' => {
                title: { 'path' => "response.data[*].title" },
                version: { 'path' => "response.data[*].version" }
              }
            }
            json_checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site_options)
            json_checker.user = users(:bob)
            json_checker.save!

            lambda { json_checker.check }.should change { Event.count }.by(2)

            newest = Event.all[-1]
            newest.payload['version'].should == 2.5
            newest.payload['title'].should == "second"

            previous = Event.all[-2]
            previous.payload['version'].should == 2
            previous.payload['title'].should == "first"
          end

          it "stores the whole object if :extract is not specified" do
            response_body = {
              'response' => {
                'version' => 2,
                'title' => "hello!"
              }
            }
            stub_request(:any, /json-site/).to_return(body: response_body.to_json, status: 200)

            site_options = {
              'name' => "Some JSON Response",
              'expected_update_period_in_days' => 2,
              'type' => "json",
              'url' => "http://json-site.com",
              'mode' => 'on_change'
            }
            json_checker = Agents::WebsiteAgent.new(name: "Weather Site", options: site_options)
            json_checker.user = users(:bob)
            json_checker.save!
            json_checker.check

            parsed = Event.last
            parsed.payload['response']['version'].should == 2
            parsed.payload['response']['title'].should == "hello!"
          end
        end
      end

      describe "#receive" do
        it "should scrape from the url element in incoming event payload" do
          incoming_event = Event.new
          incoming_event.agent = agents(:bob_rain_notifier_agent)
          incoming_event.payload = { 'url' => "http://xkcd.com" }
          @checker.options = @site

          lambda { @checker.receive([incoming_event]) }.should change { Event.count }.by(1)
        end
      end
    end

    describe "checking with http basic auth" do
      before do
        # The stub only matches requests carrying the Base64-encoded "user:pass"
        # credentials in the Authorization header.
        encoded_credentials = ['user:pass'].pack('m').chomp
        xkcd_page = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))
        stub_request(:any, /example/).
          with(headers: { 'Authorization' => "Basic #{encoded_credentials}" }).
          to_return(body: xkcd_page, status: 200)

        @site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://www.example.com",
          'mode' => 'on_change',
          'extract' => {
            'url' => { 'css' => "#comic img", 'attr' => "src" },
            'title' => { 'css' => "#comic img", 'attr' => "alt" },
            'hovertext' => { 'css' => "#comic img", 'attr' => "title" }
          },
          'basic_auth' => "user:pass"
        }

        @checker = Agents::WebsiteAgent.new(name: "auth", options: @site)
        @checker.user = users(:bob)
        @checker.save!
      end

      describe "#check" do
        it "should check for changes" do
          lambda { @checker.check }.should change { Event.count }.by(1)
          lambda { @checker.check }.should_not change { Event.count }
        end
      end
    end

    describe "checking with headers" do
      before do
        # The stub only matches requests that send the configured 'foo' header
        # together with a user agent matching /Faraday/.
        xkcd_page = File.read(Rails.root.join("spec/data_fixtures/xkcd.html"))
        stub_request(:any, /example/).
          with(headers: { 'foo' => 'bar', 'user_agent' => /Faraday/ }).
          to_return(body: xkcd_page, status: 200)

        @site = {
          'name' => "XKCD",
          'expected_update_period_in_days' => 2,
          'type' => "html",
          'url' => "http://www.example.com",
          'mode' => 'on_change',
          'headers' => { 'foo' => 'bar' },
          'extract' => {
            'url' => { 'css' => "#comic img", 'attr' => "src" }
          }
        }

        @checker = Agents::WebsiteAgent.new(name: "ua", options: @site)
        @checker.user = users(:bob)
        @checker.save!
      end

      describe "#check" do
        it "should check for changes" do
          lambda { @checker.check }.should change { Event.count }.by(1)
        end
      end
    end
  end