Call remove_namespaces! just once.

Akinori MUSHA 10 gadi atpakaļ
vecāks
revīzija
76929583d7
1 mainīts fails ar 12 papildinājumiem un 4 dzēšanām
  1. 12 4
      app/models/agents/website_agent.rb

+ 12 - 4
app/models/agents/website_agent.rb

@@ -299,6 +299,12 @@ module Agents
299 299
       end).to_s
300 300
     end
301 301
 
302
+    def use_namespaces?
303
+      interpolated['extract'].none? { |name, extraction_details|
304
+        extraction_details.key?('xpath')
305
+      }
306
+    end
307
+
302 308
     def extract_each(&block)
303 309
       interpolated['extract'].each_with_object({}) { |(name, extraction_details), output|
304 310
         output[name] = block.call(extraction_details)
@@ -331,7 +337,6 @@ module Agents
331 337
         when css = extraction_details['css']
332 338
           nodes = doc.css(css)
333 339
         when xpath = extraction_details['xpath']
334
-          doc.remove_namespaces! # ignore xmlns, useful when parsing atom feeds
335 340
           nodes = doc.xpath(xpath)
336 341
         else
337 342
           raise '"css" or "xpath" is required for HTML or XML extraction'
@@ -356,9 +361,12 @@ module Agents
356 361
     end
357 362
 
358 363
     def parse(data)
359
-      case extraction_type
364
+      case type = extraction_type
360 365
       when "xml"
361
-        Nokogiri::XML(data)
366
+        doc = Nokogiri::XML(data)
367
+        # ignore xmlns, useful when parsing atom feeds
368
+        doc.remove_namespaces! unless use_namespaces?
369
+        doc
362 370
       when "json"
363 371
         JSON.parse(data)
364 372
       when "html"
@@ -366,7 +374,7 @@ module Agents
366 374
       when "text"
367 375
         data
368 376
       else
369
-        raise "Unknown extraction type #{extraction_type}"
377
+        raise "Unknown extraction type: #{type}"
370 378
       end
371 379
     end
372 380