require 'nokogiri'

# Extends Nokogiri's HTML document with helpers for reading robots
# directives (such as "noindex" and "nofollow") from <meta> tags.
class Nokogiri::HTML::Document
  # Returns an array of lower-cased tokens taken from the +content+
  # attribute of the document's <meta name="ROBOTS"> tag.  If no usable
  # tag is found, returns an empty array.
  #
  # An optional +custom_name+ specifies the name of a meta tag to look
  # for ahead of "ROBOTS"; when no such tag exists (or it has no
  # +content+ attribute) the lookup falls back to "ROBOTS".  Names are
  # compared in a case-insensitive manner.
  def meta_robots(custom_name = nil)
    # The lookup helper returns nil (not []) on a miss.  An empty array is
    # truthy in Ruby, so using [] as the "not found" marker would stop the
    # || chain from ever reaching the "robots" fallback.
    tokens = (custom_name && meta_robots_tokens(custom_name)) ||
             meta_robots_tokens('robots') ||
             []

    # The hash is written on every call but never consulted, so each call
    # re-scans the document and reflects any changes made to it since the
    # previous call.
    (@meta_robots ||= {})[custom_name] = tokens
  end

  # Equivalent to meta_robots(custom_name).include?('noindex').
  def noindex?(custom_name = nil)
    meta_robots(custom_name).include?('noindex')
  end

  # Equivalent to meta_robots(custom_name).include?('nofollow').
  def nofollow?(custom_name = nil)
    meta_robots(custom_name).include?('nofollow')
  end

  private

  # Returns the lower-cased tokens of the first <meta> tag whose +name+
  # matches +custom_name+ (case-insensitively), or an empty array when
  # there is no such tag or it lacks a +content+ attribute.
  def parse_meta_robots(custom_name)
    meta_robots_tokens(custom_name) || []
  end

  # Same lookup as parse_meta_robots, but returns nil when no usable tag
  # is found so callers can tell "tag absent" apart from "tag present
  # with no directives".
  def meta_robots_tokens(name)
    pattern = /\A#{Regexp.quote(name)}\z/i
    meta = css('meta[@name]').find { |element| element['name'] =~ pattern }
    content = meta && meta['content']
    return nil unless content

    # scan (rather than split) so that content starting with whitespace or
    # a comma does not produce a leading empty-string token.
    content.downcase.scan(/[^,\s]+/)
  end
end