Class | Mechanize::Util |
In: |
lib/mechanize/util.rb
|
Parent: | Object |
# Maps encoding identifiers (as symbols) to charset names. detect_charset
# looks the name of the matching NKF constant up in this table on
# interpreters older than Ruby 1.9.
# Frozen so the shared table cannot be modified by accident at runtime.
CODE_DIC = {
  :JIS   => "ISO-2022-JP",
  :EUC   => "EUC-JP",
  :SJIS  => "SHIFT_JIS",
  :UTF8  => "UTF-8",
  :UTF16 => "UTF-16",
  :UTF32 => "UTF-32"
}.freeze
# File lib/mechanize/util.rb, line 12
#
# Builds a URL query string ("k1=v1&k2=v2") from +parameters+.
#
# parameters:: any object whose +map+ yields key/value pairs (a Hash or an
#              Array of two-element Arrays). +nil+ is treated as "no
#              parameters" and yields an empty string.
# enc::        accepted for interface compatibility; not used by this method.
#
# Keys and values are converted with +to_s+ and escaped with CGI.escape.
# Pairs whose key is +nil+ or +false+ are skipped.
def build_query_string(parameters, enc=nil)
  return "" unless parameters

  pairs = parameters.map { |k, v|
    next unless k
    # WEBrick::HTTP.escape* has some problems about m17n on ruby-1.9.*.
    [CGI.escape(k.to_s), CGI.escape(v.to_s)].join("=")
  }
  pairs.compact.join('&')
end
# File lib/mechanize/util.rb, line 54
#
# Guesses the character set of +src+ with NKF and returns its name as a
# String.
#
# src:: the text to inspect; when +nil+, the placeholder document
#       "<html></html>" is inspected instead.
#
# On Ruby 1.9 and later the guess is converted with +to_s+ and upcased.
# On older interpreters the guess is matched against the values of NKF's
# constants and the matching constant's name is looked up in CODE_DIC.
# When that lookup yields nothing, "ISO-8859-1" is returned.
def detect_charset(src)
  tmp = NKF.guess(src || "<html></html>")
  if RUBY_VERSION >= "1.9.0"
    enc = tmp.to_s.upcase
  else
    name = NKF.constants.find { |c| NKF.const_get(c) == tmp }
    # No NKF constant may match the guess; skip the lookup in that case so
    # the default below is used instead of calling +intern+ on nil.
    enc = name && CODE_DIC[name.intern]
  end
  enc || "ISO-8859-1"
end
# File lib/mechanize/util.rb, line 29
#
# Converts +s+ from UTF-8 into the character set named by +code+.
#
# s::    the string to convert, treated as UTF-8 text.
# code:: the target charset name (anything responding to +to_s+).
#
# Returns +s+ unchanged when +s+ or +code+ is nil/false, when
# Mechanize.html_parser is not Nokogiri::HTML, or when the target charset
# is not known to the converter. Errors caused by byte sequences that
# cannot be converted are not rescued here and propagate to the caller.
def from_native_charset(s, code)
  return s unless s && code
  return s unless Mechanize.html_parser == Nokogiri::HTML

  if s.respond_to?(:encode)
    # Iconv is not part of the standard library on Ruby 2.0+, so prefer the
    # built-in transcoder whenever the string supports it.
    begin
      s.encode(code.to_s, "UTF-8")
    rescue Encoding::ConverterNotFoundError
      s
    end
  else
    begin
      Iconv.iconv(code.to_s, "UTF-8", s).join("")
    rescue Iconv::InvalidEncoding
      s
    end
  end
end
# File lib/mechanize/util.rb, line 40
#
# Replaces HTML character references in +s+ with the characters they denote.
#
# s:: the string to unescape; +nil+ (or +false+) is returned as-is.
#
# Three forms are recognised:
# * named references (&name;), resolved through the NamedCharacters table
#   of Mechanize.html_parser;
# * decimal references (&#65;);
# * hexadecimal references (&#x41; or &#X41;).
#
# A reference that cannot be resolved to a code point, or whose code point
# cannot be packed as UTF-8, is left in the output untouched.
def html_unescape(s)
  return s unless s

  s.gsub(/&(\w+|#[0-9]+|#[xX][0-9a-fA-F]+);/) { |match|
    number = case match
             when /\A&#[xX]([0-9a-fA-F]+);\z/
               $1.hex
             when /\A&#([0-9]+);\z/
               $1.to_i
             when /\A&(\w+);\z/
               Mechanize.html_parser::NamedCharacters[$1]
             end

    if number
      begin
        [number].pack('U')
      rescue StandardError
        # e.g. a code point outside the range pack('U') accepts
        match
      end
    else
      match
    end
  }
end