Class | FeedTools::URI |
In: |
lib/feed_tools/vendor/uri.rb
|
Parent: | Object |
This is an implementation of a URI parser based on RFC 3986.
Converts a path to a file protocol URI. If the path supplied is relative, it will be returned as a relative URI. If the path supplied is actually a URI, it will return the parsed URI.
# File lib/feed_tools/vendor/uri.rb, line 61
#
# Turns a filesystem path into a file-scheme URI.
#
# * nil is returned for a nil path.
# * A path starting with "/" or with a drive letter ("C:\" or "C:/") is
#   prefixed with the file scheme, then parsed and normalized.
# * Anything else (a relative path, or something that is already a URI with
#   another scheme) is handed to parse and returned as-is.
def self.convert_path(path)
  return nil if path.nil?

  candidate = path.strip
  # Absolute unix-style path.
  if !candidate.empty? && candidate[0..0] == "/"
    candidate = "file://" + candidate
  end
  # Absolute windows-style path with a drive letter.
  if !candidate.empty? && candidate =~ /^[a-zA-Z]:[\\\/]/
    candidate = "file:///" + candidate
  end
  # Collapse any number of slashes after the scheme down to exactly three.
  candidate = candidate.gsub(/^file:\/*/i, "file:///")

  return self.parse(candidate) unless candidate =~ /^file:/i

  # Adjust windows-style uris
  candidate = candidate.gsub(/^file:\/\/\/([a-zA-Z])\|/i, 'file:///\1:')
  candidate = candidate.gsub(/\\/, '/')
  self.parse(candidate).normalize
end
Correctly escapes a uri.
# File lib/feed_tools/vendor/uri.rb, line 98
#
# Returns the string form of +uri+ with the percent-escaping of its path,
# query and fragment normalized. +uri+ may be a URI object or anything
# whose to_s can be parsed. Scheme, userinfo, host and the explicitly
# specified port are carried over unchanged.
def self.escape(uri)
  source = uri.kind_of?(self) ? uri : self.parse(uri.to_s)
  components = [
    source.scheme,
    source.userinfo,
    source.host,
    source.specified_port,
    self.normalize_escaping(source.path),
    self.normalize_escaping(source.query),
    self.normalize_escaping(source.fragment)
  ]
  URI.new(*components).to_s
end
Extracts uris from an arbitrary body of text.
# File lib/feed_tools/vendor/uri.rb, line 112
#
# Extracts uris from an arbitrary body of text.
#
# Three passes are made over +text+: bare "scheme:..." tokens, href
# attributes inside SGML/HTML tags, and Textile-style "label":target links.
#
# Options:
# * :base  - a URI (or string) that every extracted uri is joined onto.
# * :parse - when true, URI objects are returned instead of strings.
#
# Raises InvalidOptionError if any other option key is supplied.
# Candidates that cannot be parsed or joined are skipped.
def self.extract(text, options={})
  defaults = {:base => nil, :parse => false}
  options = defaults.merge(options)
  raise InvalidOptionError unless (options.keys - defaults.keys).empty?

  # This regular expression needs to be less forgiving or else it would
  # match virtually all text. Which isn't exactly what we're going for.
  extract_regex = /((([a-z\+]+):)[^ \n\<\>\"\\]+[\w\/])/
  extracted_uris = text.scan(extract_regex).collect { |match| match[0] }

  sgml_extract_regex = /<[^>]+href=\"([^\"]+?)\"[^>]*>/
  sgml_extracted_uris =
    text.scan(sgml_extract_regex).collect { |match| match[0] }
  extracted_uris.concat(sgml_extracted_uris - extracted_uris)

  textile_extract_regex = /\".+?\":([^ ]+\/[^ ]+)[ \,\.\;\:\?\!\<\>\"]/i
  textile_extracted_uris =
    text.scan(textile_extract_regex).collect { |match| match[0] }
  extracted_uris.concat(textile_extracted_uris - extracted_uris)

  base_uri = nil
  if options[:base] != nil
    base_uri = options[:base] if options[:base].kind_of?(self)
    base_uri = self.parse(options[:base].to_s) if base_uri == nil
  end

  parsed_uris = []
  extracted_uris.each do |uri_string|
    begin
      if base_uri == nil
        parsed_uris << self.parse(uri_string)
      else
        parsed_uris << (base_uri + self.parse(uri_string))
      end
    rescue StandardError
      # Best effort: an unparseable candidate is simply dropped. Only
      # StandardError is rescued so that Interrupt, SystemExit and
      # NoMemoryError still propagate.
      # NOTE(review): assumes InvalidURIError descends from StandardError;
      # its definition is not visible here - confirm.
      nil
    end
  end

  # Discard tokens whose "scheme" is really an XML namespace prefix or
  # another common false positive of the bare-token pass.
  rejected_schemes =
    ["xmlns", "xml", "thr", "this", "float", "user", "username", "out"]
  parsed_uris.reject! do |uri|
    uri.scheme =~ /T\d+/ || rejected_schemes.include?(uri.scheme)
  end

  if options[:parse]
    return parsed_uris
  else
    return parsed_uris.collect { |uri| uri.to_s }
  end
end
Joins several uris together.
# File lib/feed_tools/vendor/uri.rb, line 86
#
# Joins several uris together, left to right. Each argument may be a URI
# object or anything whose to_s can be parsed. The first uri is duplicated
# and every following uri is merged into that copy with merge!, so the
# arguments themselves are not modified by this method.
def self.join(*uris)
  parsed = uris.collect do |candidate|
    candidate.kind_of?(self) ? candidate : self.parse(candidate.to_s)
  end
  combined = parsed.shift.dup
  parsed.each { |following| combined.merge!(following) }
  combined
end
Creates a new uri object from component parts. Passing nil for any of these parameters is acceptable.
# File lib/feed_tools/vendor/uri.rb, line 166
#
# Builds a URI from its seven components. All of the work, including
# validation, is delegated to assign_components.
def initialize(scheme, userinfo, host, port, path, query, fragment)
  assign_components(
    scheme, userinfo, host, port,
    path, query, fragment
  )
end
Returns a URI object based on the parsed string.
# File lib/feed_tools/vendor/uri.rb, line 12
#
# Parses +uri_string+ into a URI object.
#
# Returns nil for nil input (or if the component regex yields no match),
# and returns the argument itself when it is already a URI object.
def self.parse(uri_string)
  return nil if uri_string.nil?
  # Already-parsed URIs are handed back untouched.
  return uri_string if uri_string.kind_of?(self)

  uri_regex =
    /^(([^:\/?#]+):)?(\/\/([^\/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?/
  fragments = uri_string.scan(uri_regex)[0]
  return nil if fragments.nil?
  # Capture groups 1, 3, 4, 6 and 8 hold the bare components; the others
  # include their delimiters.
  scheme, authority, path, query, fragment = fragments.values_at(1, 3, 4, 6, 8)

  userinfo = host = port = nil
  unless authority.nil?
    userinfo = authority.scan(/^([^\[\]]*)@/).flatten[0]
    host = authority.gsub(/^([^\[\]]*)@/, "").gsub(/:([^:@\[\]]*?)$/, "")
    port = authority.scan(/:([^:@\[\]]*?)$/).flatten[0]
  end
  port = nil if port == ""

  # WARNING: Not standards-compliant, but follows the theme
  # of Postel's law:
  #
  # Special exception for the feed pseudo-protocol ("feed:http://...").
  # Without it the parser would read the URI as having host "http" and a
  # blank port number, instead of as having a second URI embedded within.
  # The inner URI is instead treated as an opaque path.
  if scheme == "feed" && host == "http"
    userinfo = host = port = nil
    path = authority + path
  end

  URI.new(scheme, userinfo, host, port, path, query, fragment)
end
Returns a hash of common IP-based schemes and their default port numbers. Adding new schemes to this hash, as necessary, will allow for better URI normalization.
# File lib/feed_tools/vendor/uri.rb, line 232
#
# Lazily builds and returns the table of scheme name => default port.
# The same hash object is returned on every call, so entries added by
# callers persist.
def self.scheme_mapping
  @protocol_mapping ||= {
    "http" => 80,
    "https" => 443,
    "ftp" => 21,
    "tftp" => 69,
    "ssh" => 22,
    "svn+ssh" => 22,
    "telnet" => 23,
    "nntp" => 119,
    "gopher" => 70,
    "wais" => 210,
    "prospero" => 1525
  }
end
Normalizes percent escaping of characters.
# File lib/feed_tools/vendor/uri.rb, line 688
#
# Normalizes percent escaping of characters.
#
# Every %XX sequence in +escaped_section+ is first decoded. If libidn is
# in use the result is NFKC-normalized. The section is then re-encoded:
# unreserved characters and "/" are kept literally, and every other byte is
# written as "%" followed by exactly two uppercase hex digits.
#
# Returns nil when given nil; the argument itself is not modified.
def self.normalize_escaping(escaped_section)
  return nil if escaped_section.nil?
  normalized_section = escaped_section.gsub(/%[0-9a-f]{2}/i) do |sequence|
    sequence[1..2].to_i(16).chr
  end
  if URI::IDNA.send(:use_libidn?)
    normalized_section =
      IDN::Stringprep.nfkc_normalize(normalized_section)
  end
  new_section = ""
  # Walk bytes rather than indexing the string: String#[] returns an
  # integer on Ruby 1.8 but a one-character string on later versions.
  normalized_section.each_byte do |byte|
    character = byte.chr
    if self.unreserved?(character) || character == "/"
      new_section << character
    else
      # Zero-pad so that bytes below 0x10 yield a valid escape ("%0A",
      # not "%A").
      new_section << ("%%%02X" % byte)
    end
  end
  return new_section
end
Resolves paths to their simplest form.
# File lib/feed_tools/vendor/uri.rb, line 665
#
# Resolves paths to their simplest form by repeatedly removing "." and
# ".." segments until the path stops changing.
#
# Returns nil when given nil; the argument itself is not modified.
def self.normalize_path(path)
  return nil if path.nil?
  normalized_path = path.dup
  loop do
    previous_state = normalized_path.dup
    # "/./" in the middle and "/." at the end both collapse to "/".
    normalized_path.gsub!(/\/\.\//, "/")
    normalized_path.gsub!(/\/\.$/, "/")
    # "/parent/../" collapses to "/". The parent segment is escaped before
    # being interpolated so that characters such as "(" or "+" in a path
    # can neither break the pattern nor match unrelated segments. When no
    # parent is found, parent.to_s is "" and the pattern removes "//../",
    # i.e. an empty segment followed by "..".
    parent = normalized_path.scan(/\/([^\/]+)\/\.\.\//).flatten[0]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.\//, "/")
    end
    # Same again for a trailing "/parent/..".
    parent = normalized_path.scan(/\/([^\/]+)\/\.\.$/).flatten[0]
    if parent != "." && parent != ".."
      normalized_path.gsub!(
        /\/#{Regexp.escape(parent.to_s)}\/\.\.$/, "/")
    end
    # Strip a leading "./" or "../", or a path that is only "." or "..".
    # The dot(s) must be a whole segment, so names that merely begin with
    # a dot (".htaccess", "..foo") are left alone.
    normalized_path.gsub!(/^\.\.?(\/|$)/, "")
    normalized_path.gsub!(/^\/\.\.?\//, "/")
    break if previous_state == normalized_path
  end
  return normalized_path
end
Returns a list of unreserved characters.
# File lib/feed_tools/vendor/uri.rb, line 720
#
# Returns the sorted, memoized list of unreserved characters: the ASCII
# letters in both cases, the digits, and "-", ".", "_" and "~". Each entry
# is a one-character string.
def self.unreserved
  @unreserved ||= (
    ["-", ".", "_", "~"] +
    ("a".."z").to_a +
    ("A".."Z").to_a +
    ("0".."9").to_a
  ).sort
end
Returns true if the specified character is unreserved.
# File lib/feed_tools/vendor/uri.rb, line 712
#
# Tells whether +character+ is in the unreserved list. The argument may be
# anything responding to chr (such as an integer character code) or a
# String, of which only the first character is examined. Any other value
# yields false.
def self.unreserved?(character)
  candidate = character.respond_to?(:chr) ? character.chr : nil
  candidate = character[0..0] if character.kind_of?(String)
  self.unreserved.include?(candidate)
end
Joins two URIs together.
# File lib/feed_tools/vendor/uri.rb, line 312
#
# Joins two URIs together: resolves +uri+ (a URI object, or anything whose
# to_s can be parsed) as a reference against self and returns the result
# as a new URI. Neither operand is modified. An empty reference yields a
# copy of self.
#
# The resolution follows section 5.2.2 of RFC 3986, and relative paths are
# merged as described in section 5.2.3.
def +(uri)
  uri = URI.parse(uri.to_s) unless uri.kind_of?(self.class)
  return self.dup if uri.to_s == ""

  joined_scheme = nil
  joined_userinfo = nil
  joined_host = nil
  joined_port = nil
  joined_path = nil
  joined_query = nil

  # Section 5.2.2 of RFC 3986
  if uri.scheme != nil
    # The reference is absolute: only its dot segments are cleaned up.
    joined_scheme = uri.scheme
    joined_userinfo = uri.userinfo
    joined_host = uri.host
    joined_port = uri.specified_port
    joined_path = self.class.normalize_path(uri.path)
    joined_query = uri.query
  elsif uri.authority != nil
    # Network-path reference: inherit the scheme only.
    joined_scheme = self.scheme
    joined_userinfo = uri.userinfo
    joined_host = uri.host
    joined_port = uri.specified_port
    joined_path = self.class.normalize_path(uri.path)
    joined_query = uri.query
  else
    joined_scheme = self.scheme
    joined_userinfo = self.userinfo
    joined_host = self.host
    joined_port = self.specified_port
    if uri.path == nil || uri.path == ""
      # Same document: keep the base path, and the base query too unless
      # the reference supplies its own.
      joined_path = self.path
      joined_query = uri.query != nil ? uri.query : self.query
    else
      if uri.path[0..0] == "/"
        joined_path = self.class.normalize_path(uri.path)
      else
        # Section 5.2.3 of RFC 3986 (merge paths).
        base_path = self.path.nil? ? "" : self.path.dup
        base_path = self.class.normalize_path(base_path)
        if base_path == "" && self.authority != nil
          # A base with an authority and an empty path merges as "/", so
          # the authority and the reference path cannot run together.
          base_path = "/"
        else
          # Drop everything after the right-most "/", or the whole base
          # path when it contains no "/" at all.
          base_path = base_path.sub(/[^\/]*\z/, "")
        end
        joined_path = self.class.normalize_path(base_path + uri.path)
      end
      joined_query = uri.query
    end
  end
  # The fragment always comes from the reference.
  joined_fragment = uri.fragment

  return URI.new(
    joined_scheme,
    joined_userinfo,
    joined_host,
    joined_port,
    joined_path,
    joined_query,
    joined_fragment
  )
end
Returns true if the URI objects are equal. This method normalizes both URIs before doing the comparison, and allows comparison against strings.
# File lib/feed_tools/vendor/uri.rb, line 533
#
# Returns true if the URI objects are equal after normalization. The
# argument may be anything responding to normalize, or any other object,
# in which case its to_s is parsed first. If that parsing or normalizing
# raises, the objects are considered unequal and false is returned.
def ===(uri)
  if uri.respond_to?(:normalize)
    uri_string = uri.normalize.to_s
  else
    begin
      uri_string = URI.parse(uri.to_s).normalize.to_s
    rescue StandardError
      # Only StandardError is treated as "not equal"; Interrupt,
      # SystemExit and similar still propagate.
      return false
    end
  end
  return self.normalize.to_s == uri_string
end
Returns the authority segment of this URI.
# File lib/feed_tools/vendor/uri.rb, line 187
#
# Returns the authority segment of this URI, "userinfo@host:port", where
# the userinfo and port parts appear only when present. The assembled
# string is memoized in @authority. Returns nil (and caches nothing) when
# the URI has no host.
def authority
  cached = defined?(@authority) ? @authority : nil
  return cached unless cached.nil?
  return nil if self.host.nil?

  assembled = ""
  assembled << "#{self.userinfo}@" unless self.userinfo.nil?
  assembled << self.host
  assembled << ":#{self.specified_port}" unless self.specified_port.nil?
  @authority = assembled
end
Creates a URI suitable for display to users. If semantic attacks are likely, the application should try to detect these and warn the user. See RFC 3986 section 7.6 for more information.
# File lib/feed_tools/vendor/uri.rb, line 520
#
# Creates a URI suitable for display to users: the normalized form of
# self, with its host replaced by the result of URI::IDNA.to_unicode. If
# that conversion raises, the normalized host is left in place.
def display_uri
  displayed = self.normalize
  begin
    displayed.instance_variable_set("@host",
      URI::IDNA.to_unicode(displayed.host))
  rescue StandardError
    # Best effort only. StandardError rather than Exception, so that
    # Interrupt and SystemExit are not swallowed.
  end
  return displayed
end
Clones the URI object.
# File lib/feed_tools/vendor/uri.rb, line 562
#
# Clones the URI object. Every non-nil string component is duplicated so
# the copy shares no component strings with the original. The copy is
# built from the effective port, and the original's @specified_port is
# then copied onto it.
def dup
  copy_component = lambda { |value| value.nil? ? nil : value.dup }
  duplicated_uri = URI.new(
    copy_component.call(self.scheme),
    copy_component.call(self.userinfo),
    copy_component.call(self.host),
    self.port,
    copy_component.call(self.path),
    copy_component.call(self.query),
    copy_component.call(self.fragment)
  )
  @specified_port = nil unless defined?(@specified_port)
  duplicated_uri.instance_variable_set("@specified_port", @specified_port)
  duplicated_uri
end
Merges two URIs together.
# File lib/feed_tools/vendor/uri.rb, line 382
#
# Merges two URIs together. This is a named alias for the + operator.
def merge(uri)
  self + uri
end
Returns a normalized URI object.
NOTE: This method does not attempt to conform to specifications. It exists largely to correct other people's failures to read the specifications, and also to deal with caching issues, since several different URIs may represent the same resource and should not be cached multiple times.
# File lib/feed_tools/vendor/uri.rb, line 398
#
# Returns a normalized URI object; self is not modified.
#
# The scheme and host are stripped and lowercased, the host is passed
# through URI::IDNA.to_ascii (failures are ignored), decimal / octal /
# hexadecimal IPv4 notations are rewritten as dotted decimal, a port equal
# to the scheme's default is dropped, and the path, query and fragment
# have their dot segments and percent-escaping normalized.
#
# A "feed:" URI that wraps an http URI is unwrapped, and the normalized
# inner URI is returned instead.
def normalize
  normalized_scheme = nil
  normalized_scheme = self.scheme.strip.downcase if self.scheme != nil
  normalized_scheme = "svn+ssh" if normalized_scheme == "ssh+svn"
  if normalized_scheme == "feed"
    if self.to_s =~ /^feed:\/*http:\/*/
      return self.class.parse(
        self.to_s.scan(/^feed:\/*(http:\/*.*)/).flatten[0]).normalize
    end
  end
  normalized_userinfo = nil
  normalized_userinfo = self.userinfo.strip if self.userinfo != nil
  normalized_host = nil
  normalized_host = self.host.strip.downcase if self.host != nil
  if normalized_host != nil
    begin
      normalized_host = URI::IDNA.to_ascii(normalized_host)
    rescue StandardError
      # Best effort: keep the host as-is when IDNA conversion fails.
      # StandardError rather than Exception, so that Interrupt and
      # SystemExit are not swallowed.
    end
  end

  # Normalize IPv4 addresses that were written on the assumption that
  # inet_addr() would be used to parse the IP address.
  if normalized_host != nil && normalized_host.strip =~ /^\d+$/
    # Decimal IPv4 address.
    decimal = normalized_host.to_i
    if decimal < (256 ** 4)
      octets = [0,0,0,0]
      octets[0] = decimal >> 24
      decimal -= (octets[0] * (256 ** 3))
      octets[1] = decimal >> 16
      decimal -= (octets[1] * (256 ** 2))
      octets[2] = decimal >> 8
      decimal -= (octets[2] * (256 ** 1))
      octets[3] = decimal
      normalized_host = octets.join(".")
    end
  elsif (normalized_host != nil && normalized_host.strip =~
      /^0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}\.0+[0-7]{3}$/)
    # Octal IPv4 address. The separators are matched as literal dots,
    # since the host is split on "." below.
    octet_strings = normalized_host.split('.')
    octets = []
    octet_strings.each do |octet_string|
      decimal = octet_string.to_i(8)
      octets << decimal
    end
    normalized_host = octets.join(".")
  elsif (normalized_host != nil && normalized_host.strip =~
      /^0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}\.0x[0-9a-f]{2}$/i)
    # Hexadecimal IPv4 address (literal dots, as above).
    octet_strings = normalized_host.split('.')
    octets = []
    octet_strings.each do |octet_string|
      # Skip the "0x" prefix.
      decimal = octet_string[2...4].to_i(16)
      octets << decimal
    end
    normalized_host = octets.join(".")
  end
  normalized_port = self.port
  if self.class.scheme_mapping[normalized_scheme] == normalized_port
    normalized_port = nil
  end
  normalized_path = nil
  normalized_path = self.path.strip if self.path != nil
  if normalized_scheme != nil && normalized_host == nil
    # For IP-based schemes with no host, the path is taken to be the host
    # ("http:example" style input), and ".com" is appended to a dotless
    # name.
    if self.class.ip_based_schemes.include?(normalized_scheme) &&
        normalized_path =~ /[\w\.]+/
      normalized_host = normalized_path
      normalized_path = nil
      unless normalized_host =~ /\./
        normalized_host = normalized_host + ".com"
      end
    end
  end
  if normalized_path == nil &&
      normalized_scheme != nil &&
      normalized_host != nil
    normalized_path = "/"
  end
  if normalized_path != nil
    normalized_path = self.class.normalize_path(normalized_path)
    normalized_path = self.class.normalize_escaping(normalized_path)
  end
  if normalized_path == ""
    if ["http", "https", "ftp", "tftp"].include?(normalized_scheme)
      normalized_path = "/"
    end
  end
  # These characters are restored to literal form in the path after
  # escaping has been normalized.
  normalized_path.gsub!(/%3B/, ";") if normalized_path != nil
  normalized_path.gsub!(/%3A/, ":") if normalized_path != nil
  normalized_path.gsub!(/%40/, "@") if normalized_path != nil
  normalized_path.gsub!(/%2B/, "+") if normalized_path != nil

  normalized_query = nil
  normalized_query = self.query.strip if self.query != nil
  normalized_query = self.class.normalize_escaping(normalized_query)
  # Likewise for the query's key/value delimiters and "+".
  normalized_query.gsub!(/%3D/, "=") if normalized_query != nil
  normalized_query.gsub!(/%26/, "&") if normalized_query != nil
  normalized_query.gsub!(/%2B/, "+") if normalized_query != nil

  normalized_fragment = nil
  normalized_fragment = self.fragment.strip if self.fragment != nil
  normalized_fragment = self.class.normalize_escaping(normalized_fragment)
  return URI.new(
    normalized_scheme,
    normalized_userinfo,
    normalized_host,
    normalized_port,
    normalized_path,
    normalized_query,
    normalized_fragment
  )
end
Returns the password for this URI.
# File lib/feed_tools/vendor/uri.rb, line 213
#
# Returns the password for this URI: the part of the userinfo after its
# first ":", with surrounding whitespace removed. Returns nil when there
# is no userinfo, or when the userinfo contains no ":" at all. A non-nil
# result is memoized in @password.
def password
  if !defined?(@password) || @password.nil?
    @password = nil
    return @password if @userinfo.nil?
    # The capture is nil for a userinfo such as "user" that has no colon,
    # so it must be checked before strip is called on it.
    extracted = @userinfo.strip[/:(.*)$/, 1]
    @password = extracted.strip unless extracted.nil?
  end
  return @password
end
Returns the port number for this URI. This method will normalize to the default port for the URI's scheme if the port isn't explicitly specified in the URI.
# File lib/feed_tools/vendor/uri.rb, line 254
#
# Returns the effective port number. A usable (non-zero) @port is coerced
# to an integer and returned. Otherwise the default port for the stripped,
# lowercased scheme is looked up in scheme_mapping, or nil is used when
# there is no scheme. In every case the result is stored back into @port.
def port
  unless @port.to_i == 0
    @port = @port.to_i
    return @port
  end

  @port =
    if self.scheme.nil?
      nil
    else
      self.class.scheme_mapping[self.scheme.strip.downcase]
    end
  @port
end
Returns the port number that was actually specified in the URI string.
# File lib/feed_tools/vendor/uri.rb, line 269
#
# Returns, as an integer, the port that was explicitly given for this URI.
# Returns nil when none was given or when the stored value converts to
# zero. @specified_port is initialized to nil if it was never set.
def specified_port
  @specified_port = nil unless defined?(@specified_port)
  return nil if @specified_port.nil?

  numeric = @specified_port.to_s.to_i
  numeric == 0 ? nil : numeric
end
Returns the assembled URI as a string.
# File lib/feed_tools/vendor/uri.rb, line 591
#
# Returns the assembled URI as a string. Components are emitted in the
# order scheme, authority, path, query, fragment, each with its delimiter;
# components that are nil are left out entirely.
def to_s
  assembled = ""
  assembled << "#{self.scheme}:" unless self.scheme.nil?
  assembled << "//#{self.authority}" unless self.authority.nil?
  assembled << self.path unless self.path.nil?
  assembled << "?#{self.query}" unless self.query.nil?
  assembled << "##{self.fragment}" unless self.fragment.nil?
  assembled
end
Assigns the specified components to the appropriate instance variables. Used in destructive operations to avoid code repetition.
# File lib/feed_tools/vendor/uri.rb, line 737
#
# Assigns the specified components to the appropriate instance variables.
# Used in destructive operations to avoid code repetition.
#
# The port may be nil, an integer, or a string of digits. @specified_port
# keeps the string form of whatever was passed, while @port holds the
# integer value, or nil when no port or port 0 was given.
#
# Raises InvalidURIError when every component is nil, when the port is not
# made up of digits, or when a scheme is present but both host and path
# are empty strings.
def assign_components(scheme, userinfo, host, port, path, query, fragment)
  if scheme == nil && userinfo == nil && host == nil && port == nil &&
      path == nil && query == nil && fragment == nil
    raise InvalidURIError, "All parameters were nil."
  end
  @scheme = scheme
  @userinfo = userinfo
  @host = host
  @specified_port = port.to_s
  @port = port
  # Integer rather than Fixnum: Fixnum no longer exists on current Ruby
  # versions, and Integer is its superclass on the older ones.
  @port = @port.to_s if @port.kind_of?(Integer)
  if @port != nil && !(@port =~ /^\d+$/)
    raise InvalidURIError,
      "Invalid port number: #{@port.inspect}"
  end
  @port = @port.to_i
  @port = nil if @port == 0
  @path = path
  @query = query
  @fragment = fragment
  if @scheme != nil && @host == "" && @path == ""
    raise InvalidURIError,
      "Absolute URI missing hierarchical segment."
  end
end
Replaces the internal state of self with the specified URI's state. Used in destructive operations to avoid code repetition.
# File lib/feed_tools/vendor/uri.rb, line 765
#
# Overwrites this object's components with those of +uri+ and returns
# self. The memoized @authority, @user and @password values are cleared.
# @specified_port is read directly from the other object's instance
# variable, and @port is set to its integer conversion.
def replace_self(uri)
  # Drop memoized values derived from the previous state.
  @authority = @user = @password = nil

  @scheme, @userinfo, @host = uri.scheme, uri.userinfo, uri.host
  @specified_port = uri.instance_variable_get("@specified_port")
  @port = @specified_port.to_s.to_i
  @path, @query, @fragment = uri.path, uri.query, uri.fragment
  self
end