Class | Gem::Indexer |
In: |
lib/rubygems/indexer.rb
|
Parent: | Object |
Top level class for building the gem repository index.
build_legacy | [RW] | Build indexes for RubyGems older than 1.2.0 when true |
build_modern | [RW] | Build indexes for RubyGems 1.2.0 and newer when true |
dest_directory | [R] | Index install location |
dest_latest_specs_index | [R] | Latest specs index install location |
dest_prerelease_specs_index | [R] | Prerelease specs index install location |
dest_specs_index | [R] | Specs index install location |
directory | [R] | Index build directory |
Create an indexer that will index the gems in directory.
# File lib/rubygems/indexer.rb, line 59
#
# Creates an indexer for the gems found under +directory+.
#
# +directory+ is stored as the install location; every index is first built
# inside a per-process scratch directory below Dir.tmpdir.
#
# Recognised +options+ keys:
#
# :build_legacy::  build the legacy indexes (true unless overridden)
# :build_modern::  build the modern indexes (true unless overridden)
# :rss_title::     stored for RSS generation
# :rss_host::      stored for RSS generation
# :rss_gems_host:: stored for RSS generation
#
# Raises RuntimeError when String#to_xs is not available.
def initialize(directory, options = {})
  unless ''.respond_to? :to_xs
    fail "Gem::Indexer requires that the XML Builder library be installed:" \
         "\n\tgem install builder"
  end

  defaults = { :build_legacy => true, :build_modern => true }
  options  = defaults.merge options

  @build_legacy  = options[:build_legacy]
  @build_modern  = options[:build_modern]

  @rss_title     = options[:rss_title]
  @rss_host      = options[:rss_host]
  @rss_gems_host = options[:rss_gems_host]

  @dest_directory = directory
  @directory      = File.join Dir.tmpdir, "gem_generate_index_#{$$}"

  version      = Gem.marshal_version
  marshal_name = "Marshal.#{version}"

  # Legacy master indexes.
  @master_index  = File.join @directory, 'yaml'
  @marshal_index = File.join @directory, marshal_name

  # Quick (per-gem) index locations.
  @quick_dir         = File.join @directory, 'quick'
  @quick_marshal_dir = File.join @quick_dir, marshal_name
  @quick_index       = File.join @quick_dir, 'index'
  @latest_index      = File.join @quick_dir, 'latest_index'

  # Modern indexes: one copy in the build directory, one install path.
  specs_name      = "specs.#{version}"
  latest_name     = "latest_specs.#{version}"
  prerelease_name = "prerelease_specs.#{version}"

  @specs_index            = File.join @directory, specs_name
  @latest_specs_index     = File.join @directory, latest_name
  @prerelease_specs_index = File.join(@directory, prerelease_name)

  @dest_specs_index            = File.join @dest_directory, specs_name
  @dest_latest_specs_index     = File.join @dest_directory, latest_name
  @dest_prerelease_specs_index = File.join @dest_directory, prerelease_name

  @rss_index = File.join @directory, 'index.rss'

  @files = []
end
Abbreviate the spec for downloading. Abbreviated specs are only used for searching, downloading and related activities and do not need deployment specific information (e.g. list of files). So we abbreviate the spec, making it much smaller for quicker downloads.
# File lib/rubygems/indexer.rb, line 113
#
# Empties the bulky list attributes of +spec+ (files, test files, rdoc
# options, extra rdoc files and certificate chain) in place and returns the
# same +spec+ object.
def abbreviate(spec)
  setters = [:files=, :test_files=, :rdoc_options=,
             :extra_rdoc_files=, :cert_chain=]

  # The block builds a new [] on every pass, so no two attributes share an
  # array.
  setters.each { |setter| spec.send setter, [] }

  spec
end
Builds the various indices, then compresses them.
# File lib/rubygems/indexer.rb, line 125
#
# Builds every enabled index for +index+ and then compresses the results.
# The Marshal gemspecs are always built; the legacy and modern indexes are
# built only when the corresponding flag is set.
def build_indicies(index)
  # Marshal gemspecs are used by both modern and legacy RubyGems
  build_marshal_gemspecs(index)

  if @build_legacy
    build_legacy_indicies(index)
  end

  if @build_modern
    build_modern_indicies(index)
  end

  build_rss(index)

  compress_indicies
end
Builds the indices used by RubyGems older than 1.2.0.
# File lib/rubygems/indexer.rb, line 138
#
# Writes the legacy index files for +index+ into the build directory:
# one deflated YAML gemspec per released gem, the quick and latest name
# lists, the Marshal master index and the YAML master index. The generated
# paths (and the names their compressed copies will have) are appended to
# @files.
def build_legacy_indicies(index)
  gem_count = index.size

  progress = ui.progress_reporter gem_count,
    "Generating YAML quick index gemspecs for #{gem_count} gems",
    "Complete"

  Gem.time 'Generated YAML quick index gemspecs' do
    index.released_gems.each do |original_name, spec|
      yaml_name = File.join @quick_dir, "#{original_name}.gemspec.rz"
      deflated  = Gem.deflate spec.to_yaml

      File.open(yaml_name, 'wb') { |io| io.write deflated }

      progress.updated original_name
    end

    progress.done
  end

  say "Generating quick index"

  Gem.time 'Generated quick index' do
    File.open(@quick_index, 'wb') do |io|
      names = index.sort.map { |_, spec| spec.original_name }
      io.puts names
    end
  end

  say "Generating latest index"

  Gem.time 'Generated latest index' do
    File.open(@latest_index, 'wb') do |io|
      names = index.latest_specs.sort.map { |spec| spec.original_name }
      io.puts names
    end
  end

  # Don't need prerelease legacy index

  say "Generating Marshal master index"

  Gem.time 'Generated Marshal master index' do
    File.open(@marshal_index, 'wb') { |io| io.write index.dump }
  end

  progress = ui.progress_reporter index.size,
    "Generating YAML master index for #{index.size} gems (this may take a while)",
    "Complete"

  Gem.time 'Generated YAML master index' do
    File.open(@master_index, 'wb') do |io|
      io.puts "--- !ruby/object:#{index.class}"
      io.puts "gems:"

      ordered = index.sort_by { |_, gemspec| gemspec.sort_obj }

      ordered.each do |original_name, gemspec|
        # NOTE(review): the indentation literals below are copied from the
        # listing as shown; the listing may have collapsed runs of spaces,
        # so confirm the widths against the real source file.
        indented = gemspec.to_yaml.gsub(/^/, ' ')
        indented = indented.sub(/\A ---/, '') # there's a needed extra ' ' here

        io.print " #{original_name}:"
        io.puts indented

        progress.updated original_name
      end
    end

    progress.done
  end

  @files.push @quick_dir,
              @master_index, "#{@master_index}.Z",
              @marshal_index, "#{@marshal_index}.Z"
end
Builds Marshal quick index gemspecs.
# File lib/rubygems/indexer.rb, line 216
#
# Writes one deflated, Marshal-dumped gemspec per entry of index.gems into
# @quick_marshal_dir, records that directory in @files and returns the list
# of file paths that were written.
def build_marshal_gemspecs(index)
  gem_count = index.size

  progress = ui.progress_reporter gem_count,
    "Generating Marshal quick index gemspecs for #{gem_count} gems",
    "Complete"

  written = []

  Gem.time 'Generated Marshal quick index gemspecs' do
    index.gems.each do |original_name, spec|
      target  = File.join @quick_marshal_dir, "#{original_name}.gemspec.rz"
      payload = Gem.deflate Marshal.dump(spec)

      File.open(target, 'wb') { |io| io.write payload }

      written << target

      progress.updated original_name
    end

    progress.done
  end

  @files << @quick_marshal_dir

  written
end
Build a single index for RubyGems 1.2 and newer
# File lib/rubygems/indexer.rb, line 247
#
# Writes a single modern index to +file+.
#
# index:: collection of specs (or name/spec pairs) to record
# file::  path of the Marshal file to write
# name::  human-readable index name used in progress messages
#
# Each spec is recorded as a [name, version, platform] tuple. Specs whose
# original platform is not a String are reported with a warning and left
# out of the index entirely.
def build_modern_index(index, file, name)
  say "Generating #{name} index"

  Gem.time "Generated #{name} index" do
    open(file, 'wb') do |io|
      specs = index.map do |*spec|
        # We have to splat here because latest_specs is an array,
        # while the others are hashes. See the TODO in source_index.rb
        spec = spec.flatten.last
        platform = spec.original_platform

        # win32-api-1.0.4-x86-mswin32-60
        unless String === platform then
          alert_warning "Skipping invalid platform in gem: #{spec.full_name}"
          next
        end

        # platform is known to be a String here, so only the empty case
        # needs a default.
        platform = Gem::Platform::RUBY if platform.empty?
        [spec.name, spec.version, platform]
      end

      # `next` above leaves a nil in the mapped array for every skipped
      # spec; drop those so they are not written out as [nil, nil, nil].
      specs = compact_specs(specs.compact)
      Marshal.dump(specs, io)
    end
  end
end
Builds the indices used by RubyGems 1.2 and newer. Handles the full, latest and prerelease indexes.
# File lib/rubygems/indexer.rb, line 277
#
# Builds the released, latest and prerelease modern indexes from +index+
# and adds each index path, plus the name of its future .gz copy, to @files.
def build_modern_indicies(index)
  targets = [
    [:released_specs,   @specs_index,            'specs'],
    [:latest_specs,     @latest_specs_index,     'latest specs'],
    [:prerelease_specs, @prerelease_specs_index, 'prerelease specs']
  ]

  targets.each do |reader, path, label|
    build_modern_index(index.send(reader).sort, path, label)
  end

  generated = targets.map { |_, path, _| [path, "#{path}.gz"] }.flatten

  @files += generated
end
Builds an RSS feed of the gems released during the past two days, according to each gem's date.
# File lib/rubygems/indexer.rb, line 298
#
# Writes an RSS 2.0 feed to @rss_index listing the gems in +index+ whose
# date lies between Gem::Specification::TODAY minus one day and TODAY
# (inclusive), newest first, then records @rss_index in @files.
#
# Does nothing (apart from an optional warning in really-verbose mode) when
# either @rss_host or @rss_gems_host is unset. Gems whose file cannot be
# stat'ed are left out of the feed.
def build_rss(index)
  if @rss_host.nil? or @rss_gems_host.nil? then
    if Gem.configuration.really_verbose then
      alert_warning "no --rss-host or --rss-gems-host, RSS generation disabled"
    end
    return
  end

  require 'cgi'
  require 'rubygems/text'

  extend Gem::Text

  Gem.time 'Generated rss' do
    open @rss_index, 'wb' do |io|
      rss_host = CGI.escapeHTML @rss_host
      rss_title = CGI.escapeHTML(@rss_title || 'gems')

      io.puts <<-HEADER
<?xml version="1.0"?>
<rss version="2.0">
<channel>
<title>#{rss_title}</title>
<link>http://#{rss_host}</link>
<description>Recently released gems from http://#{rss_host}</description>
<generator>RubyGems v#{Gem::RubyGemsVersion}</generator>
<docs>http://cyber.law.harvard.edu/rss/rss.html</docs>
      HEADER

      today = Gem::Specification::TODAY
      yesterday = today - 86400

      # Keep only specs dated within [yesterday, today]. Dates that are
      # neither a Date nor a Time are excluded.
      recent = index.select do |_, spec|
        spec_date = spec.date

        released = case spec_date
                   when Date then Time.parse(spec_date.to_s)
                   when Time then spec_date
                   end

        released && released >= yesterday && released <= today
      end

      recent.sort_by { |_, spec| [-spec.date.to_i, spec] }.each do |_, spec|
        gem_path = CGI.escapeHTML "http://#{@rss_gems_host}/gems/#{spec.full_name}.gem"

        # Skip gems whose file is missing or unreadable.
        size = begin
                 File.stat(spec.loaded_from).size
               rescue StandardError
                 next
               end

        description = spec.description || spec.summary || ''
        authors = Array spec.authors
        emails = Array spec.email

        # Pair each email with its author name. An email without a name is
        # kept as-is instead of being dropped from the list.
        authors = emails.zip(authors).map do |email, author|
          if author and not author.empty? then
            "#{email} (#{author})"
          else
            email
          end
        end.join ', '

        description = description.split(/\n\n+/).map do |chunk|
          format_text chunk, 78
        end

        description = description.join "\n\n"

        item = <<-ITEM
<item>
<title>#{CGI.escapeHTML spec.full_name}</title>
<description>
<pre>#{CGI.escapeHTML description.chomp}</pre>
</description>
<author>#{CGI.escapeHTML authors}</author>
<guid>#{CGI.escapeHTML spec.full_name}</guid>
<enclosure url="#{gem_path}"
length="#{size}" type="application/octet-stream" />
<pubDate>#{spec.date.rfc2822}</pubDate>
        ITEM

        item << "<link>#{CGI.escapeHTML spec.homepage}</link>\n" if spec.homepage

        item << "</item>\n"

        io.puts item
      end

      io.puts "</channel>\n</rss>\n"
    end
  end

  @files << @rss_index
end
Collect specifications from .gem files from the gem directory.
# File lib/rubygems/indexer.rb, line 413
#
# Loads the specification of every gem file in +gems+ (by default the
# result of gem_file_list) into a new Gem::SourceIndex and returns it.
#
# Zero-length gem files and gems whose file name does not match their
# spec are skipped with a warning. Any other failure while processing a
# gem is reported with alert_error and processing continues; signals are
# re-raised.
def collect_specs(gems = gem_file_list)
  index = Gem::SourceIndex.new

  progress = ui.progress_reporter gems.size,
    "Loading #{gems.size} gems from #{@dest_directory}",
    "Loaded all gems"

  Gem.time 'loaded' do
    gems.each do |gemfile|
      if File.size(gemfile.to_s).zero?
        alert_warning "Skipping zero-length gem: #{gemfile}"
        next
      end

      begin
        spec = Gem::Format.from_file_by_path(gemfile).spec
        spec.loaded_from = gemfile

        name_pattern = /\/#{Regexp.escape spec.original_name}.*\.gem\z/i

        unless gemfile =~ name_pattern
          expected_name = spec.full_name
          unless spec.original_name == spec.full_name
            expected_name << " (#{spec.original_name})"
          end
          alert_warning "Skipping misnamed gem: #{gemfile} should be named #{expected_name}"
          next
        end

        abbreviate spec
        sanitize spec

        index.add_spec spec, spec.original_name

        progress.updated spec.original_name
      rescue SignalException
        alert_error "Received signal, exiting"
        raise
      rescue Exception => e
        # Deliberately broad: one unreadable gem must not abort the run.
        trace = e.backtrace.join "\n\t"
        alert_error "Unable to process #{gemfile}\n#{e.message} (#{e.class})\n\t#{trace}"
      end
    end

    progress.done
  end

  index
end
Compacts Marshal output for the specs index data source by using identical objects as much as possible.
# File lib/rubygems/indexer.rb, line 495
#
# Returns a copy of +specs+ (an array of [name, version, platform] tuples)
# in which equal names, equal versions and equal platforms are all
# represented by the first object seen with that value, so that Marshal
# can emit back-references instead of repeated copies.
def compact_specs(specs)
  # One lookup table per tuple position; a miss stores the object under
  # itself, so later equal objects resolve to the first one.
  names, versions, platforms =
    Array.new(3) { Hash.new { |seen, object| seen[object] = object } }

  specs.map do |entry|
    name, version, platform = entry

    [names[name], versions[version], platforms[platform]]
  end
end
Compresses the file at filename and writes the result to filename.extension.
# File lib/rubygems/indexer.rb, line 512
#
# Deflates the contents of +filename+ and writes the result to
# "filename.extension". The source file is left in place.
def compress(filename, extension)
  deflated = Gem.deflate(Gem.read_binary(filename))

  File.open("#{filename}.#{extension}", 'wb') { |io| io.write deflated }
end
Compresses the indices on disk.
# File lib/rubygems/indexer.rb, line 465
#
# Compresses the index files that were built: each legacy index is
# deflated and then verified against its source, each modern index is
# gzipped.
def compress_indicies
  say "Compressing indicies"

  Gem.time 'Compressed indicies' do
    if @build_legacy
      legacy = [[@quick_index,   'rz'],
                [@latest_index,  'rz'],
                [@marshal_index, 'Z'],
                [@master_index,  'Z']]

      legacy.each do |path, extension|
        compress path, extension
        paranoid path, extension
      end
    end

    if @build_modern
      gzip @specs_index
      gzip @latest_specs_index
      gzip @prerelease_specs_index
    end
  end
end
List of gem file names to index.
# File lib/rubygems/indexer.rb, line 525
#
# Returns the paths of all "*.gem" files directly inside the "gems"
# subdirectory of @dest_directory.
def gem_file_list
  pattern = File.join(@dest_directory, "gems", "*.gem")

  Dir.glob pattern
end
Builds the indices in a temporary directory and installs them; the temporary directory is always removed afterwards.
# File lib/rubygems/indexer.rb, line 532
#
# Runs a full index build: creates the scratch directories, collects the
# specs, builds the indexes and installs them. The scratch directory is
# removed whether or not the build succeeds.
def generate_index
  begin
    make_temp_directories
    build_indicies collect_specs
    install_indicies
  rescue SignalException
    # A signal aborts the build quietly; cleanup still happens below.
  ensure
    FileUtils.rm_rf @directory
  end
end
Zlib::GzipWriter wrapper that gzips filename on disk.
# File lib/rubygems/indexer.rb, line 545
#
# Writes a gzipped copy of +filename+ to "filename.gz".
def gzip(filename)
  Zlib::GzipWriter.open("#{filename}.gz") do |gz|
    gz.write Gem.read_binary(filename)
  end
end
Installs the generated indices into the destination directory.
# File lib/rubygems/indexer.rb, line 554
#
# Moves every path recorded in @files from the build directory into
# @dest_directory, replacing whatever is already installed there.
#
# When the whole quick directory is being installed, the Marshal quick
# directory inside it is not moved separately. When only the Marshal quick
# directory was built, it is moved to the matching location below
# @dest_directory on its own.
def install_indicies
  verbose = Gem.configuration.really_verbose

  say "Moving index into production dir #{@dest_directory}" if verbose

  pending = @files.dup
  installing_quick_dir = pending.include? @quick_dir

  pending.delete @quick_marshal_dir if installing_quick_dir

  if !installing_quick_dir && pending.include?(@quick_marshal_dir)
    pending.delete @quick_marshal_dir

    relative = @quick_marshal_dir.sub @directory, ''
    target   = File.join @dest_directory, relative

    FileUtils.mkdir_p File.dirname(target), :verbose => verbose
    FileUtils.rm_rf target, :verbose => verbose
    FileUtils.mv @quick_marshal_dir, target, :verbose => verbose,
                 :force => true
  end

  # Paths relative to the build directory.
  relative_paths = pending.map { |path| path.sub @directory, '' }

  relative_paths.each do |file|
    source      = File.join @directory, file
    destination = File.join @dest_directory, file

    FileUtils.rm_rf destination, :verbose => verbose
    FileUtils.mv source, @dest_directory, :verbose => verbose,
                 :force => true
  end
end
Make directories for index generation
# File lib/rubygems/indexer.rb, line 592
#
# Recreates the build directory from scratch (mode 0700) together with the
# Marshal quick directory inside it.
def make_temp_directories
  # Start from a clean slate in case an earlier run left files behind.
  FileUtils.rm_rf(@directory)

  FileUtils.mkdir_p(@directory, :mode => 0700)
  FileUtils.mkdir_p(@quick_marshal_dir)
end
Ensures that the file at path and the inflated contents of path.extension are identical.
# File lib/rubygems/indexer.rb, line 601
#
# Verifies that "path.extension" inflates to exactly the contents of
# +path+.
#
# path::      the uncompressed file
# extension:: suffix (without the dot) of the compressed copy
#
# Raises RuntimeError naming both files when the contents differ.
def paranoid(path, extension)
  # Named up front because the error message below needs it as well.
  compressed_path = "#{path}.#{extension}"

  data = Gem.read_binary path
  compressed_data = Gem.read_binary compressed_path

  unless data == Gem.inflate(compressed_data) then
    raise "Compressed file #{compressed_path} does not match uncompressed file #{path}"
  end
end
Sanitize the descriptive fields in the spec. Sometimes non-ASCII characters will garble the site index. Non-ASCII characters will be replaced by their XML entity equivalent.
# File lib/rubygems/indexer.rb, line 615
#
# Runs the summary, description, post-install message and every author of
# +spec+ through sanitize_string, storing the results back on +spec+.
# Returns +spec+.
def sanitize(spec)
  [:summary, :description, :post_install_message].each do |field|
    cleaned = sanitize_string(spec.send(field))
    spec.send "#{field}=", cleaned
  end

  spec.authors = spec.authors.map { |author| sanitize_string(author) }

  spec
end
Sanitize a single string.
# File lib/rubygems/indexer.rb, line 627
#
# Returns +string+ converted with #to_s and then #to_xs. A nil or false
# argument is returned unchanged.
def sanitize_string(string)
  return string unless string

  # HACK the #to_s is in here because RSpec has an Array of Arrays of
  # Strings for authors. Need a way to disallow bad values on gemspec
  # generation. (Probably won't happen.)
  string.to_s.to_xs
end
Performs an in-place update of the repository from newly added gems. Only works for modern indices, and sets build_legacy to false when run.
# File lib/rubygems/indexer.rb, line 638
#
# Updates the installed modern indexes with the gems whose file mtime is
# not older than the installed specs index. Sets @build_legacy to false.
#
# Every installed file is stamped with the newest gem mtime seen, so the
# next run compares gem files against that time rather than against the
# moment the index happened to be written.
#
# Calls terminate_interaction(0) when there is nothing to update.
def update_index
  @build_legacy = false

  make_temp_directories

  specs_mtime = File.stat(@dest_specs_index).mtime
  newest_mtime = Time.at 0

  updated_gems = gem_file_list.select do |gem|
    gem_mtime = File.stat(gem).mtime
    newest_mtime = gem_mtime if gem_mtime > newest_mtime
    gem_mtime >= specs_mtime
  end

  if updated_gems.empty? then
    say 'No new gems'
    terminate_interaction 0
  end

  index = collect_specs updated_gems

  files = build_marshal_gemspecs index

  Gem.time 'Updated indexes' do
    update_specs_index index.released_gems, @dest_specs_index, @specs_index
    update_specs_index index.released_gems, @dest_latest_specs_index, @latest_specs_index
    update_specs_index(index.prerelease_gems, @dest_prerelease_specs_index,
                       @prerelease_specs_index)
  end

  compress_indicies

  verbose = Gem.configuration.really_verbose

  say "Updating production dir #{@dest_directory}" if verbose

  files << @specs_index
  files << "#{@specs_index}.gz"
  files << @latest_specs_index
  files << "#{@latest_specs_index}.gz"
  files << @prerelease_specs_index
  files << "#{@prerelease_specs_index}.gz"

  # Paths relative to the build directory.
  files = files.map do |path|
    path.sub @directory, ''
  end

  files.each do |file|
    src_name = File.join @directory, file
    # Full path of the installed file, not just its directory, so that the
    # mtime below lands on the file itself.
    dst_name = File.join @dest_directory, file

    FileUtils.mv src_name, dst_name, :verbose => verbose,
                 :force => true

    File.utime newest_mtime, newest_mtime, dst_name
  end
end
Combines specs in index and source then writes out a new copy to dest. For a latest index, does not ensure the new file is minimal.
# File lib/rubygems/indexer.rb, line 700
#
# Loads the Marshal-dumped tuple list stored in +source+, adds a
# [name, version, platform] tuple for every spec in +index+ (a nil or
# empty platform becomes Gem::Platform::RUBY), removes duplicates, sorts
# and compacts the list and writes it, Marshal-dumped, to +dest+.
def update_specs_index(index, source, dest)
  existing = Marshal.load Gem.read_binary(source)

  additions = index.map do |_, spec|
    platform = spec.original_platform
    platform = Gem::Platform::RUBY if platform.nil? || platform.empty?

    [spec.name, spec.version, platform]
  end

  merged = compact_specs((existing + additions).uniq.sort)

  File.open(dest, 'wb') { |io| Marshal.dump merged, io }
end