Package CedarBackup2 :: Module filesystem
[hide private]
[frames] | no frames]

Source Code for Module CedarBackup2.filesystem

   1  # -*- coding: iso-8859-1 -*- 
   2  # vim: set ft=python ts=3 sw=3 expandtab: 
   3  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
   4  # 
   5  #              C E D A R 
   6  #          S O L U T I O N S       "Software done right." 
   7  #           S O F T W A R E 
   8  # 
   9  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  10  # 
  11  # Copyright (c) 2004-2008 Kenneth J. Pronovici. 
  12  # All rights reserved. 
  13  # 
  14  # This program is free software; you can redistribute it and/or 
  15  # modify it under the terms of the GNU General Public License, 
  16  # Version 2, as published by the Free Software Foundation. 
  17  # 
  18  # This program is distributed in the hope that it will be useful, 
  19  # but WITHOUT ANY WARRANTY; without even the implied warranty of 
  20  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
  21  # 
  22  # Copies of the GNU General Public License are available from 
  23  # the Free Software Foundation website, http://www.gnu.org/. 
  24  # 
  25  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  26  # 
  27  # Author   : Kenneth J. Pronovici <pronovic@ieee.org> 
  28  # Language : Python (>= 2.3) 
  29  # Project  : Cedar Backup, release 2 
  30  # Revision : $Id: filesystem.py 950 2009-08-16 20:12:09Z pronovic $ 
  31  # Purpose  : Provides filesystem-related objects. 
  32  # 
  33  # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # 
  34   
  35  ######################################################################## 
  36  # Module documentation 
  37  ######################################################################## 
  38   
  39  """ 
  40  Provides filesystem-related objects. 
  41  @sort: FilesystemList, BackupFileList, PurgeItemList 
  42  @author: Kenneth J. Pronovici <pronovic@ieee.org> 
  43  """ 
  44   
  45   
  46  ######################################################################## 
  47  # Imported modules 
  48  ######################################################################## 
  49   
  50  # System modules 
  51  import sys 
  52  import os 
  53  import re 
  54  import math 
  55  import logging 
  56  import tarfile 
  57   
  58  # Cedar Backup modules 
  59  from CedarBackup2.knapsack import firstFit, bestFit, worstFit, alternateFit 
  60  from CedarBackup2.util import AbsolutePathList, ObjectTypeList, UnorderedList, RegexList 
  61  from CedarBackup2.util import removeKeys, displayBytes, calculateFileAge, encodePath, dereferenceLink 
  62   
  63   
  64  ######################################################################## 
  65  # Module-wide variables 
  66  ######################################################################## 
  67   
  68  logger = logging.getLogger("CedarBackup2.log.filesystem") 
  69   
  70   
  71  ######################################################################## 
  72  # FilesystemList class definition 
  73  ######################################################################## 
  74   
75 -class FilesystemList(list):
76 77 ###################### 78 # Class documentation 79 ###################### 80 81 """ 82 Represents a list of filesystem items. 83 84 This is a generic class that represents a list of filesystem items. Callers 85 can add individual files or directories to the list, or can recursively add 86 the contents of a directory. The class also allows for up-front exclusions 87 in several forms (all files, all directories, all items matching a pattern, 88 all items whose basename matches a pattern, or all directories containing a 89 specific "ignore file"). Symbolic links are typically backed up 90 non-recursively, i.e. the link to a directory is backed up, but not the 91 contents of that link (we don't want to deal with recursive loops, etc.). 92 93 The custom methods such as L{addFile} will only add items if they exist on 94 the filesystem and do not match any exclusions that are already in place. 95 However, since a FilesystemList is a subclass of Python's standard list 96 class, callers can also add items to the list in the usual way, using 97 methods like C{append()} or C{insert()}. No validations apply to items 98 added to the list in this way; however, many list-manipulation methods deal 99 "gracefully" with items that don't exist in the filesystem, often by 100 ignoring them. 101 102 Once a list has been created, callers can remove individual items from the 103 list using standard methods like C{pop()} or C{remove()} or they can use 104 custom methods to remove specific types of entries or entries which match a 105 particular pattern. 106 107 @note: Regular expression patterns that apply to paths are assumed to be 108 bounded at front and back by the beginning and end of the string, i.e. they 109 are treated as if they begin with C{^} and end with C{$}. This is true 110 whether we are matching a complete path or a basename. 111 112 @note: Some platforms, like Windows, do not support soft links. 
On those 113 platforms, the ignore-soft-links flag can be set, but it won't do any good 114 because the operating system never reports a file as a soft link. 115 116 @sort: __init__, addFile, addDir, addDirContents, removeFiles, removeDirs, 117 removeLinks, removeMatch, removeInvalid, normalize, 118 excludeFiles, excludeDirs, excludeLinks, excludePaths, 119 excludePatterns, excludeBasenamePatterns, ignoreFile 120 """ 121 122 123 ############## 124 # Constructor 125 ############## 126
def __init__(self):
   """
   Creates an empty list with every exclusion switched off.

   The private backing attributes are created first; afterwards each public
   property is assigned, so the stored values are whatever the property
   setters produce for these defaults.
   """
   list.__init__(self)

   # Backing fields, given placeholder values before the properties are used.
   self._excludeFiles = self._excludeDirs = self._excludeLinks = False
   self._excludePaths = None
   self._excludePatterns = None
   self._excludeBasenamePatterns = None
   self._ignoreFile = None

   # Route the defaults through the public properties.
   self.excludeFiles = False
   self.excludeLinks = False
   self.excludeDirs = False
   self.excludePaths = []
   self.excludePatterns = RegexList()
   self.excludeBasenamePatterns = RegexList()
   self.ignoreFile = None
144 145 146 ############# 147 # Properties 148 ############# 149
150 - def _setExcludeFiles(self, value):
151 """ 152 Property target used to set the exclude files flag. 153 No validations, but we normalize the value to C{True} or C{False}. 154 """ 155 if value: 156 self._excludeFiles = True 157 else: 158 self._excludeFiles = False
159
160 - def _getExcludeFiles(self):
161 """ 162 Property target used to get the exclude files flag. 163 """ 164 return self._excludeFiles
165
166 - def _setExcludeDirs(self, value):
167 """ 168 Property target used to set the exclude directories flag. 169 No validations, but we normalize the value to C{True} or C{False}. 170 """ 171 if value: 172 self._excludeDirs = True 173 else: 174 self._excludeDirs = False
175
176 - def _getExcludeDirs(self):
177 """ 178 Property target used to get the exclude directories flag. 179 """ 180 return self._excludeDirs
181 191 197
def _setExcludePaths(self, value):
   """
   Property target used to set the exclude paths list.

   The backing list is always replaced with a fresh C{AbsolutePathList};
   a C{None} value therefore yields an empty list.  Elements do not have to
   exist on disk at the time of assignment.

   @param value: Iterable of paths, or C{None}.
   @raise ValueError: If any list element is not an absolute path.
   """
   # Install the new list first, then fill it, so the attribute already
   # points at the new object while elements are being added.
   fresh = self._absoluteExcludePaths = AbsolutePathList()
   if value is None:
      return
   fresh.extend(value)
208
209 - def _getExcludePaths(self):
210 """ 211 Property target used to get the absolute exclude paths list. 212 """ 213 return self._absoluteExcludePaths
214
def _setExcludePatterns(self, value):
   """
   Property target used to set the exclude patterns list.

   The backing list is always replaced with a fresh C{RegexList}; a C{None}
   value therefore yields an empty list.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   # Install the new list first, then fill it.
   fresh = self._excludePatterns = RegexList()
   if value is None:
      return
   fresh.extend(value)
223
224 - def _getExcludePatterns(self):
225 """ 226 Property target used to get the exclude patterns list. 227 """ 228 return self._excludePatterns
229
def _setExcludeBasenamePatterns(self, value):
   """
   Property target used to set the exclude basename patterns list.

   The backing list is always replaced with a fresh C{RegexList}; a C{None}
   value therefore yields an empty list.

   @param value: Iterable of regular expression patterns, or C{None}.
   """
   # Install the new list first, then fill it.
   fresh = self._excludeBasenamePatterns = RegexList()
   if value is None:
      return
   fresh.extend(value)
238
240 """ 241 Property target used to get the exclude basename patterns list. 242 """ 243 return self._excludeBasenamePatterns
244
245 - def _setIgnoreFile(self, value):
246 """ 247 Property target used to set the ignore file. 248 The value must be a non-empty string if it is not C{None}. 249 @raise ValueError: If the value is an empty string. 250 """ 251 if value is not None: 252 if len(value) < 1: 253 raise ValueError("The ignore file must be a non-empty string.") 254 self._ignoreFile = value
255
256 - def _getIgnoreFile(self):
257 """ 258 Property target used to get the ignore file. 259 """ 260 return self._ignoreFile
# Public properties.  Each one pairs a private getter with a private setter;
# none of them supports deletion.
excludeFiles = property(fget=_getExcludeFiles, fset=_setExcludeFiles, fdel=None,
                        doc="Boolean indicating whether files should be excluded.")
excludeDirs = property(fget=_getExcludeDirs, fset=_setExcludeDirs, fdel=None,
                       doc="Boolean indicating whether directories should be excluded.")
excludeLinks = property(fget=_getExcludeLinks, fset=_setExcludeLinks, fdel=None,
                        doc="Boolean indicating whether soft links should be excluded.")
excludePaths = property(fget=_getExcludePaths, fset=_setExcludePaths, fdel=None,
                        doc="List of absolute paths to be excluded.")
excludePatterns = property(fget=_getExcludePatterns, fset=_setExcludePatterns, fdel=None,
                           doc="List of regular expression patterns (matching complete path) to be excluded.")
excludeBasenamePatterns = property(fget=_getExcludeBasenamePatterns, fset=_setExcludeBasenamePatterns, fdel=None,
                                   doc="List of regular expression patterns (matching basename) to be excluded.")
ignoreFile = property(fget=_getIgnoreFile, fset=_setIgnoreFile, fdel=None,
                      doc="Name of file which will cause directory contents to be ignored.")


##############
# Add methods
##############

def addFile(self, path):
   """
   Adds a single file to the list.

   The path must exist and must be a file or a link to an existing file.
   Exclusions are checked in this order: soft links, the exclude-files flag,
   the exclude paths list, full-path patterns, basename patterns.  The first
   exclusion that applies stops the method and nothing is added.

   @param path: File path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (0 if excluded, 1 otherwise).

   @raise ValueError: If path is not a file or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = encodePath(path)
   if not (os.path.exists(path) and os.path.isfile(path)):
      logger.debug("Path [%s] is not a file or does not exist on disk." % path)
      raise ValueError("Path is not a file or does not exist on disk.")

   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeFiles:
      logger.debug("Path [%s] is excluded based on excludeFiles." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Patterns are anchored at both ends; all are assumed valid due to RegexList.
   for rawPattern in self.excludePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return 0

   basename = os.path.basename(path)
   for rawPattern in self.excludeBasenamePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, basename):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return 0

   self.append(path)
   logger.debug("Added file to list: [%s]" % path)
   return 1
318
def addDir(self, path):
   """
   Adds a single directory to the list.

   The path must exist and must be a directory or a link to an existing
   directory.  Exclusions are checked in this order: soft links, the
   exclude-directories flag, the exclude paths list, full-path patterns,
   basename patterns.  The L{ignoreFile} is not consulted by this method,
   only by L{addDirContents}.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list (0 if excluded, 1 otherwise).

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")

   if self.excludeLinks and os.path.islink(path):
      logger.debug("Path [%s] is excluded based on excludeLinks." % path)
      return 0
   if self.excludeDirs:
      logger.debug("Path [%s] is excluded based on excludeDirs." % path)
      return 0
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return 0

   # Patterns are anchored at both ends; all are assumed valid due to RegexList.
   for rawPattern in self.excludePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return 0

   basename = os.path.basename(path)
   for rawPattern in self.excludeBasenamePatterns:
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, basename):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return 0

   self.append(path)
   logger.debug("Added directory to list: [%s]" % path)
   return 1
363
def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
   """
   Adds the contents of a directory to the list.

   The path must exist and must be a directory or a link to a directory.
   The contents of the directory (and, by default, the directory path
   itself) are added to the list recursively, subject to any exclusions that
   are in place.  Pass C{recursive=False} to add only the directory and its
   immediate contents.

   @note: If a directory's absolute path matches an exclude pattern or path,
   or if the directory contains the configured ignore file, then the
   directory and all of its contents will be recursively excluded from the
   list.

   @note: If the passed-in directory happens to be a soft link, it will be
   recursed.  However, the linkDepth parameter controls whether any soft
   links I{within} the directory will be recursed.  The link depth is the
   maximum depth of the tree at which soft links should be followed.  So, a
   depth of 0 does not follow any soft links, a depth of 1 follows only
   links within the passed-in directory, a depth of 2 follows the links at
   the next level down, etc.

   @note: Any invalid soft links (i.e. soft links that point to
   non-existent items) will be silently ignored.

   @note: The L{excludeDirs} flag only controls whether any given directory
   path itself is added to the list once it has been discovered.  It does
   I{not} modify any behavior related to directory recursion.

   @note: If you call this method I{on a link to a directory} that link will
   never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list
   @type path: String representing a path on disk

   @param recursive: Indicates whether directory contents should be added recursively.
   @type recursive: Boolean value

   @param addSelf: Indicates whether the directory itself should be added to the list.
   @type addSelf: Boolean value

   @param linkDepth: Maximum depth of the tree at which soft links should be followed
   @type linkDepth: Integer value, where zero means not to follow any soft links

   @param dereference: Indicates whether soft links, if followed, should be dereferenced
   @type dereference: Boolean value

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   startDir = normalizeDir(encodePath(path))
   # Note the argument order: the internal method takes includePath (our
   # addSelf) before recursive.
   return self._addDirContentsInternal(startDir, addSelf, recursive, linkDepth, dereference)
420
def _addDirContentsInternal(self, path, includePath=True, recursive=True, linkDepth=0, dereference=False):
   """
   Internal implementation of C{addDirContents}.

   This internal implementation exists due to some refactoring.  Some
   subclasses need to add the contents of a directory but not the directory
   itself, which makes a special case out of the first call in the recursive
   chain.  Rather than expose that in the public interface,
   C{addDirContents} is implemented wholly in terms of this method.

   The linkDepth parameter controls whether soft links are followed when
   contents are added recursively.  Every recursive call passes the value
   reduced by one.  When the value is zero or less, soft links to
   directories are added as directories but are not followed, so links are
   followed to a I{constant depth} starting from the top-most directory.

   There is one difference between soft links and directories: a soft link
   that is followed without dereferencing is not itself placed into the
   list.  If it were, the resulting tar file would get a little confused (it
   would have a link and a directory with the same name).

   @note: If you call this method I{on a link to a directory} that link will
   never be dereferenced (it may, however, be followed).

   @param path: Directory path whose contents should be added to the list.
   @param includePath: Indicates whether to include the path as well as contents.
   @param recursive: Indicates whether directory contents should be added recursively.
   @param linkDepth: Depth of soft links that should be followed
   @param dereference: Indicates whether soft links, if followed, should be dereferenced

   @return: Number of items recursively added to the list

   @raise ValueError: If path is not a directory or does not exist.
   """
   count = 0
   if not (os.path.exists(path) and os.path.isdir(path)):
      logger.debug("Path [%s] is not a directory or does not exist on disk." % path)
      raise ValueError("Path is not a directory or does not exist on disk.")

   # Exclusions that prune this directory and everything beneath it.
   if path in self.excludePaths:
      logger.debug("Path [%s] is excluded based on excludePaths." % path)
      return count
   for rawPattern in self.excludePatterns:  # safe to assume all are valid due to RegexList
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, path):
         logger.debug("Path [%s] is excluded based on pattern [%s]." % (path, encoded))
         return count
   basename = os.path.basename(path)
   for rawPattern in self.excludeBasenamePatterns:  # safe to assume all are valid due to RegexList
      encoded = encodePath(rawPattern)  # use same encoding as filenames
      if re.match(r"^%s$" % encoded, basename):
         logger.debug("Path [%s] is excluded based on basename pattern [%s]." % (path, encoded))
         return count
   if self.ignoreFile is not None and os.path.exists(os.path.join(path, self.ignoreFile)):
      logger.debug("Path [%s] is excluded based on ignore file." % path)
      return count

   if includePath:
      count += self.addDir(path)  # could actually be excluded by addDir, yet

   for entry in os.listdir(path):
      child = os.path.join(path, entry)

      if os.path.isfile(child):
         # Optionally add the link target too; the entry itself is always offered.
         if linkDepth > 0 and dereference:
            target = dereferenceLink(child)
            if target != child:
               count += self.addFile(target)
         count += self.addFile(child)
         continue

      if not os.path.isdir(child):
         continue  # neither file nor directory (e.g. a dangling link): ignored

      isLink = os.path.islink(child)
      if not recursive or (isLink and linkDepth <= 0):
         # Not descending: just record the directory (or link) itself.
         count += self.addDir(child)
         continue

      deeper = linkDepth - 1
      if not isLink:
         count += self._addDirContentsInternal(child, True, recursive, deeper, dereference)
      elif dereference:
         target = dereferenceLink(child)
         if target != child:
            count += self._addDirContentsInternal(target, True, recursive, deeper, dereference)
         count += self.addDir(child)
      else:
         # Follow the link but do not list the link itself.
         count += self._addDirContentsInternal(child, False, recursive, deeper, dereference)

   return count
511 512 513 ################# 514 # Remove methods 515 ################# 516
def removeFiles(self, pattern=None):
   """
   Removes file entries from the list.

   With no C{pattern} (or C{None}), every entry that is a file on disk is
   removed.  With a pattern, only file entries that the pattern matches are
   removed.  Entries which do not exist on disk are left alone (use
   L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check the
   type of each item in the list.  If you know ahead of time that you want
   to exclude all files, then you will be better off setting L{excludeFiles}
   to C{True} before adding items to the list.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   matcher = None
   if pattern is not None:
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         matcher = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")

   removed = 0
   for entry in self[:]:  # iterate over a copy, since the list is modified
      if not (os.path.exists(entry) and os.path.isfile(entry)):
         continue
      if matcher is not None and not matcher.match(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
557
def removeDirs(self, pattern=None):
   """
   Removes directory entries from the list.

   With no C{pattern} (or C{None}), every entry that is a directory on disk
   is removed.  With a pattern, only directory entries that the pattern
   matches are removed.  Entries which do not exist on disk are left alone
   (use L{removeInvalid} to purge those entries).

   This method might be fairly slow for large lists, since it must check the
   type of each item in the list.  If you know ahead of time that you want
   to exclude all directories, then you will be better off setting
   L{excludeDirs} to C{True} before adding items to the list (note that this
   will not prevent you from recursively adding the I{contents} of
   directories).

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   matcher = None
   if pattern is not None:
      try:
         pattern = encodePath(pattern)  # use same encoding as filenames
         matcher = re.compile(pattern)
      except re.error:
         raise ValueError("Pattern is not a valid regular expression.")

   removed = 0
   for entry in self[:]:  # iterate over a copy, since the list is modified
      if not (os.path.exists(entry) and os.path.isdir(entry)):
         continue
      if matcher is None:
         self.remove(entry)
         logger.debug("Removed path [%s] from list." % entry)
      elif matcher.match(entry):
         self.remove(entry)
         logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
      else:
         continue
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
600 641
def removeMatch(self, pattern):
   """
   Removes from the list all entries matching a pattern.

   Every entry that the passed-in C{pattern} matches is removed, whatever
   kind of filesystem object it is.  Since there is no need to check the
   type of each entry, it is faster to call this method than to call the
   L{removeFiles}, L{removeDirs} or L{removeLinks} methods individually.  If
   you know which patterns you will want to remove ahead of time, you may be
   better off setting L{excludePatterns} or L{excludeBasenamePatterns}
   before adding items to the list.

   @note: Unlike when using the exclude lists, the pattern here is I{not}
   bounded at the front and the back of the string.  You can use any pattern
   you want.

   @param pattern: Regular expression pattern representing entries to remove

   @return: Number of entries removed.
   @raise ValueError: If the passed-in pattern is not a valid regular expression.
   """
   try:
      pattern = encodePath(pattern)  # use same encoding as filenames
      matcher = re.compile(pattern)
   except re.error:
      raise ValueError("Pattern is not a valid regular expression.")

   removed = 0
   for entry in self[:]:  # iterate over a copy, since the list is modified
      if not matcher.match(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list based on pattern [%s]." % (entry, pattern))
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
676
def removeInvalid(self):
   """
   Removes from the list all entries that do not exist on disk.

   An entry is kept if C{os.path.exists} reports it as present; no attention
   is paid to whether the entries are files or directories.

   @return: Number of entries removed.
   """
   removed = 0
   for entry in self[:]:  # iterate over a copy, since the list is modified
      if os.path.exists(entry):
         continue
      self.remove(entry)
      logger.debug("Removed path [%s] from list." % entry)
      removed += 1
   logger.debug("Removed a total of %d entries." % removed)
   return removed
695 696 697 ################## 698 # Utility methods 699 ################## 700
def normalize(self):
   """
   Normalizes the list, ensuring that each entry is unique.

   The list is sorted in place and then rewritten in place with one copy of
   each distinct entry, so it ends up sorted and free of duplicates.
   Nothing is returned.
   """
   orig = len(self)
   self.sort()

   # After sorting, equal entries are adjacent, so one pass that compares
   # each entry with the last one kept is enough.  This does not depend on
   # map()/filter() being evaluated eagerly and avoids repeated list.remove().
   unique = []
   for entry in self:
      if unique and unique[-1] == entry:
         continue
      unique.append(entry)
   self[:] = unique  # replace contents in place; the list object is unchanged

   new = len(self)
   # Report how many entries were dropped (orig - new, never negative).
   logger.debug("Completed normalizing list; removed %d items (%d originally, %d now)." % (orig - new, orig, new))
710
def verify(self):
   """
   Verifies that all entries in the list exist on disk.

   Checking stops at the first entry that does not exist.

   @return: C{True} if all entries exist, C{False} otherwise.
   """
   for entry in self:
      if os.path.exists(entry):
         continue
      logger.debug("Path [%s] is invalid; list is not valid." % entry)
      return False
   logger.debug("All entries in list are valid.")
   return True
722 723 724 ######################################################################## 725 # SpanItem class definition 726 ######################################################################## 727
class SpanItem(object):
   """
   Item returned by L{BackupFileList.generateSpan}.

   A plain value holder: the constructor stores its four arguments as
   attributes of the same names and does nothing else.
   """

   def __init__(self, fileList, size, capacity, utilization):
      """
      Create object.
      @param fileList: List of files
      @param size: Size (in bytes) of files
      @param capacity: Capacity value for this item (stored exactly as passed in)
      @param utilization: Utilization, as a percentage (0-100)
      """
      self.fileList, self.size = fileList, size
      self.capacity, self.utilization = capacity, utilization
743 744 745 ######################################################################## 746 # BackupFileList class definition 747 ######################################################################## 748
749 -class BackupFileList(FilesystemList):
750 751 ###################### 752 # Class documentation 753 ###################### 754 755 """ 756 List of files to be backed up. 757 758 A BackupFileList is a L{FilesystemList} containing a list of files to be 759 backed up. It only contains files, not directories (soft links are treated 760 like files). On top of the generic functionality provided by 761 L{FilesystemList}, this class adds functionality to keep a hash (checksum) 762 for each file in the list, and it also provides a method to calculate the 763 total size of the files in the list and a way to export the list into tar 764 form. 765 766 @sort: __init__, addDir, totalSize, generateSizeMap, generateDigestMap, 767 generateFitted, generateTarfile, removeUnchanged 768 """ 769 770 ############## 771 # Constructor 772 ############## 773
def __init__(self):
   """
   Creates an empty backup file list with no configured exclusions.

   All set-up is delegated to the L{FilesystemList} constructor.
   """
   FilesystemList.__init__(self)
777 778 779 ################################ 780 # Overridden superclass methods 781 ################################ 782
def addDir(self, path):
   """
   Adds a directory to the list, but only if it is a soft link.

   This class does not allow real directories to be added by themselves (a
   backup list contains only files).  However, since links to directories
   are technically files, they are allowed.

   A path that is a directory and not a link is skipped and 0 is returned.
   Every other path is handed to the superclass method, so all of the
   superclass's validations and exclusions apply to it.

   @param path: Directory path to be added to the list
   @type path: String representing a path on disk

   @return: Number of items added to the list.

   @raise ValueError: If path is not a directory or does not exist.
   @raise ValueError: If the path could not be encoded properly.
   """
   path = normalizeDir(encodePath(path))
   isRealDirectory = os.path.isdir(path) and not os.path.islink(path)
   if isRealDirectory:
      return 0
   return FilesystemList.addDir(self, path)
810 811 812 ################## 813 # Utility methods 814 ################## 815
def totalSize(self):
   """
   Returns the total size among all files in the list.

   Only entries that are regular files and not soft links are counted;
   anything else (links, directories, entries that do not exist on disk)
   contributes nothing.

   @return: Total size, in bytes, as a floating point value
   """
   sizes = [float(os.stat(entry).st_size)
            for entry in self
            if os.path.isfile(entry) and not os.path.islink(entry)]
   return sum(sizes, 0.0)
829
def generateSizeMap(self):
   """
   Generates a mapping from file to file size in bytes.

   Soft links are included and are always listed with size zero.  Regular
   files are listed with their size on disk.  Any other entry (including one
   which does not exist on disk) is left out of the mapping.

   @return: Dictionary mapping file to file size (floating point values)
   """
   sizes = {}
   for entry in self:
      if os.path.islink(entry):
         size = 0.0
      elif os.path.isfile(entry):
         size = float(os.stat(entry).st_size)
      else:
         continue
      sizes[entry] = size
   return sizes
844
def generateDigestMap(self, stripPrefix=None):
   """
   Generates a mapping from file to file digest.

   Currently, the digest is an SHA hash, which should be pretty secure.  In
   the future, this might be a different kind of hash, but we guarantee that
   the type of the hash will not change unless the library major version
   number is bumped.

   Entries which do not exist on disk are ignored.

   Soft links are ignored.  We would end up generating a digest for the file
   that the soft link points at, which doesn't make any sense.

   If C{stripPrefix} is passed in, then that prefix will be stripped from
   each key when the map is generated.  This can be useful in generating two
   "relative" digest maps to be compared to one another.  The prefix is only
   removed from the I{start} of an entry; an entry that does not begin with
   the prefix keeps its full path as the key.

   @param stripPrefix: Common prefix to be stripped from paths
   @type stripPrefix: String with any contents

   @return: Dictionary mapping file to digest value
   @see: L{removeUnchanged}
   """
   table = {}
   for entry in self:
      if not os.path.isfile(entry) or os.path.islink(entry):
         continue
      key = entry
      # Strip only a genuine leading prefix.  str.replace(prefix, "", 1)
      # would also cut the text out of the middle of a path that merely
      # contains it, producing a wrong key.
      if stripPrefix is not None and entry.startswith(stripPrefix):
         key = entry[len(stripPrefix):]
      table[key] = BackupFileList._generateDigest(entry)
   return table
879
def _generateDigest(path):
   """
   Generates an SHA digest for a given file on disk.

   The original code for this function used this simplistic implementation,
   which requires reading the entire file into memory at once in order to
   generate a digest value::

      sha.new(open(path).read()).hexdigest()

   Not surprisingly, this isn't an optimal solution.  The U{Simple file
   hashing <http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/259109>}
   Python Cookbook recipe describes how to incrementally generate a hash
   value by reading in chunks of data rather than reading the file all at
   once.  The recipe relies on the C{update()} method of the various
   Python hashing algorithms.

   In my tests using a 110 MB file on CD, the original implementation
   requires 111 seconds.  This implementation requires only 40-45 seconds,
   which is a pretty substantial speed-up.

   Experience shows that reading in around 4kB (4096 bytes) at a time yields
   the best performance.  Smaller reads are quite a bit slower, and larger
   reads don't make much of a difference.  The 4kB number makes me a little
   suspicious, and I think it might be related to the size of a filesystem
   read at the hardware level.  However, I've decided to just hardcode 4096
   until I have evidence that shows it's worthwhile making the read size
   configurable.

   The file is always closed before this function returns, even if a read
   fails part-way through.

   @param path: Path to generate digest for.

   @return: ASCII-safe SHA digest for the file.
   @raise IOError: If the file cannot be opened or read.
   """
   try:
      import hashlib
      s = hashlib.sha1()
   except ImportError:
      # No hashlib available; fall back to the older sha module
      import sha
      s = sha.new()
   readBytes = 4096  # see notes above
   f = open(path, mode="rb")  # in case platform cares about binary reads
   try:
      readString = f.read(readBytes)
      while len(readString) > 0:
         s.update(readString)
         readString = f.read(readBytes)
   finally:
      # Release the handle even when a read blows up half-way through
      f.close()
   digest = s.hexdigest()
   logger.debug("Generated digest [%s] for file [%s]." % (digest, path))
   return digest
_generateDigest = staticmethod(_generateDigest)
def generateFitted(self, capacity, algorithm="worst_fit"):
   """
   Generates a list of items that fit in the indicated capacity.

   Sometimes, callers would like to include every item in a list, but are
   unable to because not all of the items fit in the space available.  This
   method returns a new list containing only the items that fit in a given
   capacity.  The list itself is not modified, so no information is lost if
   for some reason the fitted list is unsatisfactory.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose
   from first fit, best fit and alternate fit.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: Copy of list with total size no larger than indicated capacity
   @raise ValueError: If the algorithm is invalid.
   """
   knapsack = self._getKnapsackTable()
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   result = fitFunction(knapsack, capacity)
   # First element of the knapsack result is the collection of fitted items
   return result[0]
958
def generateSpan(self, capacity, algorithm="worst_fit"):
   """
   Splits the list of items into sub-lists that fit in a given capacity.

   Sometimes, callers need to split a backup file list into a set of smaller
   lists.  For instance, you could use this to "span" the files across a set
   of discs.

   The fitting is done using the functions in the knapsack module.  By
   default, the worst fit algorithm is used, but you can also choose
   from first fit, best fit and alternate fit.

   @note: If any of your items are larger than the capacity, then it won't
   be possible to find a solution.  In this case, a value error will be
   raised.

   @param capacity: Maximum capacity among the files in the new list
   @type capacity: Integer, in bytes

   @param algorithm: Knapsack (fit) algorithm to use
   @type algorithm: One of "first_fit", "best_fit", "worst_fit", "alternate_fit"

   @return: List of L{SpanItem} objects.

   @raise ValueError: If the algorithm is invalid.
   @raise ValueError: If it's not possible to fit some items
   """
   spanItems = []
   fitFunction = BackupFileList._getKnapsackFunction(algorithm)
   remaining = self._getKnapsackTable(capacity)
   iteration = 0
   while remaining:
      iteration += 1
      fit = fitFunction(remaining, capacity)
      fitted = fit[0]
      if len(fitted) == 0:
         # Should never happen due to validations in _getKnapsackTable(), but let's be safe
         raise ValueError("After iteration %d, unable to add any new items." % iteration)
      # Whatever was placed in this span is no longer a candidate for the next one
      removeKeys(remaining, fitted)
      used = fit[1]
      utilization = (float(used)/float(capacity))*100.0
      spanItems.append(SpanItem(fitted, used, capacity, utilization))
   return spanItems
1001
def _getKnapsackTable(self, capacity=None):
   """
   Converts the list into the form needed by the knapsack algorithms.

   Soft links are entered with a size of zero, regular files with their
   size in bytes (as a float), and anything else is left out.

   @param capacity: Optional capacity that every individual file must fit in

   @return: Dictionary mapping file name to tuple of (file path, file size).
   @raise ValueError: If a capacity is given and some file is larger than it.
   """
   table = { }
   for entry in self:
      if os.path.islink(entry):
         table[entry] = (entry, 0.0)
         continue
      if not os.path.isfile(entry):
         continue
      size = float(os.stat(entry).st_size)
      if capacity is not None and size > capacity:
         raise ValueError("File [%s] cannot fit in capacity %s." % (entry, displayBytes(capacity)))
      table[entry] = (entry, size)
   return table
1018
def _getKnapsackFunction(algorithm):
   """
   Looks up the knapsack function that goes with an algorithm name.

   Algorithm name must be one of "first_fit", "best_fit", "worst_fit",
   "alternate_fit".

   @param algorithm: Name of the algorithm
   @return: Reference to knapsack function
   @raise ValueError: If the algorithm name is unknown.
   """
   if algorithm == "first_fit":
      return firstFit
   if algorithm == "best_fit":
      return bestFit
   if algorithm == "worst_fit":
      return worstFit
   if algorithm == "alternate_fit":
      return alternateFit
   raise ValueError("Algorithm [%s] is invalid." % algorithm)
_getKnapsackFunction = staticmethod(_getKnapsackFunction)
def generateTarfile(self, path, mode='tar', ignore=False, flat=False):
   """
   Creates a tar file containing the files in the list.

   By default, this method will create uncompressed tar files.  If you pass
   in mode C{'targz'}, then it will create gzipped tar files, and if you
   pass in mode C{'tarbz2'}, then it will create bzipped tar files.

   The tar file will be created as a GNU tar archive, which enables extended
   file name lengths, etc.  Since GNU tar is so prevalent, I've decided that
   the extra functionality out-weighs the disadvantage of not being
   "standard".

   If you pass in C{flat=True}, then a "flat" archive will be created, and
   all of the files will be added to the root of the archive.  So, the file
   C{/tmp/something/whatever.txt} would be added as just C{whatever.txt}.

   By default, the whole method call fails if there are problems adding any
   of the files to the archive, resulting in an exception.  Under these
   circumstances, callers are advised that they might want to call
   L{removeInvalid()} and then attempt to create the tar file a second
   time, since the most common cause of failures is a missing file (a file
   that existed when the list was built, but is gone again by the time the
   tar file is built).

   If you want to, you can pass in C{ignore=True}, and the method will
   ignore errors encountered when adding individual files to the archive
   (but not errors opening and closing the archive itself).

   We'll always attempt to close the archive and remove the tarfile from
   disk if an exception will be thrown, whatever the type of the exception.

   @note: No validation is done as to whether the entries in the list are
   files, since only files or soft links should be in an object like this.
   However, to be safe, everything is explicitly added to the tar archive
   non-recursively so it's safe to include soft links to directories.

   @note: The Python C{tarfile} module, which is used internally here, is
   supposed to deal properly with long filenames and links.  In my testing,
   I have found that it appears to be able to add really long filenames
   to archives, but doesn't do a good job reading them back out, even out of
   an archive it created.  Fortunately, all Cedar Backup does is add files
   to archives.

   @param path: Path of tar file to create on disk
   @type path: String representing a path on disk

   @param mode: Tar creation mode
   @type mode: One of either C{'tar'}, C{'targz'} or C{'tarbz2'}

   @param ignore: Indicates whether to ignore certain errors.
   @type ignore: Boolean

   @param flat: Creates "flat" archive by putting all items in root
   @type flat: Boolean

   @raise ValueError: If mode is not valid
   @raise ValueError: If list is empty
   @raise ValueError: If the path could not be encoded properly.
   @raise TarError: If there is a problem creating the tar file
   """
   # Function-scope import: sys.exc_info() fetches the active exception
   # without relying on version-specific "except ... , e" syntax.
   import sys
   path = encodePath(path)
   if len(self) == 0:
      raise ValueError("Empty list cannot be used to generate tarfile.")
   if mode == 'tar':
      tarmode = "w:"
   elif mode == 'targz':
      tarmode = "w:gz"
   elif mode == 'tarbz2':
      tarmode = "w:bz2"
   else:
      raise ValueError("Mode [%s] is not valid." % mode)
   tar = None  # stays None if tarfile.open() itself fails
   try:
      tar = tarfile.open(path, tarmode)
      try:
         tar.format = tarfile.GNU_FORMAT
      except AttributeError:
         # tarfile module without GNU_FORMAT: use the older posix flag instead
         tar.posix = False
      for entry in self:
         try:
            if flat:
               tar.add(entry, arcname=os.path.basename(entry), recursive=False)
            else:
               tar.add(entry, recursive=False)
         except tarfile.TarError:
            if not ignore:
               raise
            logger.info("Unable to add file [%s]; going on anyway." % entry)
         except OSError:
            if not ignore:
               raise tarfile.TarError(sys.exc_info()[1])
            logger.info("Unable to add file [%s]; going on anyway." % entry)
      tar.close()
   except:
      # Cleanup-and-reraise for every failure, not just TarError, so that a
      # partial archive is never left behind.  The exception is captured up
      # front because the best-effort cleanup below handles exceptions of its
      # own, which would change what a bare "raise" re-raises on Python 2.
      error = sys.exc_info()[1]
      if tar is not None:
         try:
            tar.close()
         except Exception:
            pass  # best effort; the original error is what matters
      if os.path.exists(path):
         try:
            os.remove(path)
         except OSError:
            pass  # best effort; the original error is what matters
      if isinstance(error, tarfile.ReadError):
         raise tarfile.ReadError("Unable to open [%s]; maybe directory doesn't exist?" % path)
      raise error
1141
def removeUnchanged(self, digestMap, captureDigest=False):
   """
   Removes unchanged entries from the list.

   This method relies on a digest map as returned from L{generateDigestMap}.
   For each entry in C{digestMap}, if the entry also exists in the current
   list I{and} the entry in the current list has the same digest value as in
   the map, the entry in the current list will be removed.

   This method offers a convenient way for callers to filter unneeded
   entries from a list.  The idea is that a caller will capture a digest map
   from C{generateDigestMap} at some point in time (perhaps the beginning of
   the week), and will save off that map using C{pickle} or some other
   method.  Then, the caller could use this method sometime in the future to
   filter out any unchanged files based on the saved-off map.

   If C{captureDigest} is passed-in as C{True}, then digest information will
   be captured for the entire list before the removal step occurs using the
   same rules as in L{generateDigestMap}.  The check will involve a lookup
   into the complete digest map.

   If C{captureDigest} is passed in as C{False}, we will only generate a
   digest value for files we actually need to check, and we'll ignore any
   entry in the list which isn't a file that currently exists on disk.

   The return value varies depending on C{captureDigest}, as well.  To
   preserve backwards compatibility, if C{captureDigest} is C{False}, then
   we'll just return a single value representing the number of entries
   removed.  Otherwise, we'll return a tuple of C{(entries removed, digest
   map)}.  The returned digest map will be in exactly the form returned by
   L{generateDigestMap}.

   @note: For performance reasons, this method actually ends up rebuilding
   the list from scratch.  First, we build a temporary dictionary containing
   all of the items from the original list.  Then, we remove items as needed
   from the dictionary (which is faster than the equivalent operation on a
   list).  Finally, we replace the contents of the current list based on the
   keys left in the dictionary.  This should be transparent to the caller.

   @param digestMap: Dictionary mapping file name to digest value.
   @type digestMap: Map as returned from L{generateDigestMap}.

   @param captureDigest: Indicates that digest information should be captured.
   @type captureDigest: Boolean

   @return: Number of entries removed, or a tuple of C{(entries removed,
   digest map)} when C{captureDigest} is C{True}
   """
   removed = 0
   remaining = {}

   if not captureDigest:
      # Lazy variant: only digest the entries that the saved map mentions
      for entry in self:
         remaining[entry] = None
      for entry in digestMap.keys():
         if entry not in remaining:
            continue
         if not os.path.isfile(entry) or os.path.islink(entry):
            continue
         if BackupFileList._generateDigest(entry) == digestMap[entry]:
            removed += 1
            del remaining[entry]
            logger.debug("Discarded unchanged file [%s]." % entry)
      self[:] = remaining.keys()
      return removed

   # Eager variant: digest every real file up front so the caller gets a full map
   captured = {}
   for entry in self:
      if os.path.isfile(entry) and not os.path.islink(entry):
         digest = BackupFileList._generateDigest(entry)
         remaining[entry] = digest
         captured[entry] = digest
      else:
         remaining[entry] = None
   for entry in digestMap.keys():
      # None means either "not in the list" or "not a real file"; skip both
      current = remaining.get(entry)
      if current is not None and current == digestMap[entry]:
         removed += 1
         del remaining[entry]
         logger.debug("Discarded unchanged file [%s]." % entry)
   self[:] = remaining.keys()
   return (removed, captured)
1224 1225 1226 ######################################################################## 1227 # PurgeItemList class definition 1228 ######################################################################## 1229
class PurgeItemList(FilesystemList):

   ######################
   # Class documentation
   ######################

   """
   List of files and directories to be purged.

   A PurgeItemList is a L{FilesystemList} containing a list of files and
   directories to be purged.  On top of the generic functionality provided by
   L{FilesystemList}, this class adds functionality to remove items that are
   too young to be purged, and to actually remove each item in the list from
   the filesystem.

   The other main difference is that when you add a directory's contents to a
   purge item list, the directory itself is not added to the list.  This way,
   if someone asks to purge within C{/opt/backup/collect}, that directory
   doesn't get removed once all of the files within it are gone.
   """

   ##############
   # Constructor
   ##############

   def __init__(self):
      """Initializes a list with no configured exclusions."""
      FilesystemList.__init__(self)


   ##############
   # Add methods
   ##############

   def addDirContents(self, path, recursive=True, addSelf=True, linkDepth=0, dereference=False):
      """
      Adds the contents of a directory to the list.

      The path must exist and must be a directory or a link to a directory.
      The contents of the directory (but I{not} the directory path itself) will
      be recursively added to the list, subject to any exclusions that are in
      place.  If you only want the directory and its contents to be added, then
      pass in C{recursive=False}.

      @note: If a directory's absolute path matches an exclude pattern or path,
      or if the directory contains the configured ignore file, then the
      directory and all of its contents will be recursively excluded from the
      list.

      @note: If the passed-in directory happens to be a soft link, it will be
      recursed.  However, the linkDepth parameter controls whether any soft
      links I{within} the directory will be recursed.  The link depth is
      maximum depth of the tree at which soft links should be followed.  So, a
      depth of 0 does not follow any soft links, a depth of 1 follows only
      links within the passed-in directory, a depth of 2 follows the links at
      the next level down, etc.

      @note: Any invalid soft links (i.e.  soft links that point to
      non-existent items) will be silently ignored.

      @note: The L{excludeDirs} flag only controls whether any given soft link
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: The L{excludeDirs} flag only controls whether any given directory
      path itself is added to the list once it has been discovered.  It does
      I{not} modify any behavior related to directory recursion.

      @note: If you call this method I{on a link to a directory} that link will
      never be dereferenced (it may, however, be followed).

      @param path: Directory path whose contents should be added to the list
      @type path: String representing a path on disk

      @param recursive: Indicates whether directory contents should be added recursively.
      @type recursive: Boolean value

      @param addSelf: Ignored in this subclass.

      @param linkDepth: Depth of soft links that should be followed
      @type linkDepth: Integer value, where zero means not to follow any soft links

      @param dereference: Indicates whether soft links, if followed, should be dereferenced
      @type dereference: Boolean value

      @return: Number of items recursively added to the list

      @raise ValueError: If path is not a directory or does not exist.
      @raise ValueError: If the path could not be encoded properly.
      """
      normalized = normalizeDir(encodePath(path))
      parent = super(PurgeItemList, self)
      # The literal False is passed where the caller's addSelf would otherwise
      # go: the directory itself must never end up in a purge list.
      return parent._addDirContentsInternal(normalized, False, recursive, linkDepth, dereference)


   ##################
   # Utility methods
   ##################

   def removeYoungFiles(self, daysOld):
      """
      Removes from the list files younger than a certain age (in days).

      Any file whose "age" in days is less than (C{<}) the value of the
      C{daysOld} parameter will be removed from the list so that it will not be
      purged later when L{purgeItems} is called.  Directories and soft links
      will be ignored.

      The "age" of a file is the amount of time since the file was last used,
      per the most recent of the file's C{st_atime} and C{st_mtime} values.

      @note: Some people find the "sense" of this method confusing or
      "backwards".  Keep in mind that this method is used to remove items
      I{from the list}, not from the filesystem!  It removes from the list
      those items that you would I{not} want to purge because they are too
      young.  As an example, passing in C{daysOld} of zero (0) would remove
      from the list no files, which would result in purging all of the files
      later.  I would be happy to make a synonym of this method with an
      easier-to-understand "sense", if someone can suggest one.

      @param daysOld: Minimum age of files that are to be kept in the list.
      @type daysOld: Integer value >= 0.

      @return: Number of entries removed
      @raise ValueError: If C{daysOld} is negative.
      """
      daysOld = int(daysOld)
      if daysOld < 0:
         raise ValueError("Days old value must be an integer >= 0.")
      removed = 0
      # Walk a copy, since entries are removed from the list as we go
      for entry in self[:]:
         if not os.path.isfile(entry) or os.path.islink(entry):
            continue
         try:
            wholeDays = math.floor(calculateFileAge(entry))
            if wholeDays < daysOld:
               removed += 1
               self.remove(entry)
         except OSError:
            # Age could not be determined; leave the entry in the list
            pass
      return removed

   def purgeItems(self):
      """
      Purges all items in the list.

      Every item in the list will be purged.  Directories in the list will
      I{not} be purged recursively, and hence will only be removed if they are
      empty.  Errors will be ignored.

      To facilitate easy removal of directories that will end up being empty,
      the delete process happens in two passes: files first (including soft
      links), then directories.

      @return: Tuple containing count of (files, dirs) removed
      """
      files = 0
      dirs = 0

      # First pass: plain files and soft links
      for entry in self:
         # NOTE(review): os.path.exists() follows links, so a dangling soft
         # link is skipped here -- confirm that this is intended.
         if not os.path.exists(entry):
            continue
         if not (os.path.isfile(entry) or os.path.islink(entry)):
            continue
         try:
            os.remove(entry)
            files += 1
            logger.debug("Purged file [%s]." % entry)
         except OSError:
            pass

      # Second pass: real directories, which rmdir only removes when empty
      for entry in self:
         if not (os.path.exists(entry) and os.path.isdir(entry)):
            continue
         if os.path.islink(entry):
            continue
         try:
            os.rmdir(entry)
            dirs += 1
            logger.debug("Purged empty directory [%s]." % entry)
         except OSError:
            pass

      return (files, dirs)
1404 1405 1406 ######################################################################## 1407 # Public functions 1408 ######################################################################## 1409 1410 ########################## 1411 # normalizeDir() function 1412 ########################## 1413
def normalizeDir(path):
   """
   Normalizes a directory name.

   For our purposes, a directory name is normalized by removing the trailing
   path separator, if any.  This is important because we want directories to
   appear within lists in a consistent way, although from the user's
   perspective passing in C{/path/to/dir/} and C{/path/to/dir} are equivalent.

   A path consisting of just the separator (the filesystem root) is returned
   as-is, and at most one trailing separator is removed.

   @param path: Path to be normalized.
   @type path: String representing a path on disk

   @return: Normalized path, which should be equivalent to the original.
   """
   isRoot = (path == os.sep)
   endsWithSep = (path[-1:] == os.sep)
   if endsWithSep and not isRoot:
      return path[:-1]
   return path
1431 1432 1433 ############################# 1434 # compareContents() function 1435 ############################# 1436
def compareContents(path1, path2, verbose=False):
   """
   Compares the contents of two directories to see if they are equivalent.

   The two directories are recursively compared.  First, we check whether they
   contain exactly the same set of files.  Then, we check to see every given
   file has exactly the same contents in both directories.

   This is all relatively simple to implement through the magic of
   L{BackupFileList.generateDigestMap}, which knows how to strip a path prefix
   off the front of each entry in the mapping it generates.  This makes our
   comparison as simple as creating a list for each path, then generating a
   digest map for each path and comparing the two.

   If no exception is thrown, the two directories are considered identical.

   If the C{verbose} flag is C{True}, then an alternate (but slower) method is
   used so that any thrown exception can indicate exactly which file caused the
   comparison to fail.  The thrown C{ValueError} exception distinguishes
   between the directories containing different files, and containing the same
   files with differing content.

   @note: Symlinks are I{not} followed for the purposes of this comparison.

   @param path1: First path to compare.
   @type path1: String representing a path on disk

   @param path2: Second path to compare.
   @type path2: String representing a path on disk

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If a directory doesn't exist or can't be read.
   @raise ValueError: If the two directories are not equivalent.
   @raise IOError: If there is an unusual problem reading the directories.
   """
   try:
      path1List = BackupFileList()
      path1List.addDirContents(path1)
      path1Digest = path1List.generateDigestMap(stripPrefix=normalizeDir(path1))
      path2List = BackupFileList()
      path2List.addDirContents(path2)
      path2Digest = path2List.generateDigestMap(stripPrefix=normalizeDir(path2))
      compareDigestMaps(path1Digest, path2Digest, verbose)
   except IOError:
      logger.error("I/O error encountered during consistency check.")
      # Bare raise re-raises the same IOError and keeps its original traceback
      raise
1485
def compareDigestMaps(digest1, digest2, verbose=False):
   """
   Compares two digest maps and throws an exception if they differ.

   In non-verbose mode the two maps are simply compared for equality.  In
   verbose mode, the sets of keys are compared first and then the digest
   values are compared key by key, so that the exception message can say
   which kind of difference was found.

   @param digest1: First digest to compare.
   @type digest1: Digest as returned from BackupFileList.generateDigestMap()

   @param digest2: Second digest to compare.
   @type digest2: Digest as returned from BackupFileList.generateDigestMap()

   @param verbose: Indicates whether a verbose response should be given.
   @type verbose: Boolean

   @raise ValueError: If the two directories are not equivalent.
   """
   if verbose:
      files1 = UnorderedList(digest1.keys())
      files2 = UnorderedList(digest2.keys())
      if files1 != files2:
         raise ValueError("Directories contain a different set of files.")
      for name in files1:
         if digest1[name] != digest2[name]:
            raise ValueError("File contents for [%s] vary between directories." % name)
      return
   if digest1 != digest2:
      raise ValueError("Consistency check failed.")
1512