Package duplicity :: Module patchdir
[hide private]
[frames | no frames]

Source Code for Module duplicity.patchdir

  1  # -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*- 
  2  # 
  3  # Copyright 2002 Ben Escoto <ben@emerose.org> 
  4  # Copyright 2007 Kenneth Loafman <kenneth@loafman.com> 
  5  # 
  6  # This file is part of duplicity. 
  7  # 
  8  # Duplicity is free software; you can redistribute it and/or modify it 
  9  # under the terms of the GNU General Public License as published by the 
 10  # Free Software Foundation; either version 2 of the License, or (at your 
 11  # option) any later version. 
 12  # 
 13  # Duplicity is distributed in the hope that it will be useful, but 
 14  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 16  # General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with duplicity; if not, write to the Free Software Foundation, 
 20  # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 21   
 22  import re #@UnusedImport 
 23  import types 
 24  import tempfile 
 25  import os 
 26   
 27  from duplicity import tarfile #@UnusedImport 
 28  from duplicity import librsync #@UnusedImport 
 29  from duplicity import log #@UnusedImport 
 30  from duplicity import diffdir 
 31  from duplicity import misc 
 32  from duplicity import selection 
 33  from duplicity import util #@UnusedImport 
 34  from duplicity.path import * #@UnusedWildImport 
 35  from duplicity.lazy import * #@UnusedWildImport 
 36   
 37  """Functions for patching of directories""" 
 38   
class PatchDirException(Exception):
    """Error raised when a diff tar entry is unrecognized or unsafe."""
41 42
def Patch(base_path, difftar_fileobj):
    """Patch base_path using the delta tar read from difftar_fileobj.

    difftar_fileobj is wrapped in a tarfile.TarFile opened for reading,
    handed to patch_diff_tarfile together with base_path, and closed once
    patching has finished.

    Raises AssertionError (when assertions are enabled) if closing
    difftar_fileobj returns a true value.
    """
    diff_tarfile = tarfile.TarFile("arbitrary", "r", difftar_fileobj)
    patch_diff_tarfile(base_path, diff_tarfile)
    # Call close() outside of the assert statement: "python -O" strips
    # asserts, which would otherwise silently skip closing the file.
    close_status = difftar_fileobj.close()
    assert not close_status, close_status
48
def Patch_from_iter(base_path, fileobj_iter, restrict_index=()):
    """Patch base_path from an iterator of delta file objects.

    The file objects are presented as one tar stream through
    TarFile_FromFileobjs and applied with patch_diff_tarfile;
    restrict_index is passed through unchanged.
    """
    combined_tar = TarFile_FromFileobjs(fileobj_iter)
    patch_diff_tarfile(base_path, combined_tar, restrict_index)
53
def patch_diff_tarfile(base_path, diff_tarfile, restrict_index=()):
    """Patch the Path object base_path using a delta tarfile.

    diff_tarfile is used like a tarfile.TarFile.  If restrict_index is
    set, deltas in diff_tarfile whose index does not start with
    restrict_index are ignored.
    """
    # A missing base_path probably means a full backup is being untarred,
    # in which case there is nothing existing to collate against.
    if not base_path.exists():
        basis_iter = empty_iter()
    else:
        basis_iter = selection.Select(base_path).set_iter()

    delta_iter = difftar2path_iter(diff_tarfile)
    if restrict_index:
        delta_iter = filter_path_iter(delta_iter, restrict_index)
    collated = diffdir.collate2iters(basis_iter, delta_iter)

    patcher = IterTreeReducer(PathPatcher, [base_path])
    for basis_path, diff_ropath in collated:
        # Log and dispatch on the basis path when there is one, otherwise
        # on the incoming diff path.
        if basis_path:
            subject = basis_path
        else:
            subject = diff_ropath
        log.Info(_("Patching %s") % (subject.get_relative_path(),),
                 log.InfoCode.patch_file_patching,
                 util.escape(subject.get_relative_path()))
        patcher(subject.index, basis_path, diff_ropath)
    patcher.Finish()
    base_path.setdata()
85
def empty_iter():
    """Return a generator that yields nothing."""
    return
    yield None  # never reached; its presence makes this a generator
89
def filter_path_iter(path_iter, index):
    """Yield only the paths below index, re-rooted at index.

    Paths whose .index does not begin with the non-empty tuple index are
    discarded.  For the others the prefix is removed from .index (the
    path object is modified in place) before the path is yielded.
    """
    assert isinstance(index, tuple) and index, index
    prefix_len = len(index)
    for candidate in path_iter:
        if candidate.index[:prefix_len] != index:
            continue
        candidate.index = candidate.index[prefix_len:]
        yield candidate
103
def difftar2path_iter(diff_tarfile):
    """Turn the tar-like object diff_tarfile into an iterator of ROPaths.

    Every yielded ROPath is initialised from its tarinfo and given a
    .difftype attribute.  Entries of difftype "deleted" get .type set to
    None.  Regular files get a file object attached: a Multivol_Filelike
    for multivol entries, otherwise the result of
    diff_tarfile.extractfile().

    The generator always finishes with a plain return rather than letting
    StopIteration escape from its body, which PEP 479 turns into a
    RuntimeError on newer interpreters.
    """
    tar_iter = iter(diff_tarfile)
    multivol_fileobj = None

    # The next tarinfo is stored in a one element list so that
    # Multivol_Filelike below can update it.
    try:
        tarinfo_list = [tar_iter.next()]
    except StopIteration:
        return  # empty tar, nothing to yield

    while 1:
        # This section is relevant when a multivol diff is last in the tar:
        # Multivol_Filelike stores None once the tar iterator runs out.
        if not tarinfo_list[0]:
            return
        if multivol_fileobj and not multivol_fileobj.at_end:
            multivol_fileobj.close()  # aborting in middle of multivol
            continue

        index, difftype, multivol = get_index_from_tarinfo(tarinfo_list[0])
        ropath = ROPath(index)
        ropath.init_from_tarinfo(tarinfo_list[0])
        ropath.difftype = difftype
        if difftype == "deleted":
            ropath.type = None
        elif ropath.isreg():
            if multivol:
                multivol_fileobj = Multivol_Filelike(diff_tarfile, tar_iter,
                                                     tarinfo_list, index)
                ropath.setfileobj(multivol_fileobj)
                yield ropath
                continue  # Multivol_Filelike will reset tarinfo_list
            else:
                ropath.setfileobj(diff_tarfile.extractfile(tarinfo_list[0]))
        yield ropath
        try:
            tarinfo_list[0] = tar_iter.next()
        except StopIteration:
            return
139
def get_index_from_tarinfo(tarinfo):
    """Return the (index, difftype, multivol) triple for a tarinfo object.

    The entry name (from util.get_tarinfo_name) must start with one of
    "snapshot/", "diff/", "deleted/", "multivol_diff/" or
    "multivol_snapshot/".

    index    -- tuple of path components; () when the remaining name is
                "." or empty
    difftype -- "snapshot", "diff" or "deleted"
    multivol -- 1 for the multivol_* prefixes, otherwise 0

    Raises PatchDirException if the name matches no known prefix, if a
    multivol name does not match the expected pattern, or if any path
    component is "..".
    """
    # The name does not depend on the prefix being tried, so fetch it once.
    tiname = util.get_tarinfo_name(tarinfo)
    for prefix in ["snapshot/", "diff/", "deleted/",
                   "multivol_diff/", "multivol_snapshot/"]:
        if not tiname.startswith(prefix):
            continue
        if prefix.startswith("multivol"):
            if prefix == "multivol_diff/":
                difftype = "diff"
            else:
                difftype = "snapshot"
            multivol = 1
            # Drop both the multivol prefix and the trailing "/<number>"
            # component, keeping only the path in between.
            name, num_subs = re.subn(
                "(?s)^multivol_(diff|snapshot)/?(.*)/[0-9]+$",
                "\\2", tiname)
            if num_subs != 1:
                raise PatchDirException("Unrecognized diff entry %s" %
                                        (tiname,))
        else:
            difftype = prefix[:-1]  # strip trailing /
            name = tiname[len(prefix):]  # strip prefix
            if name.endswith("/"):
                name = name[:-1]  # strip one trailing /
            multivol = 0
        break
    else:
        raise PatchDirException("Unrecognized diff entry %s" %
                                (tiname,))
    if name == "." or name == "":
        index = ()
    else:
        index = tuple(name.split("/"))
        if '..' in index:
            raise PatchDirException("Tar entry %s contains '..'. Security "
                                    "violation" % (tiname,))
    return (index, difftype, multivol)
177 178
class Multivol_Filelike:
    """File-like reader over the consecutive volumes of one multivol entry.

    Data is accumulated in self.buffer; whenever a read() needs more, the
    next volume is pulled from the tar.  at_end becomes 1 once the entry
    has no further volumes.
    """

    def __init__(self, tf, tar_iter, tarinfo_list, index):
        """Store the reader state.

        tf is the TarFile-like object, tar_iter its iterator, index the
        index of the entry being read.  tarinfo_list is a one element list
        holding the current tarinfo; it is a list so that this object can
        replace the element for whoever else holds the same list.
        """
        self.tf = tf
        self.tar_iter = tar_iter
        self.tarinfo_list = tarinfo_list
        self.index = index
        self.buffer = ""
        self.at_end = 0

    def read(self, length= -1):
        """Return up to length bytes; everything left if length < 0."""
        if length < 0:
            while self.addtobuffer():
                pass
            take = len(self.buffer)
        else:
            while len(self.buffer) < length and self.addtobuffer():
                pass
            take = min(len(self.buffer), length)

        chunk, self.buffer = self.buffer[:take], self.buffer[take:]
        return chunk

    def addtobuffer(self):
        """Append the next volume to the buffer.

        Returns 1 if a volume was added and another tarinfo is available,
        None otherwise (at_end is set in that case).
        """
        if self.at_end:
            return None
        current = self.tarinfo_list[0]
        index, difftype, multivol = get_index_from_tarinfo(current)
        if not multivol or index != self.index:
            # We have moved on to a different entry.  The tarinfo is left in
            # tarinfo_list, which is how difftar2path_iter gets to see it.
            self.at_end = 1
            return None

        volume = self.tf.extractfile(current)
        self.buffer += volume.read()
        volume.close()

        try:
            following = self.tar_iter.next()
        except StopIteration:
            self.tarinfo_list[0] = None
            self.at_end = 1
            return None
        self.tarinfo_list[0] = following
        return 1

    def close(self):
        """Skip over any volumes not yet read and mark the end."""
        if not self.at_end:
            more = 1
            while more:
                self.buffer = ""  # the skipped data is not wanted
                more = self.addtobuffer()
        self.at_end = 1
242 243
class PathPatcher(ITRBranch):
    """Branch class that applies one diff ROPath to one basis path.

    patch_diff_tarfile hands this class to IterTreeReducer, which is
    called with (index, basis_path, diff_ropath) for each collated pair.
    """

    def __init__(self, base_path):
        """Remember base_path, the Path at the root of the tree."""
        self.base_path = base_path
        self.dir_diff_ropath = None

    def start_process(self, index, basis_path, diff_ropath):
        """Begin processing an entry; handles the directory case."""
        if not diff_ropath or not diff_ropath.isdir():
            # A non-directory should only get here as the first element.
            assert index == (), str(index)
            self.fast_process(index, basis_path, diff_ropath)
            return

        if not basis_path:
            basis_path = self.base_path.new_index(index)
            assert not basis_path.exists()
            basis_path.mkdir()  # later files need a place to go into
        elif not basis_path.isdir():
            basis_path.delete()
            basis_path.mkdir()
        self.dir_basis_path = basis_path
        self.dir_diff_ropath = diff_ropath

    def end_process(self):
        """Copy the directory attributes when leaving the tree."""
        if not self.dir_diff_ropath:
            return
        self.dir_diff_ropath.copy_attribs(self.dir_basis_path)

    def can_fast_process(self, index, basis_path, diff_ropath):
        """Only a directory diff_ropath requires recursion."""
        if diff_ropath and diff_ropath.isdir():
            return False
        return True

    def _remove_basis(self, basis_path):
        """Remove basis_path, as a tree if it is a directory."""
        if basis_path.isdir():
            basis_path.deltree()
        else:
            basis_path.delete()

    def fast_process(self, index, basis_path, diff_ropath):
        """Apply diff_ropath when it is not a directory."""
        if not diff_ropath:
            return  # no change

        if not basis_path:
            # Nothing exists yet: a "deleted" entry needs no work, anything
            # else is copied over as a snapshot.
            if diff_ropath.difftype != "deleted":
                diff_ropath.copy(self.base_path.new_index(index))
            return

        if diff_ropath.difftype == "deleted":
            self._remove_basis(basis_path)
            return

        if not basis_path.isreg():
            # The basis is not a regular file, so replace it wholesale.
            self._remove_basis(basis_path)
            diff_ropath.copy(basis_path)
            return

        assert diff_ropath.difftype == "diff", diff_ropath.difftype
        basis_path.patch_with_attribs(diff_ropath)
301 302
class TarFile_FromFileobjs:
    """Like a tarfile.TarFile iterator, but read from multiple fileobjs.

    The tarinfos of each file object's tar are returned in turn; when one
    tar is exhausted the next file object is opened.
    """

    def __init__(self, fileobj_iter):
        """Make new tarinfo iterator.

        fileobj_iter should be an iterator of file objects opened for
        reading.  Each one is closed when the iterator moves past it.
        """
        self.fileobj_iter = fileobj_iter
        self.tarfile, self.tar_iter = None, None
        self.current_fp = None

    def __iter__(self):
        return self

    def set_tarfile(self):
        """Set tarfile from next file object, or raise StopIteration."""
        if self.current_fp:
            # close() is deliberately kept out of the assert statement so
            # that it still runs when asserts are stripped by "python -O".
            close_status = self.current_fp.close()
            assert not close_status, close_status
        self.current_fp = self.fileobj_iter.next()
        self.tarfile = util.make_tarfile("r", self.current_fp)
        self.tar_iter = iter(self.tarfile)

    def next(self):
        """Return the next tarinfo, moving on to the next tar if needed.

        Raises StopIteration when fileobj_iter has no further file
        objects.
        """
        if not self.tarfile:
            self.set_tarfile()
        try:
            return self.tar_iter.next()
        except StopIteration:
            # Current tar is finished: close it (again outside the assert)
            # and continue with the next file object.
            close_status = self.tarfile.close()
            assert not close_status, close_status
            self.set_tarfile()
            return self.tar_iter.next()

    def extractfile(self, tarinfo):
        """Return data associated with given tarinfo."""
        return self.tarfile.extractfile(tarinfo)
340 341
def collate_iters(iter_list):
    """Collate iterators by index.

    Input is a list of n iterators each of which must iterate elements
    with an index attribute.  The elements must come out in increasing
    order, and the index should be a tuple itself.

    The output is an iterator which yields tuples where all elements
    in the tuple have the same index, and the tuple has n elements in
    it.  If any iterator lacks an element with that index, the tuple
    will have None in that spot.

    Exactly two iterators are delegated to diffdir.collate2iters.
    """
    # overflow[i] being set means that iter_list[i] has been exhausted.
    # elems[i] being None means that it is time to replenish it.
    iter_num = len(iter_list)
    if iter_num == 2:
        return diffdir.collate2iters(iter_list[0], iter_list[1])
    overflow = [None] * iter_num
    elems = overflow[:]

    def setrorps(overflow, elems):
        """Refill every empty slot whose iterator is not yet exhausted."""
        for i in range(iter_num):
            if not overflow[i] and elems[i] is None:
                try:
                    elems[i] = iter_list[i].next()
                except StopIteration:
                    overflow[i] = 1
                    elems[i] = None

    def getleastindex(elems):
        """Return the smallest index among the pending elements."""
        # Compare against None explicitly: an element may be a perfectly
        # valid object that merely evaluates as false (e.g. has length 0).
        return min([elem.index for elem in elems if elem is not None])

    def yield_tuples(iter_num, overflow, elems):
        while 1:
            setrorps(overflow, elems)
            if None not in overflow:
                break  # every iterator is exhausted

            index = getleastindex(elems)
            yieldval = []
            for i in range(iter_num):
                if elems[i] is not None and elems[i].index == index:
                    yieldval.append(elems[i])
                    elems[i] = None  # consumed, replenish next round
                else:
                    yieldval.append(None)
            yield tuple(yieldval)
    return yield_tuples(iter_num, overflow, elems)
class IndexedTuple:
    """Like a tuple, but has .index (used previously by collate_iters).

    Ordering comparisons look at .index only and require the other operand
    to be an IndexedTuple.  Equality compares both index and data against
    another IndexedTuple, or just the data against a plain tuple.
    """

    def __init__(self, index, sequence):
        self.index = index
        self.data = tuple(sequence)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, key):
        """This only works for numerical keys (easier this way)."""
        return self.data[key]

    def __lt__(self, other):
        return self.__cmp__(other) == -1

    def __le__(self, other):
        return self.__cmp__(other) != 1

    def __ne__(self, other):
        return not self.__eq__(other)

    def __gt__(self, other):
        return self.__cmp__(other) == 1

    def __ge__(self, other):
        return self.__cmp__(other) != -1

    def __cmp__(self, other):
        """Return -1, 0 or 1 according to how the two indexes compare."""
        assert isinstance(other, IndexedTuple)
        if self.index < other.index:
            return -1
        elif self.index == other.index:
            return 0
        else:
            return 1

    def __eq__(self, other):
        """Return a real boolean for every kind of operand."""
        if isinstance(other, IndexedTuple):
            return self.index == other.index and self.data == other.data
        elif isinstance(other, tuple):
            return self.data == other
        else:
            return False

    def __str__(self):
        return "(%s).%s" % (", ".join(map(str, self.data)), self.index)
437
def normalize_ps(patch_sequence):
    """Given a sequence of ROPath deltas, remove blank and unnecessary ones.

    The sequence is assumed to be in patch order (later patches apply
    to earlier ones).  A patch is unnecessary if a later one doesn't
    require it (for instance, any patches before a "delete" are
    unnecessary).

    Returns a new list in patch order, starting with the last delta whose
    difftype is not "diff" (or with the first non-blank delta if there is
    none).  Blank entries are None.
    """
    kept = []
    # Walk from the newest patch backwards and stop at the first delta that
    # does not build on an earlier one.  Appending and reversing once
    # avoids shifting the whole list on every insert at the front.
    for delta in reversed(patch_sequence):
        if delta is None:
            continue  # skip blank entries
        kept.append(delta)
        if delta.difftype != "diff":
            break
    kept.reverse()
    return kept
458
def patch_seq2ropath(patch_seq):
    """Apply the patches in patch_seq, return single ropath.

    patch_seq[0] must not be of difftype "diff"; every later element must
    be.  If the first element is not a regular file it has to be the only
    element and its get_ropath() result is returned directly.  Otherwise
    the deltas are layered on top of it with librsync.PatchedFile and the
    resulting file object is attached to the ropath taken from the last
    element.
    """
    first = patch_seq[0]
    assert first.difftype != "diff", patch_seq
    if not first.isreg():
        # No need to bother with data if not regular file
        assert len(patch_seq) == 1, len(patch_seq)
        return first.get_ropath()

    current_file = first.open("rb")

    for delta_ropath in patch_seq[1:]:
        assert delta_ropath.difftype == "diff", delta_ropath.difftype
        if not isinstance(current_file, file):
            # librsync needs true file
            tempfp = os.tmpfile()
            misc.copyfileobj(current_file, tempfp)
            # Close outside of the assert so that the source is still
            # closed when asserts are stripped by "python -O".
            close_status = current_file.close()
            assert not close_status, close_status
            tempfp.seek(0)
            current_file = tempfp
        current_file = librsync.PatchedFile(current_file,
                                            delta_ropath.open("rb"))
    result = patch_seq[-1].get_ropath()
    result.setfileobj(current_file)
    return result
484
def integrate_patch_iters(iter_list):
    """Combine a list of iterators of ropath patches.

    The iter_list should be sorted in patch order, and the elements in
    each iterator need to be ordered by index.  The output is an iterator
    of the final ROPaths in index order; indexes whose final ROPath does
    not exist are left out.
    """
    for patch_seq in collate_iters(iter_list):
        merged = patch_seq2ropath(normalize_ps(patch_seq))
        if not merged.exists():
            continue  # the final patch was a delete
        yield merged
499
def tarfiles2rop_iter(tarfile_list, restrict_index=()):
    """Integrate tarfiles of diffs into a single ROPath iterator.

    Each tarfile is turned into an iterator of diff ROPaths.  If
    restrict_index is set, every one of those iterators is filtered down
    to the diffs that start with restrict_index before they are
    integrated.
    """
    diff_iters = [difftar2path_iter(tf) for tf in tarfile_list]
    if restrict_index:
        # Apply filter before integration
        diff_iters = [filter_path_iter(diff_iter, restrict_index)
                      for diff_iter in diff_iters]
    return integrate_patch_iters(diff_iters)
513
def Write_ROPaths(base_path, rop_iter):
    """Write out the ropaths in rop_iter starting at base_path.

    Returns 1 if rop_iter produced at least one ropath, 0 otherwise.
    """
    writer = IterTreeReducer(ROPath_IterWriter, [base_path])
    wrote_any = 0
    for ropath in rop_iter:
        wrote_any = 1
        writer(ropath.index, ropath)
    writer.Finish()
    base_path.setdata()
    return wrote_any
528
class ROPath_IterWriter(ITRBranch):
    """Branch class used by Write_ROPaths.

    An ITR is needed because the permissions/times of a directory have to
    be updated after the files inside it have been written.
    """

    def __init__(self, base_path):
        """Remember base_path, the Path at the root of the tree."""
        self.base_path = base_path
        self.dir_diff_ropath = None
        self.dir_new_path = None

    def start_process(self, index, ropath):
        """Write ropath.  Only handles the directory case."""
        if not ropath.isdir():
            # Base may not be a directory, but rest should
            assert ropath.index == (), ropath.index
            target = self.base_path.new_index(index)
            if ropath.exists():
                if target.exists():
                    target.deltree()
                ropath.copy(target)

        self.dir_new_path = self.base_path.new_index(index)
        # NOTE(review): `globals` has no explicit import in the visible
        # import list; presumably it arrives through one of the wildcard
        # imports -- verify.
        if not self.dir_new_path.exists() or globals.force:
            self.dir_new_path.mkdir()
        else:
            # base may exist, but nothing else
            assert index == (), index
        self.dir_diff_ropath = ropath

    def end_process(self):
        """Update information of a directory when leaving it."""
        if not self.dir_diff_ropath:
            return
        self.dir_diff_ropath.copy_attribs(self.dir_new_path)

    def can_fast_process(self, index, ropath):
        """Log the write; no recursion is needed unless ropath is a dir."""
        relative_path = ropath.get_relative_path()
        log.Info(_("Writing %s of type %s") %
                 (relative_path, ropath.type),
                 log.InfoCode.patch_file_writing,
                 "%s %s" % (util.escape(ropath.get_relative_path()),
                            ropath.type))
        if ropath.isdir():
            return False
        return True

    def fast_process(self, index, ropath):
        """Write non-directory ropath to destination."""
        if not ropath.exists():
            return
        ropath.copy(self.base_path.new_index(index))
578