Package duplicity :: Module backend
[hide private]
[frames] | no frames]

Source Code for Module duplicity.backend

  1  # -*- Mode:Python; indent-tabs-mode:nil; tab-width:4 -*- 
  2  # 
  3  # Copyright 2002 Ben Escoto <ben@emerose.org> 
  4  # Copyright 2007 Kenneth Loafman <kenneth@loafman.com> 
  5  # 
  6  # This file is part of duplicity. 
  7  # 
  8  # Duplicity is free software; you can redistribute it and/or modify it 
  9  # under the terms of the GNU General Public License as published by the 
 10  # Free Software Foundation; either version 2 of the License, or (at your 
 11  # option) any later version. 
 12  # 
 13  # Duplicity is distributed in the hope that it will be useful, but 
 14  # WITHOUT ANY WARRANTY; without even the implied warranty of 
 15  # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU 
 16  # General Public License for more details. 
 17  # 
 18  # You should have received a copy of the GNU General Public License 
 19  # along with duplicity; if not, write to the Free Software Foundation, 
 20  # Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 
 21   
 22  """ 
 23  Provides a common interface to all backends and certain services 
 24  intended to be used by the backends themselves. 
 25  """ 
 26   
 27  import os 
 28  import sys 
 29  import socket 
 30  import time 
 31  import re 
 32  import getpass 
 33  import gettext 
 34  import urllib 
 35   
 36  from duplicity import dup_temp 
 37  from duplicity import dup_threading 
 38  from duplicity import file_naming 
 39  from duplicity import globals 
 40  from duplicity import log 
 41  from duplicity import urlparse_2_5 as urlparser 
 42   
 43  from duplicity.util import exception_traceback 
 44   
 45  from duplicity.errors import BackendException 
 46  from duplicity.errors import TemporaryLoadException 
 47  from duplicity.errors import ConflictingScheme 
 48  from duplicity.errors import InvalidBackendURL 
 49  from duplicity.errors import UnsupportedBackendScheme 
 50   
 51  import duplicity.backends 
 52   
 53   
# todo: this should really NOT be done here
# Installs globals.timeout as the default timeout for sockets created from
# now on; this happens as a side effect of importing this module.
socket.setdefaulttimeout(globals.timeout)

# Backend factory installed by force_backend(); when set, get_backend()
# uses it instead of looking the URL scheme up in _backends.
_forced_backend = None
# Maps a URL scheme to the backend factory registered for it through
# register_backend().
_backends = {}
 59   
 60   
def import_backends():
    """
    Import every module of the duplicity/backends package whose file
    name ends in 'backend.py'; all other directory entries are skipped.

    The outcome of each import attempt (success, or the failure
    message) is reported through log.Info.  A backend that fails to
    import does not stop the remaining ones from being tried.

    @rtype: void
    @return: void
    """
    backends_dir = duplicity.backends.__path__[0]
    assert backends_dir.endswith("duplicity/backends"), duplicity.backends.__path__

    for entry in os.listdir(backends_dir):
        if not entry.endswith("backend.py"):
            continue

        # Strip the '.py' suffix to obtain the module name.
        module_name = "duplicity.backends.%s" % (entry[:-3],)
        try:
            __import__(module_name)
        except Exception:
            outcome = "Failed: " + str(sys.exc_info()[1])
        else:
            outcome = "Succeeded"
        log.Info("Import of %s %s" % (module_name, outcome))
85 86
def force_backend(backend):
    """
    Pin backend selection to the given backend factory.

    The factory is stored in the module-level _forced_backend; while it
    is set, get_backend() calls it for every backend URL regardless of
    the URL's scheme.
    """
    global _forced_backend
    _forced_backend = backend
93 94
def register_backend(scheme, backend_factory):
    """
    Associate a backend factory with a URL scheme.

    The factory must be a callable which, given a parsed URL as its
    single argument, returns an object implementing the backend
    protocol (i.e., a subclass of Backend).  Usually the Backend
    subclass itself is passed.

    Raise ConflictingScheme if the scheme already has a factory.

    This function is not thread-safe and is intended to be called
    during module importation or start-up.
    """
    assert callable(backend_factory), "backend factory must be callable"

    if scheme not in _backends:
        _backends[scheme] = backend_factory
        return

    raise ConflictingScheme("the scheme %s already has a backend "
                            "associated with it"
                            "" % (scheme,))
119 120
def is_backend_url(url_string):
    """
    @return Whether the given string looks like a backend URL, i.e.
            whether parsing it yields a non-empty scheme.
    """
    # bool() so that callers get True/False rather than the scheme string.
    return bool(ParsedUrl(url_string).scheme)
132 133
def get_backend(url_string):
    """
    Instantiate a backend suitable for the given URL, or return None
    if the given string looks like a local path rather than a URL.

    A backend installed through force_backend() takes precedence over
    the factories registered per scheme.

    Raise InvalidBackendURL if the URL is not a valid URL.
    Raise UnsupportedBackendScheme if no backend is forced and none is
    registered for the URL's scheme.
    """
    # Parse once; an empty scheme is what is_backend_url() treats as
    # "implicit local path".
    pu = ParsedUrl(url_string)
    if not pu.scheme:
        return None

    if _forced_backend:
        return _forced_backend(pu)
    if pu.scheme not in _backends:
        raise UnsupportedBackendScheme(url_string)
    return _backends[pu.scheme](pu)


# Set to True by _ensure_urlparser_initialized() once the urlparser module
# has been patched, so that the patching happens only once.
_urlparser_initialized = False
# Lock held (via dup_threading.with_lock) while the flag above is checked
# and updated.
_urlparser_initialized_lock = dup_threading.threading_module().Lock()

162 -def _ensure_urlparser_initialized():
163 """ 164 Ensure that the appropriate clobbering of variables in the 165 urlparser module has been done. In the future, the need for this 166 clobbering to begin with should preferably be eliminated. 167 """ 168 def init(): 169 global _urlparser_initialized 170 171 if not _urlparser_initialized: 172 # These URL schemes have a backend with a notion of an RFC "network location". 173 # The 'file' and 's3+http' schemes should not be in this list. 174 # 'http' and 'https' are not actually used for duplicity backend urls, but are needed 175 # in order to properly support urls returned from some webdav servers. adding them here 176 # is a hack. we should instead not stomp on the url parsing module to begin with. 177 # 178 # todo: eliminate the need for backend specific hacking here completely. 179 urlparser.uses_netloc = ['ftp', 180 'ftps', 181 'hsi', 182 'rsync', 183 's3', 184 'u1', 185 'scp', 'ssh', 'sftp', 186 'webdav', 'webdavs', 187 'gdocs', 188 'http', 'https', 189 'imap', 'imaps'] 190 191 # Do not transform or otherwise parse the URL path component. 192 urlparser.uses_query = [] 193 urlparser.uses_fragm = [] 194 195 _urlparser_initialized = True
196 197 dup_threading.with_lock(_urlparser_initialized_lock, init) 198
class ParsedUrl:
    """
    Parse the given URL as a duplicity backend URL.

    Exposes the parts of the parsed URL under the same names as the
    standard urlparse.urlparse() result (scheme, netloc, path,
    username, password, hostname, port), except that every value is
    resolved in the constructor rather than lazily.  There are no
    get_* members.  Resolving eagerly makes URL parsing errors show up
    early.

    Raise InvalidBackendURL on invalid URL's
    """

    def __init__(self, url_string):
        self.url_string = url_string
        _ensure_urlparser_initialized()

        # The urlparser makes the properties of the URL deferred or lazy,
        # so problems would not be detected until a property is read.
        # Every property is read here so that they are caught early.

        try:
            pu = urlparser.urlparse(url_string)
        except Exception:
            raise InvalidBackendURL("Syntax error in: %s" % url_string)

        def resolve(attr, complaint):
            # Read one lazy property, turning any failure into InvalidBackendURL.
            try:
                return getattr(pu, attr)
            except Exception:
                raise InvalidBackendURL(complaint % url_string)

        self.scheme = resolve("scheme", "Syntax error (scheme) in: %s")
        self.netloc = resolve("netloc", "Syntax error (netloc) in: %s")
        self.path = resolve("path", "Syntax error (path) in: %s")

        # Username and password are stored unquoted; empty values become None.
        raw_username = resolve("username", "Syntax error (username) in: %s")
        self.username = None
        if raw_username:
            self.username = urllib.unquote(raw_username)

        raw_password = resolve("password", "Syntax error (password) in: %s")
        self.password = None
        if raw_password:
            self.password = urllib.unquote(raw_password)

        self.hostname = resolve("hostname", "Syntax error (hostname) in: %s")

        # init to None, overwrite with actual value on success
        self.port = None
        try:
            self.port = pu.port
        except Exception:
            # old style rsync://host::[/]dest, are still valid, though they contain no port
            old_style_rsync = (self.scheme in ['rsync'] and
                               re.search('::[^:]*$', self.url_string))
            if not old_style_rsync:
                raise InvalidBackendURL("Syntax error (port) in: %s A%s B%s C%s" % (url_string, (self.scheme in ['rsync']), re.search('::[^:]+$', self.netloc), self.netloc ) )

        # This happens for implicit local paths.
        if not pu.scheme:
            return

        expects_netloc = pu.scheme in urlparser.uses_netloc

        # Our backends do not handle implicit hosts.
        if expects_netloc and not pu.hostname:
            raise InvalidBackendURL("Missing hostname in a backend URL which "
                                    "requires an explicit hostname: %s"
                                    "" % (url_string))

        # Our backends do not handle implicit relative paths.
        if not expects_netloc and not pu.path.startswith('//'):
            raise InvalidBackendURL("missing // - relative paths not supported "
                                    "for scheme %s: %s"
                                    "" % (pu.scheme, url_string))

    def geturl(self):
        """Return the URL string exactly as it was given to the constructor."""
        return self.url_string
289 290
def strip_auth_from_url(parsed_url):
    """Return the URL of a urlparse object with username and password removed."""
    full_netloc = parsed_url.netloc

    # Whatever follows the last '@' is the network location without credentials.
    bare_netloc = full_netloc.rsplit('@', 1)[-1]

    # Swap the first occurrence of the full network location for the bare one.
    return parsed_url.geturl().replace(full_netloc, bare_netloc, 1)
299 300 301 # Decorator for backend operation functions to simplify writing one that 302 # retries. Make sure to add a keyword argument 'raise_errors' to your function 303 # and if it is true, raise an exception on an error. If false, fatal-log it.
def retry(fn):
    """
    Decorator for backend operation functions that should be retried.

    The decorated function must accept a keyword argument 'raise_errors':
    if it is true the function raises an exception on an error, if it is
    false the function fatal-logs the error instead.

    The wrapper calls fn with raise_errors=True up to
    globals.num_retries - 1 times.  Each failure is logged as a warning
    (with the traceback at debug level), and a TemporaryLoadException is
    followed by a 30 second pause.  If all of those attempts fail, fn is
    called one last time with raise_errors=False and its result returned.

    Only positional arguments are forwarded to fn.

    @return: the wrapping function, carrying fn's __name__ and __doc__
    """
    def iterate(*args):
        for n in range(1, globals.num_retries):
            try:
                kwargs = {"raise_errors" : True}
                return fn(*args, **kwargs)
            except Exception:
                # sys.exc_info() keeps this valid on every Python version
                # the module may run on.
                e = sys.exc_info()[1]
                log.Warn("Attempt %s failed: %s: %s"
                         % (n, e.__class__.__name__, str(e)))
                log.Debug("Backtrace of previous error: %s"
                          % exception_traceback())
                if isinstance(e, TemporaryLoadException):
                    time.sleep(30) # wait a bit before trying again
        # Now try one last time, but fatal-log instead of raising errors
        kwargs = {"raise_errors" : False}
        return fn(*args, **kwargs)

    # Keep the decorated function recognisable in logs and tracebacks.
    for attr in ("__name__", "__doc__"):
        try:
            setattr(iterate, attr, getattr(fn, attr))
        except (AttributeError, TypeError):
            pass
    return iterate
class Backend:
    """
    Represents a generic duplicity backend, capable of storing and
    retrieving files.

    Concrete sub-classes are expected to implement:

      - put
      - get
      - list
      - delete
      - close (if needed)

    Optional:

      - move
    """

    # Use getpass by default; inherited backends may overwrite this behaviour.
    use_getpass = True

    # Dictionary of persist-breaking exit codes, consulted by
    # subprocess_popen_persist().  Syntax is
    #   { 'command' : [ code1, code2 ], ... }
    # see ftpbackend for an example.
    popen_persist_breaks = {}

    def __init__(self, parsed_url):
        """Remember the parsed backend URL this backend operates on."""
        self.parsed_url = parsed_url

    def put(self, source_path, remote_filename = None):
        """
        Transfer source_path (Path object) to remote_filename (string)

        If remote_filename is None, get the filename from the last
        path component of pathname.
        """
        raise NotImplementedError()

    def move(self, source_path, remote_filename = None):
        """
        Move source_path (Path object) to remote_filename (string)

        Same as put(), but unlinks source_path in the process.  This allows the
        local backend to do this more efficiently using rename.
        """
        self.put(source_path, remote_filename)
        source_path.delete()

    def get(self, remote_filename, local_path):
        """Retrieve remote_filename and place in local_path"""
        raise NotImplementedError()

    def list(self):
        """
        Return list of filenames (strings) present in backend
        """
        raise NotImplementedError()

    def delete(self, filename_list):
        """
        Delete each filename in filename_list, in order if possible.
        """
        raise NotImplementedError()

    # Should never cause FatalError.
    # Returns a dictionary of dictionaries.  The outer dictionary maps
    # filenames to metadata dictionaries.  Supported metadata are:
    #
    # 'size': if >= 0, size of file
    #         if -1, file is not found
    #         if None, error querying file
    #
    # Returned dictionary is guaranteed to contain a metadata dictionary for
    # each filename, but not all metadata are guaranteed to be present.
    def query_info(self, filename_list, raise_errors=True):
        """
        Return metadata about each filename in filename_list

        Uses the sub-class hook _query_list_info(filename_list) if it
        exists, otherwise _query_file_info(filename) once per file, and
        falls back to empty metadata when neither hook is defined.
        raise_errors is accepted but not used by this implementation.
        """
        info = {}
        if hasattr(self, '_query_list_info'):
            info = self._query_list_info(filename_list)
        elif hasattr(self, '_query_file_info'):
            for filename in filename_list:
                info[filename] = self._query_file_info(filename)

        # Fill out any missing entries (may happen if backend has no support
        # or its query_list support is lazy)
        for filename in filename_list:
            if filename not in info:
                info[filename] = {}

        return info

    def get_password(self):
        """
        Return a password for authentication purposes. The password
        will be obtained from the backend URL, the environment, by
        asking the user, or by some other method. When applicable, the
        result will be cached for future invocations.

        Lookup order: the password of the parsed URL, then the
        FTP_PASSWORD environment variable, then (only if use_getpass is
        true) an interactive prompt whose answer is stored back into
        FTP_PASSWORD.  Returns None when no source yields a password.
        """
        if self.parsed_url.password:
            return self.parsed_url.password

        try:
            password = os.environ['FTP_PASSWORD']
        except KeyError:
            if self.use_getpass:
                password = getpass.getpass("Password for '%s@%s': " %
                                           (self.parsed_url.username,self.parsed_url.hostname) )
                # Cache the answer so the user is asked only once.
                os.environ['FTP_PASSWORD'] = password
            else:
                password = None
        return password

    def munge_password(self, commandline):
        """
        Remove password from commandline by substituting the password
        found in the URL, if any, with a generic place-holder.

        This is intended for display purposes only, and it is not
        guaranteed that the results are correct (i.e., more than just
        the ':password@' may be substituted).
        """
        if self.parsed_url.password:
            return re.sub( r'(:([^\s:/@]+)@([^\s@]+))', r':*****@\3', commandline )
        else:
            return commandline

    # DEPRECATED:
    # run_command(_persist) - legacy wrappers for subprocess_popen(_persist)
    def run_command(self, commandline):
        """Deprecated alias of subprocess_popen()."""
        return self.subprocess_popen(commandline)

    def run_command_persist(self, commandline):
        """Deprecated alias of subprocess_popen_persist()."""
        return self.subprocess_popen_persist(commandline)

    # DEPRECATED:
    # popen(_persist) - legacy wrappers for subprocess_popen(_persist)
    def popen(self, commandline):
        """Deprecated: run subprocess_popen() and return only its stdout."""
        result, stdout, stderr = self.subprocess_popen(commandline)
        return stdout

    def popen_persist(self, commandline):
        """Deprecated: run subprocess_popen_persist() and return only its stdout."""
        result, stdout, stderr = self.subprocess_popen_persist(commandline)
        return stdout

    def _subprocess_popen(self, commandline):
        """
        For internal use.
        Execute the given command line, interpreted as a shell command.
        Returns int Exitcode, string StdOut, string StdErr
        """
        from subprocess import Popen, PIPE
        # NOTE(review): the command line is handed to the shell as-is
        # (shell=True); callers are presumably responsible for quoting
        # anything that comes from the URL or the user -- confirm.
        p = Popen(commandline, shell=True, stdout=PIPE, stderr=PIPE)
        stdout, stderr = p.communicate()

        return p.returncode, stdout, stderr

    def subprocess_popen(self, commandline):
        """
        Execute the given command line with error check.
        Returns int Exitcode, string StdOut, string StdErr

        Raise a BackendException on failure.
        """
        # Only the password-free form of the command is logged or reported.
        private = self.munge_password(commandline)
        log.Info(_("Reading results of '%s'") % private)
        result, stdout, stderr = self._subprocess_popen(commandline)
        if result != 0:
            raise BackendException("Error running '%s'" % private)
        return result, stdout, stderr

    def subprocess_popen_persist(self, commandline):
        """
        Execute the given command line with error check.
        Retries globals.num_retries times with 30s delay.
        Returns int Exitcode, string StdOut, string StdErr

        A non-zero exit code listed in popen_persist_breaks for the
        command (the first word of the command line) is treated as
        success and reported as (0, '', '').

        Raise a BackendException on failure.
        """
        private = self.munge_password(commandline)

        for n in range(1, globals.num_retries+1):
            # sleep before retry
            if n > 1:
                time.sleep(30)
            log.Info(_("Reading results of '%s'") % private)
            result, stdout, stderr = self._subprocess_popen(commandline)
            if result == 0:
                return result, stdout, stderr

            # Ignore a predefined set of error codes.  An empty or
            # whitespace-only command line has no command word; it is then
            # handled like any other failure.
            m = re.search(r"^\s*([\S]+)", commandline)
            if m is not None:
                if result in self.popen_persist_breaks.get(m.group(1), ()):
                    return 0, '', ''

            log.Warn(gettext.ngettext("Running '%s' failed with code %d (attempt #%d)",
                                      "Running '%s' failed with code %d (attempt #%d)", n) %
                     (private, result, n))
            if stdout or stderr:
                log.Warn(_("Error is:\n%s") % stderr + (stderr and stdout and "\n") + stdout)

        log.Warn(gettext.ngettext("Giving up trying to execute '%s' after %d attempt",
                                  "Giving up trying to execute '%s' after %d attempts",
                                  globals.num_retries) % (private, globals.num_retries))
        raise BackendException("Error running '%s'" % private)

    def get_fileobj_read(self, filename, parseresults = None):
        """
        Return fileobject opened for reading of filename on backend

        The file will be downloaded first into a temp file.  When the
        returned fileobj is closed, the temp file will be deleted.
        """
        if not parseresults:
            parseresults = file_naming.parse(filename)
            assert parseresults, "Filename not correctly parsed"
        tdp = dup_temp.new_tempduppath(parseresults)
        self.get(filename, tdp)
        tdp.setdata()
        return tdp.filtered_open_with_delete("rb")

    def get_fileobj_write(self, filename,
                          parseresults = None,
                          sizelist = None):
        """
        Return fileobj opened for writing, which will cause the file
        to be written to the backend on close().

        The file will be encoded as specified in parseresults (or as
        read from the filename), and stored in a temp file until it
        can be copied over and deleted.

        If sizelist is not None, it should be set to an empty list.
        The number of bytes will be inserted into the list.
        """
        if not parseresults:
            parseresults = file_naming.parse(filename)
            assert parseresults, "Filename %s not correctly parsed" % filename
        tdp = dup_temp.new_tempduppath(parseresults)

        def close_file_hook():
            """This is called when returned fileobj is closed"""
            self.put(tdp, filename)
            if sizelist is not None:
                tdp.setdata()
                sizelist.append(tdp.getsize())
            tdp.delete()

        fh = dup_temp.FileobjHooked(tdp.filtered_open("wb"))
        fh.addhook(close_file_hook)
        return fh

    def get_data(self, filename, parseresults = None):
        """
        Retrieve a file from backend, process it, return contents.
        """
        fin = self.get_fileobj_read(filename, parseresults)
        buf = fin.read()
        # Close outside the assert: asserts are dropped under "python -O",
        # which would otherwise skip the close.
        close_result = fin.close()
        assert not close_result
        return buf

    def put_data(self, buffer, filename, parseresults = None):
        """
        Put buffer into filename on backend after processing.
        """
        fout = self.get_fileobj_write(filename, parseresults)
        fout.write(buffer)
        # Close outside the assert: closing is what triggers the upload
        # hook, and asserts are dropped under "python -O".
        close_result = fout.close()
        assert not close_result

    def close(self):
        """
        Close the backend, releasing any resources held and
        invalidating any file objects obtained from the backend.
        """
        pass
604