1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 """Read from and write to tar format archives.
31 """
32
33 __version__ = "$Revision: 1.14 $"
34
35
36 version = "0.9.0"
37 __author__ = "Lars Gustäbel (lars@gustaebel.de)"
38 __date__ = "$Date: 2011/11/25 19:41:15 $"
39 __cvsid__ = "$Id: duplicity.tarfile-pysrc.html,v 1.14 2011/11/25 19:41:15 loafman Exp $"
40 __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42
43
44
45 import sys
46 import os
47 import shutil
48 import stat
49 import errno
50 import time
51 import struct
52 import copy
53 import re
54 import operator
55
56 try:
57 import grp, pwd
58 except ImportError:
59 grp = pwd = None
60
61
62 __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64
65
66
67 NUL = "\0"
68 BLOCKSIZE = 512
69 RECORDSIZE = BLOCKSIZE * 20
70 GNU_MAGIC = "ustar \0"
71 POSIX_MAGIC = "ustar\x0000"
72
73 LENGTH_NAME = 100
74 LENGTH_LINK = 100
75 LENGTH_PREFIX = 155
76
77 REGTYPE = "0"
78 AREGTYPE = "\0"
79 LNKTYPE = "1"
80 SYMTYPE = "2"
81 CHRTYPE = "3"
82 BLKTYPE = "4"
83 DIRTYPE = "5"
84 FIFOTYPE = "6"
85 CONTTYPE = "7"
86
87 GNUTYPE_LONGNAME = "L"
88 GNUTYPE_LONGLINK = "K"
89 GNUTYPE_SPARSE = "S"
90
91 XHDTYPE = "x"
92 XGLTYPE = "g"
93 SOLARIS_XHDTYPE = "X"
94
95 USTAR_FORMAT = 0
96 GNU_FORMAT = 1
97 PAX_FORMAT = 2
98 DEFAULT_FORMAT = GNU_FORMAT
99
100
101
102
103
104 SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
105 SYMTYPE, DIRTYPE, FIFOTYPE,
106 CONTTYPE, CHRTYPE, BLKTYPE,
107 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
108 GNUTYPE_SPARSE)
109
110
111 REGULAR_TYPES = (REGTYPE, AREGTYPE,
112 CONTTYPE, GNUTYPE_SPARSE)
113
114
115 GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
116 GNUTYPE_SPARSE)
117
118
119 PAX_FIELDS = ("path", "linkpath", "size", "mtime",
120 "uid", "gid", "uname", "gname")
121
122
123
124 PAX_NUMBER_FIELDS = {
125 "atime": float,
126 "ctime": float,
127 "mtime": float,
128 "uid": int,
129 "gid": int,
130 "size": int
131 }
132
133
134
135
136 S_IFLNK = 0120000
137 S_IFREG = 0100000
138 S_IFBLK = 0060000
139 S_IFDIR = 0040000
140 S_IFCHR = 0020000
141 S_IFIFO = 0010000
142
143 TSUID = 04000
144 TSGID = 02000
145 TSVTX = 01000
146
147 TUREAD = 0400
148 TUWRITE = 0200
149 TUEXEC = 0100
150 TGREAD = 0040
151 TGWRITE = 0020
152 TGEXEC = 0010
153 TOREAD = 0004
154 TOWRITE = 0002
155 TOEXEC = 0001
156
157
158
159
160 ENCODING = sys.getfilesystemencoding()
161 if ENCODING is None:
162 ENCODING = sys.getdefaultencoding()
163
164
165
166
167
def stn(s, length):
    """Return `s` as a fixed-width field of exactly `length` characters.

       Input longer than `length` is truncated; shorter input is padded
       on the right with NUL characters.
    """
    # A negative repeat count yields "", so over-long input gets no padding.
    padding = NUL * (length - len(s))
    return s[:length] + padding
172
def nts(s):
    """Convert a null-terminated string field to a python string.

       Everything before the first NUL character is returned; a field
       without any NUL is returned unchanged.
    """
    # Splitting once on NUL keeps only the part in front of the terminator.
    return s.split("\0", 1)[0]
181
183 """Convert a number field to a python number.
184 """
185
186
187 if s[0] != chr(0200):
188 try:
189 n = int(nts(s) or "0", 8)
190 except ValueError:
191 raise InvalidHeaderError("invalid header")
192 else:
193 n = 0L
194 for i in xrange(len(s) - 1):
195 n <<= 8
196 n += ord(s[i + 1])
197 return n
198
200 """Convert a python number to a number field.
201 """
202
203
204
205
206
207
208 if 0 <= n < 8 ** (digits - 1):
209 s = "%0*o" % (digits - 1, n) + NUL
210 else:
211 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
212 raise ValueError("overflow in number field")
213
214 if n < 0:
215
216
217 n = struct.unpack("L", struct.pack("l", n))[0]
218
219 s = ""
220 for i in xrange(digits - 1):
221 s = chr(n & 0377) + s
222 n >>= 8
223 s = chr(0200) + s
224 return s
225
def uts(s, encoding, errors):
    """Encode the unicode object `s` to a byte string using `encoding`.

       `errors` is normally passed straight to s.encode() as the error
       handler. The special value "utf-8" selects a fallback scheme
       instead: the string is encoded strictly, and if that fails it is
       encoded character by character, with every character that
       `encoding` cannot represent emitted as its UTF-8 byte sequence.
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        # Mixed output: representable characters use `encoding`, the
        # rest fall back to UTF-8.
        pieces = []
        for char in s:
            try:
                pieces.append(char.encode(encoding, "strict"))
            except UnicodeEncodeError:
                pieces.append(char.encode("utf8"))
        return "".join(pieces)
245
def calc_chksums(buf):
    """Calculate the checksum for a member's header by summing up all
       characters except for the chksum field which is treated as if
       it was filled with spaces. According to the GNU tar sources,
       some tars (Sun and NeXT) calculate chksum with signed char,
       which will be different if there are chars in the buffer with
       the high bit set. So we calculate two checksums, unsigned and
       signed.

       Returns the tuple (unsigned_chksum, signed_chksum).
    """
    # Bytes 148..155 are the chksum field itself and are left out of the
    # sums; the constant 256 stands in for them (eight spaces, 8 * 32).
    before = buf[:148]
    after = buf[156:512]
    unsigned_chksum = 256 + sum(struct.unpack("148B", before)) \
                          + sum(struct.unpack("356B", after))
    signed_chksum = 256 + sum(struct.unpack("148b", before)) \
                        + sum(struct.unpack("356b", after))
    return unsigned_chksum, signed_chksum
258
260 """Copy length bytes from fileobj src to fileobj dst.
261 If length is None, copy the entire content.
262 """
263 if length == 0:
264 return
265 if length is None:
266 shutil.copyfileobj(src, dst)
267 return
268
269 BUFSIZE = 16 * 1024
270 blocks, remainder = divmod(length, BUFSIZE)
271 for b in xrange(blocks):
272 buf = src.read(BUFSIZE)
273 if len(buf) < BUFSIZE:
274 raise IOError("end of file reached")
275 dst.write(buf)
276
277 if remainder != 0:
278 buf = src.read(remainder)
279 if len(buf) < remainder:
280 raise IOError("end of file reached")
281 dst.write(buf)
282 return
283
284 filemode_table = (
285 ((S_IFLNK, "l"),
286 (S_IFREG, "-"),
287 (S_IFBLK, "b"),
288 (S_IFDIR, "d"),
289 (S_IFCHR, "c"),
290 (S_IFIFO, "p")),
291
292 ((TUREAD, "r"),),
293 ((TUWRITE, "w"),),
294 ((TUEXEC|TSUID, "s"),
295 (TSUID, "S"),
296 (TUEXEC, "x")),
297
298 ((TGREAD, "r"),),
299 ((TGWRITE, "w"),),
300 ((TGEXEC|TSGID, "s"),
301 (TSGID, "S"),
302 (TGEXEC, "x")),
303
304 ((TOREAD, "r"),),
305 ((TOWRITE, "w"),),
306 ((TOEXEC|TSVTX, "t"),
307 (TSVTX, "T"),
308 (TOEXEC, "x"))
309 )
312 """Convert a file's mode to a string of the form
313 -rwxrwxrwx.
314 Used by TarFile.list()
315 """
316 perm = []
317 for table in filemode_table:
318 for bit, char in table:
319 if mode & bit == bit:
320 perm.append(char)
321 break
322 else:
323 perm.append("-")
324 return "".join(perm)
325
327 """Base exception."""
328 pass
330 """General exception for extract errors."""
331 pass
333 """Exception for unreadable tar archives."""
334 pass
336 """Exception for unavailable compression methods."""
337 pass
339 """Exception for unsupported operations on stream-like TarFiles."""
340 pass
342 """Base exception for header errors."""
343 pass
345 """Exception for empty headers."""
346 pass
348 """Exception for truncated headers."""
349 pass
351 """Exception for end of file headers."""
352 pass
354 """Exception for invalid headers."""
355 pass
357 """Exception for missing and invalid extended headers."""
358 pass
359
364 """Low-level file object. Supports reading and writing.
365 It is used instead of a regular file object for streaming
366 access.
367 """
368
370 mode = {
371 "r": os.O_RDONLY,
372 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
373 }[mode]
374 if hasattr(os, "O_BINARY"):
375 mode |= os.O_BINARY
376 self.fd = os.open(name, mode, 0666)
377
380
def read(self, size):
    """Read up to `size` bytes from the OS-level file descriptor
       held in self.fd and return them.
    """
    descriptor = self.fd
    return os.read(descriptor, size)
383
386
388 """Class that serves as an adapter between TarFile and
389 a stream-like object. The stream-like object only
390 needs to have a read() or write() method and is accessed
391 blockwise. Use of gzip or bzip2 compression is possible.
392 A stream-like object could be for example: sys.stdin,
393 sys.stdout, a socket, a tape device etc.
394
395 _Stream is intended to be used only internally.
396 """
397
def __init__(self, name, mode, comptype, fileobj, bufsize):
    """Construct a _Stream object.

       name     -- file name; used to open the file when `fileobj` is
                   None, and stored as self.name ("" if falsy).
       mode     -- "r" sets the stream up for reading; any other value
                   sets it up for writing.
       comptype -- "gz" or "bz2" to (de)compress, '*' to detect the
                   compression from the first block of `fileobj`; for
                   any other value no (de)compressor is created here.
       fileobj  -- an already open stream-like object, or None to open
                   `name` with _LowLevelFile. An object passed in by
                   the caller is never closed by this class.
       bufsize  -- block size used for buffered access.

       Raises CompressionError if the module needed for `comptype`
       cannot be imported. If compression setup fails for any reason,
       a file that was opened here is closed again and the stream is
       marked closed before the exception propagates.
    """
    self._extfileobj = True
    if fileobj is None:
        fileobj = _LowLevelFile(name, mode)
        self._extfileobj = False

    if comptype == '*':
        # Wrap the file object so the first block can be inspected for
        # magic bytes without losing it for the later reads.
        fileobj = _StreamProxy(fileobj)
        comptype = fileobj.getcomptype()

    self.name = name or ""
    self.mode = mode
    self.comptype = comptype
    self.fileobj = fileobj
    self.bufsize = bufsize
    self.buf = ""
    self.pos = 0
    self.closed = False

    try:
        if comptype == "gz":
            try:
                import zlib
            except ImportError:
                raise CompressionError("zlib module is not available")
            self.zlib = zlib
            self.crc = zlib.crc32("") & 0xffffffff
            if mode == "r":
                self._init_read_gz()
            else:
                self._init_write_gz()

        if comptype == "bz2":
            try:
                import bz2
            except ImportError:
                raise CompressionError("bz2 module is not available")
            if mode == "r":
                self.dbuf = ""
                self.cmp = bz2.BZ2Decompressor()
            else:
                self.cmp = bz2.BZ2Compressor()
    except:
        # Deliberately broad: whatever went wrong, do not leak a file
        # opened above, and mark the stream closed so that a later
        # close() does not run on a half-initialized object (self.cmp
        # may not exist yet). The exception is always re-raised.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
443
445 if hasattr(self, "closed") and not self.closed:
446 self.close()
447
449 """Initialize for writing with gzip compression.
450 """
451 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
452 -self.zlib.MAX_WBITS,
453 self.zlib.DEF_MEM_LEVEL,
454 0)
455 timestamp = struct.pack("<L", long(time.time()))
456 self.__write("\037\213\010\010%s\002\377" % timestamp)
457 if self.name.endswith(".gz"):
458 self.name = self.name[:-3]
459 self.__write(self.name + NUL)
460
462 """Write string s to the stream.
463 """
464 if self.comptype == "gz":
465 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
466 self.pos += len(s)
467 if self.comptype != "tar":
468 s = self.cmp.compress(s)
469 self.__write(s)
470
472 """Write string s to the stream if a whole new block
473 is ready to be written.
474 """
475 self.buf += s
476 while len(self.buf) > self.bufsize:
477 self.fileobj.write(self.buf[:self.bufsize])
478 self.buf = self.buf[self.bufsize:]
479
481 """Close the _Stream object. No operation should be
482 done on it afterwards.
483 """
484 if self.closed:
485 return
486
487 if self.mode == "w" and self.comptype != "tar":
488 self.buf += self.cmp.flush()
489
490 if self.mode == "w" and self.buf:
491 self.fileobj.write(self.buf)
492 self.buf = ""
493 if self.comptype == "gz":
494
495
496
497
498
499
500 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
501 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
502
503 if not self._extfileobj:
504 self.fileobj.close()
505
506 self.closed = True
507
def _init_read_gz(self):
    """Initialize for reading a gzip compressed fileobj.

       Creates the raw-deflate decompressor, then consumes the gzip
       member header from the underlying file so that the next raw
       read starts at the compressed data.

       Raises ReadError if the gzip magic is missing and
       CompressionError if the compression method is not deflate.
    """
    self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
    self.dbuf = ""

    # Magic number and compression method.
    if self.__read(2) != "\037\213":
        raise ReadError("not a gzip file")
    if self.__read(1) != "\010":
        raise CompressionError("unsupported compression method")

    flag = ord(self.__read(1))
    # Skip the remaining six bytes of the fixed-size header.
    self.__read(6)

    if flag & 4:
        # Extra field: two-byte little-endian length, then that many
        # bytes. These are raw header bytes, so they must be skipped
        # with the raw __read(); the public read() would feed them to
        # the decompressor and advance self.pos.
        xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
        self.__read(xlen)
    if flag & 8:
        # NUL-terminated field (original file name in gzip).
        while True:
            s = self.__read(1)
            if not s or s == NUL:
                break
    if flag & 16:
        # NUL-terminated field (comment in gzip).
        while True:
            s = self.__read(1)
            if not s or s == NUL:
                break
    if flag & 2:
        # Two-byte header checksum; skipped without verification.
        self.__read(2)
538
540 """Return the stream's file pointer position.
541 """
542 return self.pos
543
def seek(self, pos=0):
    """Set the stream's file pointer to pos. Negative seeking
       is forbidden.

       The stream is advanced by reading and discarding data in
       bufsize-sized steps. Returns the resulting position; raises
       StreamError if `pos` lies before the current position.
    """
    distance = pos - self.pos
    if distance < 0:
        raise StreamError("seeking backwards is not allowed")

    full_reads, leftover = divmod(distance, self.bufsize)
    while full_reads > 0:
        self.read(self.bufsize)
        full_reads -= 1
    self.read(leftover)
    return self.pos
556
def read(self, size=None):
    """Return the next size number of bytes from the stream.
       If size is not defined, return all bytes of the stream
       up to EOF.

       The stream position self.pos is advanced by the number of
       bytes actually returned.
    """
    if size is not None:
        data = self._read(size)
    else:
        # No size given: keep pulling bufsize-sized pieces until an
        # empty result signals EOF.
        chunks = []
        chunk = self._read(self.bufsize)
        while chunk:
            chunks.append(chunk)
            chunk = self._read(self.bufsize)
        data = "".join(chunks)

    self.pos += len(data)
    return data
574
576 """Return size bytes from the stream.
577 """
578 if self.comptype == "tar":
579 return self.__read(size)
580
581 c = len(self.dbuf)
582 t = [self.dbuf]
583 while c < size:
584 buf = self.__read(self.bufsize)
585 if not buf:
586 break
587 try:
588 buf = self.cmp.decompress(buf)
589 except IOError:
590 raise ReadError("invalid compressed data")
591 t.append(buf)
592 c += len(buf)
593 t = "".join(t)
594 self.dbuf = t[size:]
595 return t[:size]
596
598 """Return size bytes from stream. If internal buffer is empty,
599 read another block from the stream.
600 """
601 c = len(self.buf)
602 t = [self.buf]
603 while c < size:
604 buf = self.fileobj.read(self.bufsize)
605 if not buf:
606 break
607 t.append(buf)
608 c += len(buf)
609 t = "".join(t)
610 self.buf = t[size:]
611 return t[:size]
612
615 """Small proxy class that enables transparent compression
616 detection for the Stream interface (mode 'r|*').
617 """
618
620 self.fileobj = fileobj
621 self.buf = self.fileobj.read(BLOCKSIZE)
622
def read(self, size):
    """Return the block that was buffered when the proxy was created.

       `size` is ignored. As a side effect the instance's read
       attribute is rebound to the wrapped file object's read(), so
       this method only ever runs once per proxy.
    """
    first_block = self.buf
    self.read = self.fileobj.read
    return first_block
626
def getcomptype(self):
    """Guess the compression type from the first buffered block.

       Returns "gz" or "bz2" when self.buf starts with the matching
       magic bytes, and "tar" otherwise.
    """
    if self.buf.startswith("\037\213\010"):
        return "gz"
    # A bzip2 stream starts with "BZh", one level character, and then
    # the block magic "1AY&SY". Testing for the literal "BZh91" would
    # only recognise streams whose level character is "9".
    if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
        return "bz2"
    return "tar"
633
636
639 """Small proxy class that enables external file object
640 support for "r:bz2" and "w:bz2" modes. This is actually
641 a workaround for a limitation in bz2 module's BZ2File
642 class which (unlike gzip.GzipFile) has no support for
643 a file object argument.
644 """
645
646 blocksize = 16 * 1024
647
649 self.fileobj = fileobj
650 self.mode = mode
651 self.name = getattr(self.fileobj, "name", None)
652 self.init()
653
655 import bz2
656 self.pos = 0
657 if self.mode == "r":
658 self.bz2obj = bz2.BZ2Decompressor()
659 self.fileobj.seek(0)
660 self.buf = ""
661 else:
662 self.bz2obj = bz2.BZ2Compressor()
663
def read(self, size):
    """Return up to `size` bytes of decompressed data.

       Raw blocks of self.blocksize bytes are read from the wrapped
       file object and decompressed until at least `size` bytes are
       available or the file is exhausted. Surplus decompressed data
       is kept in self.buf for the next call, and self.pos is advanced
       by the number of bytes returned.
    """
    chunks = [self.buf]
    available = len(self.buf)
    while available < size:
        raw = self.fileobj.read(self.blocksize)
        if not raw:
            break
        decoded = self.bz2obj.decompress(raw)
        chunks.append(decoded)
        available += len(decoded)

    joined = "".join(chunks)
    result = joined[:size]
    self.buf = joined[size:]
    self.pos += len(result)
    return result
680
def seek(self, pos):
    """Move to position `pos` in the decompressed data.

       Seeking works by reading and discarding data. For a target
       before the current position, self.init() is called first and
       the distance is then measured from whatever self.pos is
       afterwards.
    """
    going_back = pos < self.pos
    if going_back:
        self.init()
    gap = pos - self.pos
    self.read(gap)
685
688
690 self.pos += len(data)
691 raw = self.bz2obj.compress(data)
692 self.fileobj.write(raw)
693
695 if self.mode == "w":
696 raw = self.bz2obj.flush()
697 self.fileobj.write(raw)
698
704 """A thin wrapper around an existing file object that
705 provides a part of its data as an individual file
706 object.
707 """
708
709 - def __init__(self, fileobj, offset, size, sparse=None):
710 self.fileobj = fileobj
711 self.offset = offset
712 self.size = size
713 self.sparse = sparse
714 self.position = 0
715
717 """Return the current file position.
718 """
719 return self.position
720
def seek(self, position):
    """Seek to a position in the file.

       Only the logical position is recorded; the value is not range
       checked and the underlying file object is not touched here.
    """
    self.position = position
725
def read(self, size=None):
    """Read data from the file.

       At most `size` bytes are requested, capped at what is left
       between the current position and the end of the window; with
       size None everything that is left is requested. The request is
       served by readnormal() or, when a sparse map is set, by
       readsparse().
    """
    remaining = self.size - self.position
    if size is None:
        size = remaining
    else:
        size = min(size, remaining)

    if self.sparse is not None:
        return self.readsparse(size)
    return self.readnormal(size)
738
740 """Read operation for regular files.
741 """
742 self.fileobj.seek(self.offset + self.position)
743 self.position += size
744 return self.fileobj.read(size)
745
747 """Read operation for sparse files.
748 """
749 data = []
750 while size > 0:
751 buf = self.readsparsesection(size)
752 if not buf:
753 break
754 size -= len(buf)
755 data.append(buf)
756 return "".join(data)
757
759 """Read a single section of a sparse file.
760 """
761 section = self.sparse.find(self.position)
762
763 if section is None:
764 return ""
765
766 size = min(size, section.offset + section.size - self.position)
767
768 if isinstance(section, _data):
769 realpos = section.realpos + self.position - section.offset
770 self.fileobj.seek(self.offset + realpos)
771 self.position += size
772 return self.fileobj.read(size)
773 else:
774 self.position += size
775 return NUL * size
776
780 """File-like object for reading an archive member.
781 Is returned by TarFile.extractfile().
782 """
783 blocksize = 1024
784
786 self.fileobj = _FileInFile(tarfile.fileobj,
787 tarinfo.offset_data,
788 tarinfo.size,
789 getattr(tarinfo, "sparse", None))
790 self.name = tarinfo.name
791 self.mode = "r"
792 self.closed = False
793 self.size = tarinfo.size
794
795 self.position = 0
796 self.buffer = ""
797
def read(self, size=None):
    """Read at most size bytes from the file. If size is not
       present or None, read all data until EOF is reached.

       Data left over in self.buffer is handed out first; only the
       remainder is requested from the underlying file object.
       self.position is advanced by the number of bytes returned.
       Raises ValueError if the file has been closed.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file")

    if size is None:
        # Drain the whole buffer, then everything the file still has.
        head = self.buffer
        self.buffer = ""
        data = head + self.fileobj.read()
    else:
        # Take what the buffer can supply and ask the file for the rest.
        head = self.buffer[:size]
        self.buffer = self.buffer[size:]
        data = head + self.fileobj.read(size - len(head))

    self.position += len(data)
    return data
821
823 """Read one entire line from the file. If size is present
824 and non-negative, return a string with at most that
825 size, which may be an incomplete line.
826 """
827 if self.closed:
828 raise ValueError("I/O operation on closed file")
829
830 if "\n" in self.buffer:
831 pos = self.buffer.find("\n") + 1
832 else:
833 buffers = [self.buffer]
834 while True:
835 buf = self.fileobj.read(self.blocksize)
836 buffers.append(buf)
837 if not buf or "\n" in buf:
838 self.buffer = "".join(buffers)
839 pos = self.buffer.find("\n") + 1
840 if pos == 0:
841
842 pos = len(self.buffer)
843 break
844
845 if size != -1:
846 pos = min(size, pos)
847
848 buf = self.buffer[:pos]
849 self.buffer = self.buffer[pos:]
850 self.position += len(buf)
851 return buf
852
854 """Return a list with all remaining lines.
855 """
856 result = []
857 while True:
858 line = self.readline()
859 if not line: break
860 result.append(line)
861 return result
862
864 """Return the current file position.
865 """
866 if self.closed:
867 raise ValueError("I/O operation on closed file")
868
869 return self.position
870
def seek(self, pos, whence=0):
    """Seek to a position in the file.

       whence 0 is relative to the start, 1 to the current position
       and 2 to the end of the file. The resulting position is clamped
       so that it never leaves the range 0..self.size. Any buffered
       data is discarded. Raises ValueError for a closed file or an
       unknown `whence`.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file")

    if whence == 0:
        target = min(max(pos, 0), self.size)
    elif whence == 1:
        target = self.position + pos
        if pos < 0:
            target = max(target, 0)
        else:
            target = min(target, self.size)
    elif whence == 2:
        target = max(min(self.size + pos, self.size), 0)
    else:
        raise ValueError("Invalid argument")

    self.position = target
    self.buffer = ""
    self.fileobj.seek(self.position)
891
893 """Close the file object.
894 """
895 self.closed = True
896
898 """Get an iterator over the file's lines.
899 """
900 while True:
901 line = self.readline()
902 if not line:
903 break
904 yield line
905
906
907
908
909
910 -class TarInfo(object):
911 """Informational class which holds the details about an
912 archive member given by a tar header block.
913 TarInfo objects are returned by TarFile.getmember(),
914 TarFile.getmembers() and TarFile.gettarinfo() and are
915 usually created internally.
916 """
917
919 """Construct a TarInfo object. name is the optional name
920 of the member.
921 """
922 self.name = name
923 self.mode = 0644
924 self.uid = 0
925 self.gid = 0
926 self.size = 0
927 self.mtime = 0
928 self.chksum = 0
929 self.type = REGTYPE
930 self.linkname = ""
931 self.uname = ""
932 self.gname = ""
933 self.devmajor = 0
934 self.devminor = 0
935
936 self.offset = 0
937 self.offset_data = 0
938
939 self.pax_headers = {}
940
941
942
947 path = property(_getpath, _setpath)
948
952 self.linkname = linkname
953 linkpath = property(_getlinkpath, _setlinkpath)
954
956 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
957
959 """Return the TarInfo's attributes as a dictionary.
960 """
961 info = {
962 "name": self.name,
963 "mode": self.mode & 07777,
964 "uid": self.uid,
965 "gid": self.gid,
966 "size": self.size,
967 "mtime": self.mtime,
968 "chksum": self.chksum,
969 "type": self.type,
970 "linkname": self.linkname,
971 "uname": self.uname,
972 "gname": self.gname,
973 "devmajor": self.devmajor,
974 "devminor": self.devminor
975 }
976
977 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978 info["name"] += "/"
979
980 for key in ("name", "linkname", "uname", "gname"):
981 if type(info[key]) is unicode:
982 info[key] = info[key].encode(encoding, errors)
983
984 return info
985
999
1012
1026
1028 """Return the object as a ustar header block. If it cannot be
1029 represented this way, prepend a pax extended header sequence
1030 with supplement information.
1031 """
1032 info["magic"] = POSIX_MAGIC
1033 pax_headers = self.pax_headers.copy()
1034
1035
1036
1037 for name, hname, length in (
1038 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1039 ("uname", "uname", 32), ("gname", "gname", 32)):
1040
1041 if hname in pax_headers:
1042
1043 continue
1044
1045 val = info[name].decode(encoding, errors)
1046
1047
1048 try:
1049 val.encode("ascii")
1050 except UnicodeEncodeError:
1051 pax_headers[hname] = val
1052 continue
1053
1054 if len(info[name]) > length:
1055 pax_headers[hname] = val
1056
1057
1058
1059 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1060 if name in pax_headers:
1061
1062 info[name] = 0
1063 continue
1064
1065 val = info[name]
1066 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1067 pax_headers[name] = unicode(val)
1068 info[name] = 0
1069
1070
1071 if pax_headers:
1072 buf = self._create_pax_generic_header(pax_headers)
1073 else:
1074 buf = ""
1075
1076 return buf + self._create_header(info, USTAR_FORMAT)
1077
1078 @classmethod
1083
1085 """Split a name longer than 100 chars into a prefix
1086 and a name part.
1087 """
1088 prefix = name[:LENGTH_PREFIX + 1]
1089 while prefix and prefix[-1] != "/":
1090 prefix = prefix[:-1]
1091
1092 name = name[len(prefix):]
1093 prefix = prefix[:-1]
1094
1095 if not prefix or len(name) > LENGTH_NAME:
1096 raise ValueError("name is too long")
1097 return prefix, name
1098
1099 @staticmethod
1101 """Return a header block. info is a dictionary with file
1102 information, format must be one of the *_FORMAT constants.
1103 """
1104 parts = [
1105 stn(info.get("name", ""), 100),
1106 itn(info.get("mode", 0) & 07777, 8, format),
1107 itn(info.get("uid", 0), 8, format),
1108 itn(info.get("gid", 0), 8, format),
1109 itn(info.get("size", 0), 12, format),
1110 itn(info.get("mtime", 0), 12, format),
1111 " ",
1112 info.get("type", REGTYPE),
1113 stn(info.get("linkname", ""), 100),
1114 stn(info.get("magic", POSIX_MAGIC), 8),
1115 stn(info.get("uname", ""), 32),
1116 stn(info.get("gname", ""), 32),
1117 itn(info.get("devmajor", 0), 8, format),
1118 itn(info.get("devminor", 0), 8, format),
1119 stn(info.get("prefix", ""), 155)
1120 ]
1121
1122 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1123 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
1124 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1125 return buf
1126
1127 @staticmethod
1129 """Return the string payload filled with zero bytes
1130 up to the next 512 byte border.
1131 """
1132 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1133 if remainder > 0:
1134 payload += (BLOCKSIZE - remainder) * NUL
1135 return payload
1136
1137 @classmethod
1139 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1140 for name.
1141 """
1142 name += NUL
1143
1144 info = {}
1145 info["name"] = "././@LongLink"
1146 info["type"] = type
1147 info["size"] = len(name)
1148 info["magic"] = GNU_MAGIC
1149
1150
1151 return cls._create_header(info, USTAR_FORMAT) + \
1152 cls._create_payload(name)
1153
1154 @classmethod
1156 """Return a POSIX.1-2001 extended or global header sequence
1157 that contains a list of keyword, value pairs. The values
1158 must be unicode objects.
1159 """
1160 records = []
1161 for keyword, value in pax_headers.iteritems():
1162 keyword = keyword.encode("utf8")
1163 value = value.encode("utf8")
1164 l = len(keyword) + len(value) + 3
1165 n = p = 0
1166 while True:
1167 n = l + len(str(p))
1168 if n == p:
1169 break
1170 p = n
1171 records.append("%d %s=%s\n" % (p, keyword, value))
1172 records = "".join(records)
1173
1174
1175
1176 info = {}
1177 info["name"] = "././@PaxHeader"
1178 info["type"] = type
1179 info["size"] = len(records)
1180 info["magic"] = POSIX_MAGIC
1181
1182
1183 return cls._create_header(info, USTAR_FORMAT) + \
1184 cls._create_payload(records)
1185
1186 @classmethod
1188 """Construct a TarInfo object from a 512 byte string buffer.
1189 """
1190 if len(buf) == 0:
1191 raise EmptyHeaderError("empty header")
1192 if len(buf) != BLOCKSIZE:
1193 raise TruncatedHeaderError("truncated header")
1194 if buf.count(NUL) == BLOCKSIZE:
1195 raise EOFHeaderError("end of file header")
1196
1197 chksum = nti(buf[148:156])
1198 if chksum not in calc_chksums(buf):
1199 raise InvalidHeaderError("bad checksum")
1200
1201 obj = cls()
1202 obj.buf = buf
1203 obj.name = nts(buf[0:100])
1204 obj.mode = nti(buf[100:108])
1205 obj.uid = nti(buf[108:116])
1206 obj.gid = nti(buf[116:124])
1207 obj.size = nti(buf[124:136])
1208 obj.mtime = nti(buf[136:148])
1209 obj.chksum = chksum
1210 obj.type = buf[156:157]
1211 obj.linkname = nts(buf[157:257])
1212 obj.uname = nts(buf[265:297])
1213 obj.gname = nts(buf[297:329])
1214 obj.devmajor = nti(buf[329:337])
1215 obj.devminor = nti(buf[337:345])
1216 prefix = nts(buf[345:500])
1217
1218
1219
1220 if obj.type == AREGTYPE and obj.name.endswith("/"):
1221 obj.type = DIRTYPE
1222
1223
1224 if obj.isdir():
1225 obj.name = obj.name.rstrip("/")
1226
1227
1228 if prefix and obj.type not in GNU_TYPES:
1229 obj.name = prefix + "/" + obj.name
1230 return obj
1231
1232 @classmethod
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1265
1282
1304
1306 """Process a GNU sparse header plus extra headers.
1307 """
1308 buf = self.buf
1309 sp = _ringbuffer()
1310 pos = 386
1311 lastpos = 0L
1312 realpos = 0L
1313
1314
1315 for i in xrange(4):
1316 try:
1317 offset = nti(buf[pos:pos + 12])
1318 numbytes = nti(buf[pos + 12:pos + 24])
1319 except ValueError:
1320 break
1321 if offset > lastpos:
1322 sp.append(_hole(lastpos, offset - lastpos))
1323 sp.append(_data(offset, numbytes, realpos))
1324 realpos += numbytes
1325 lastpos = offset + numbytes
1326 pos += 24
1327
1328 isextended = ord(buf[482])
1329 origsize = nti(buf[483:495])
1330
1331
1332
1333 while isextended == 1:
1334 buf = tarfile.fileobj.read(BLOCKSIZE)
1335 pos = 0
1336 for i in xrange(21):
1337 try:
1338 offset = nti(buf[pos:pos + 12])
1339 numbytes = nti(buf[pos + 12:pos + 24])
1340 except ValueError:
1341 break
1342 if offset > lastpos:
1343 sp.append(_hole(lastpos, offset - lastpos))
1344 sp.append(_data(offset, numbytes, realpos))
1345 realpos += numbytes
1346 lastpos = offset + numbytes
1347 pos += 24
1348 isextended = ord(buf[504])
1349
1350 if lastpos < origsize:
1351 sp.append(_hole(lastpos, origsize - lastpos))
1352
1353 self.sparse = sp
1354
1355 self.offset_data = tarfile.fileobj.tell()
1356 tarfile.offset = self.offset_data + self._block(self.size)
1357 self.size = origsize
1358
1359 return self
1360
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2001.

       Reads the header's records from `tarfile`, merges them into the
       applicable pax header dictionary, then reads the following
       header with fromtarfile() and returns that member. For an
       extended (per-member) header the collected values are applied
       to the returned member.

       Raises InvalidHeaderError for a record that declares a length
       of zero and SubsequentHeaderError if the following header is
       missing or invalid.
    """
    # Read the record data that belongs to this header.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A global header updates the archive-wide dictionary in place; any
    # other header works on a private copy.
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Each record has the form "<length> <keyword>=<value>\n", where
    # <length> counts the whole record including the length digits.
    regex = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would leave `pos` unchanged and loop forever
            # on the same record.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the header that follows this one.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the member with the values from the extended header and
        # let it start where the extended header started.
        member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        member.offset = self.offset

        if "size" in pax_headers:
            # The size was overridden, so the position of the next
            # header has to be recomputed from the new value.
            offset = member.offset_data
            if member.isreg() or member.type not in SUPPORTED_TYPES:
                offset += member._block(member.size)
            tarfile.offset = offset

    return member
1418
1420 """Replace fields with supplemental information from a previous
1421 pax extended or global header.
1422 """
1423 for keyword, value in pax_headers.iteritems():
1424 if keyword not in PAX_FIELDS:
1425 continue
1426
1427 if keyword == "path":
1428 value = value.rstrip("/")
1429
1430 if keyword in PAX_NUMBER_FIELDS:
1431 try:
1432 value = PAX_NUMBER_FIELDS[keyword](value)
1433 except ValueError:
1434 value = 0
1435 else:
1436 value = uts(value, encoding, errors)
1437
1438 setattr(self, keyword, value)
1439
1440 self.pax_headers = pax_headers.copy()
1441
1443 """Round up a byte count by BLOCKSIZE and return it,
1444 e.g. _block(834) => 1024.
1445 """
1446 blocks, remainder = divmod(count, BLOCKSIZE)
1447 if remainder:
1448 blocks += 1
1449 return blocks * BLOCKSIZE
1450
1471
1474 """The TarFile Class provides an interface to tar archives.
1475 """
1476
1477 debug = 0
1478
1479 dereference = False
1480
1481
1482 ignore_zeros = False
1483
1484
1485 errorlevel = 1
1486
1487
1488
1489 format = DEFAULT_FORMAT
1490
1491 encoding = ENCODING
1492
1493 errors = None
1494
1495 tarinfo = TarInfo
1496
1497 fileobject = ExFileObject
1498
1499 - def __init__(self, name=None, mode="r", fileobj=None, format=None,
1500 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1501 errors=None, pax_headers=None, debug=None, errorlevel=None):
1502 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1503 read from an existing archive, 'a' to append data to an existing
1504 file or 'w' to create a new file overwriting an existing one. `mode'
1505 defaults to 'r'.
1506 If `fileobj' is given, it is used for reading or writing data. If it
1507 can be determined, `mode' is overridden by `fileobj's mode.
1508 `fileobj' is not closed, when TarFile is closed.
1509 """
1510 if len(mode) > 1 or mode not in "raw":
1511 raise ValueError("mode must be 'r', 'a' or 'w'")
1512 self.mode = mode
1513 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1514
1515 if not fileobj:
1516 if self.mode == "a" and not os.path.exists(name):
1517
1518 self.mode = "w"
1519 self._mode = "wb"
1520 fileobj = bltn_open(name, self._mode)
1521 self._extfileobj = False
1522 else:
1523 if name is None and hasattr(fileobj, "name"):
1524 name = fileobj.name
1525 if hasattr(fileobj, "mode"):
1526 self._mode = fileobj.mode
1527 self._extfileobj = True
1528 if name:
1529 self.name = os.path.abspath(name)
1530 else:
1531 self.name = None
1532 self.fileobj = fileobj
1533
1534
1535 if format is not None:
1536 self.format = format
1537 if tarinfo is not None:
1538 self.tarinfo = tarinfo
1539 if dereference is not None:
1540 self.dereference = dereference
1541 if ignore_zeros is not None:
1542 self.ignore_zeros = ignore_zeros
1543 if encoding is not None:
1544 self.encoding = encoding
1545
1546 if errors is not None:
1547 self.errors = errors
1548 elif mode == "r":
1549 self.errors = "utf-8"
1550 else:
1551 self.errors = "strict"
1552
1553 if pax_headers is not None and self.format == PAX_FORMAT:
1554 self.pax_headers = pax_headers
1555 else:
1556 self.pax_headers = {}
1557
1558 if debug is not None:
1559 self.debug = debug
1560 if errorlevel is not None:
1561 self.errorlevel = errorlevel
1562
1563
1564 self.closed = False
1565 self.members = []
1566 self._loaded = False
1567 self.offset = self.fileobj.tell()
1568
1569 self.inodes = {}
1570
1571
1572 try:
1573 if self.mode == "r":
1574 self.firstmember = None
1575 self.firstmember = self.next()
1576
1577 if self.mode == "a":
1578
1579
1580 while True:
1581 self.fileobj.seek(self.offset)
1582 try:
1583 tarinfo = self.tarinfo.fromtarfile(self)
1584 self.members.append(tarinfo)
1585 except EOFHeaderError:
1586 self.fileobj.seek(self.offset)
1587 break
1588 except HeaderError, e:
1589 raise ReadError(str(e))
1590
1591 if self.mode in "aw":
1592 self._loaded = True
1593
1594 if self.pax_headers:
1595 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1596 self.fileobj.write(buf)
1597 self.offset += len(buf)
1598 except:
1599 if not self._extfileobj:
1600 self.fileobj.close()
1601 self.closed = True
1602 raise
1603
1607 import warnings
1608 warnings.warn("use the format attribute instead", DeprecationWarning,
1609 2)
1610 if value:
1611 self.format = USTAR_FORMAT
1612 else:
1613 self.format = GNU_FORMAT
1614 posix = property(_getposix, _setposix)
1615
1616
1617
1618
1619
1620
1621
1622
1623
1624
1625
1626
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       `bufsize' is only used for the stream ('|') modes. All other
       keyword arguments are handed to the selected opener.

       Raises ValueError if neither `name' nor `fileobj' is given or if
       `mode' cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no opener could read the file.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Probe every registered opener until one of them accepts the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file object so that the next
                # opener starts reading at the same position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the opener registered for this compression type; the
        # opener itself validates `filemode'.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Compare against the exact modes: a substring test would let
        # "rw" through and build a stream before the mode is rejected.
        if filemode not in ("r", "w"):
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        t._extfileobj = False
        return t

    # Exact comparison again: with a substring test the empty string
    # would be treated as a valid mode.
    elif mode in ("a", "w"):
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
1699
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       `mode' must be exactly 'r', 'a' or 'w'; anything else (including
       the empty string) raises ValueError. The remaining arguments are
       passed on to the class constructor, whose result is returned.
    """
    # A tuple test rejects "" and multi-character strings, which a
    # substring test against "raw" would not do reliably.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
1707
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       If `fileobj' is None the GzipFile opens `name' itself and
       therefore also closes it again; a caller-supplied `fileobj' is
       wrapped but never closed here.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is unusable and ReadError if
       reading the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Let GzipFile open the file when no file object was supplied: it
    # then owns the underlying file and closes it together with itself.
    # Errors while opening `name' propagate unchanged.
    gzfile = gzip.GzipFile(name, mode, compresslevel, fileobj)

    opened = False
    try:
        t = cls.taropen(name, mode, gzfile, **kwargs)
        opened = True
    except IOError:
        raise ReadError("not a gzip file")
    finally:
        # Whatever went wrong, do not leave the wrapper (and a file it
        # may have opened) dangling.
        if not opened:
            gzfile.close()
    t._extfileobj = False
    return t
1733
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Without `fileobj' a BZ2File is opened on `name'; it is closed
       again if the archive cannot be opened. A caller-supplied
       `fileobj' is wrapped in a _BZ2Proxy and left open on failure.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       reading the archive fails with IOError or EOFError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    # Remember whether the compressed file is opened here, because only
    # then is this method responsible for closing it on failure.
    opened_here = fileobj is None
    if opened_here:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
    else:
        fileobj = _BZ2Proxy(fileobj, mode)

    opened = False
    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
        opened = True
    except (IOError, EOFError):
        raise ReadError("not a bzip2 file")
    finally:
        if not opened and opened_here:
            fileobj.close()
    t._extfileobj = False
    return t
1758
1759
# Registry of the available openers: maps a compression type, as written
# in the mode string of open() (e.g. "r:gz"), to the name of the
# classmethod that handles it. open() also iterates over this mapping to
# probe each opener in turn for the modes "r" and "r:*".
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1765
1766
1767
1768
1770 """Close the TarFile. In write-mode, two finishing zero blocks are
1771 appended to the archive.
1772 """
1773 if self.closed:
1774 return
1775
1776 if self.mode in "aw":
1777 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1778 self.offset += (BLOCKSIZE * 2)
1779
1780
1781 blocks, remainder = divmod(self.offset, RECORDSIZE)
1782 if remainder > 0:
1783 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1784
1785 if not self._extfileobj:
1786 self.fileobj.close()
1787 self.closed = True
1788
1790 """Return a TarInfo object for member `name'. If `name' can not be
1791 found in the archive, KeyError is raised. If a member occurs more
1792 than once in the archive, its last occurrence is assumed to be the
1793 most up-to-date version.
1794 """
1795 tarinfo = self._getmember(name)
1796 if tarinfo is None:
1797 raise KeyError("filename %r not found" % name)
1798 return tarinfo
1799
1801 """Return the members of the archive as a list of TarInfo objects. The
1802 list has the same order as the members in the archive.
1803 """
1804 self._check()
1805 if not self._loaded:
1806 self._load()
1807
1808 return self.members
1809
1811 """Return the members of the archive as a list of their names. It has
1812 the same order as the list returned by getmembers().
1813 """
1814 return [tarinfo.name for tarinfo in self.getmembers()]
1815
def gettarinfo(self, name=None, arcname=None, fileobj=None):
    """Build a TarInfo object describing an existing file.

       The file is given either by its path `name' or by the open file
       object `fileobj' (its `name' attribute is used as path and
       os.fstat is applied to its file descriptor). `arcname', if given,
       replaces the name stored in the archive. The returned TarInfo may
       be modified before it is handed to addfile(). None is returned if
       the file's type is none of the types handled below.
    """
    self._check("aw")

    # A file object, when given, dictates the path that is used.
    if fileobj is not None:
        name = fileobj.name

    # The archive name carries no drive letter, uses forward slashes
    # and has no leading slashes.
    if arcname is None:
        arcname = name
    arcname = os.path.splitdrive(arcname)[1]
    arcname = arcname.replace(os.sep, "/").lstrip("/")

    # Fresh TarInfo that knows which archive it belongs to.
    tarinfo = self.tarinfo()
    tarinfo.tarfile = self

    # Symbolic links are only followed when lstat is unavailable or
    # dereferencing was requested.
    if fileobj is not None:
        statres = os.fstat(fileobj.fileno())
    elif hasattr(os, "lstat") and not self.dereference:
        statres = os.lstat(name)
    else:
        statres = os.stat(name)
    linkname = ""

    stmd = statres.st_mode
    if stat.S_ISREG(stmd):
        inode = (statres.st_ino, statres.st_dev)
        seen_before = (not self.dereference and statres.st_nlink > 1
                       and inode in self.inodes
                       and arcname != self.inodes[inode])
        if seen_before:
            # Same inode already archived under another name: store a
            # hard link to that first member instead of the data.
            kind = LNKTYPE
            linkname = self.inodes[inode]
        else:
            # Regular file; remember its inode (if there is a usable
            # inode number) so later hard links can refer to it.
            kind = REGTYPE
            if inode[0]:
                self.inodes[inode] = arcname
    else:
        special_types = (
            (stat.S_ISDIR, DIRTYPE),
            (stat.S_ISFIFO, FIFOTYPE),
            (stat.S_ISLNK, SYMTYPE),
            (stat.S_ISCHR, CHRTYPE),
            (stat.S_ISBLK, BLKTYPE),
        )
        for predicate, candidate in special_types:
            if predicate(stmd):
                kind = candidate
                break
        else:
            return None
        if kind == SYMTYPE:
            linkname = os.readlink(name)

    # Copy the stat result into the TarInfo. Only regular files carry
    # a payload, every other type is stored with size zero.
    tarinfo.name = arcname
    tarinfo.mode = stmd
    tarinfo.uid = statres.st_uid
    tarinfo.gid = statres.st_gid
    tarinfo.size = statres.st_size if kind == REGTYPE else long(0)
    tarinfo.mtime = statres.st_mtime
    tarinfo.type = kind
    tarinfo.linkname = linkname

    # Symbolic owner names are optional: pwd/grp may be unavailable and
    # the ids may be unknown to the system.
    if pwd:
        try:
            tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
        except KeyError:
            pass
    if grp:
        try:
            tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
        except KeyError:
            pass

    if (kind in (CHRTYPE, BLKTYPE)
            and hasattr(os, "major") and hasattr(os, "minor")):
        tarinfo.devmajor = os.major(statres.st_rdev)
        tarinfo.devminor = os.minor(statres.st_rdev)
    return tarinfo
1913
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.

       One line is printed per member. Raises IOError (through _check())
       if the archive has already been closed.
    """
    self._check()

    for tarinfo in self:
        # The trailing commas of the print statements keep all fields of
        # one member on the same output line; the bare print at the end
        # of the loop body terminates that line.
        if verbose:
            # filemode() is a helper defined elsewhere in this module;
            # presumably it renders the mode bits ls-style -- confirm.
            print filemode(tarinfo.mode),
            # Prefer symbolic owner/group names, fall back to the ids.
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Device nodes show "major,minor" in place of a size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time in local time: year through second.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        # Directories are marked with a trailing slash.
        if tarinfo.isdir():
            print tarinfo.name + "/",
        else:
            print tarinfo.name,

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1945
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.

       The archive itself and files of an unsupported type are skipped
       silently (a debug message is emitted). Raises IOError (through
       _check()) if the archive is closed or not open for writing.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # Exclude pathnames (deprecated in favour of `filter').
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                      DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Skip if somebody tries to archive the archive...
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Create a TarInfo object from the file.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # Give the caller the chance to change or to drop the TarInfo.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Append the tar header and data to the archive.
    if tarinfo.isreg():
        f = bltn_open(name, "rb")
        try:
            self.addfile(tarinfo, f)
        finally:
            # Close the source file even if writing the member failed.
            f.close()

    elif tarinfo.isdir():
        self.addfile(tarinfo)
        if recursive:
            for f in os.listdir(name):
                self.add(os.path.join(name, f), os.path.join(arcname, f),
                         recursive, exclude, filter)

    else:
        self.addfile(tarinfo)
2007
def addfile(self, tarinfo, fileobj=None):
    """Append the member described by `tarinfo' to the archive.

       A copy of `tarinfo' is written as header and recorded in the
       member list. If `fileobj' is given, tarinfo.size bytes are read
       from it and stored after the header, padded with NUL bytes to a
       full block. TarInfo objects can be created with gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy so the caller's object is not the one stored in
    # self.members.
    member = copy.copy(tarinfo)
    archive = self.fileobj

    header = member.tobuf(self.format, self.encoding, self.errors)
    archive.write(header)
    self.offset += len(header)

    # With a file object the payload follows the header.
    if fileobj is not None:
        copyfileobj(fileobj, archive, member.size)
        full_blocks, tail = divmod(member.size, BLOCKSIZE)
        if tail > 0:
            # Fill the last, partially used block with NULs.
            archive.write(NUL * (BLOCKSIZE - tail))
            full_blocks += 1
        self.offset += full_blocks * BLOCKSIZE

    self.members.append(member)
2033
2035 """Extract all members from the archive to the current working
2036 directory and set owner, modification time and permissions on
2037 directories afterwards. `path' specifies a different directory
2038 to extract to. `members' is optional and must be a subset of the
2039 list returned by getmembers().
2040 """
2041 directories = []
2042
2043 if members is None:
2044 members = self
2045
2046 for tarinfo in members:
2047 if tarinfo.isdir():
2048
2049 directories.append(tarinfo)
2050 tarinfo = copy.copy(tarinfo)
2051 tarinfo.mode = 0700
2052 self.extract(tarinfo, path)
2053
2054
2055 directories.sort(key=operator.attrgetter('name'))
2056 directories.reverse()
2057
2058
2059 for tarinfo in directories:
2060 dirpath = os.path.join(path, tarinfo.name)
2061 try:
2062 self.chown(tarinfo, dirpath)
2063 self.utime(tarinfo, dirpath)
2064 self.chmod(tarinfo, dirpath)
2065 except ExtractError, e:
2066 if self.errorlevel > 1:
2067 raise
2068 else:
2069 self._dbg(1, "tarfile: %s" % e)
2070
2072 """Extract a member from the archive to the current working directory,
2073 using its full name. Its file information is extracted as accurately
2074 as possible. `member' may be a filename or a TarInfo object. You can
2075 specify a different directory using `path'.
2076 """
2077 self._check("r")
2078
2079 if isinstance(member, basestring):
2080 tarinfo = self.getmember(member)
2081 else:
2082 tarinfo = member
2083
2084
2085 if tarinfo.islnk():
2086 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2087
2088 try:
2089 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2090 except EnvironmentError, e:
2091 if self.errorlevel > 0:
2092 raise
2093 else:
2094 if e.filename is None:
2095 self._dbg(1, "tarfile: %s" % e.strerror)
2096 else:
2097 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2098 except ExtractError, e:
2099 if self.errorlevel > 1:
2100 raise
2101 else:
2102 self._dbg(1, "tarfile: %s" % e)
2103
2105 """Extract a member from the archive as a file object. `member' may be
2106 a filename or a TarInfo object. If `member' is a regular file, a
2107 file-like object is returned. If `member' is a link, a file-like
2108 object is constructed from the link's target. If `member' is none of
2109 the above, None is returned.
2110 The file-like object is read-only and provides the following
2111 methods: read(), readline(), readlines(), seek() and tell()
2112 """
2113 self._check("r")
2114
2115 if isinstance(member, basestring):
2116 tarinfo = self.getmember(member)
2117 else:
2118 tarinfo = member
2119
2120 if tarinfo.isreg():
2121 return self.fileobject(self, tarinfo)
2122
2123 elif tarinfo.type not in SUPPORTED_TYPES:
2124
2125
2126 return self.fileobject(self, tarinfo)
2127
2128 elif tarinfo.islnk() or tarinfo.issym():
2129 if isinstance(self.fileobj, _Stream):
2130
2131
2132
2133 raise StreamError("cannot extract (sym)link as file object")
2134 else:
2135
2136 return self.extractfile(self._find_link_target(tarinfo))
2137 else:
2138
2139
2140 return None
2141
2143 """Extract the TarInfo object tarinfo to a physical
2144 file called targetpath.
2145 """
2146
2147
2148
2149 targetpath = targetpath.rstrip("/")
2150 targetpath = targetpath.replace("/", os.sep)
2151
2152
2153 upperdirs = os.path.dirname(targetpath)
2154 if upperdirs and not os.path.exists(upperdirs):
2155
2156
2157 os.makedirs(upperdirs)
2158
2159 if tarinfo.islnk() or tarinfo.issym():
2160 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2161 else:
2162 self._dbg(1, tarinfo.name)
2163
2164 if tarinfo.isreg():
2165 self.makefile(tarinfo, targetpath)
2166 elif tarinfo.isdir():
2167 self.makedir(tarinfo, targetpath)
2168 elif tarinfo.isfifo():
2169 self.makefifo(tarinfo, targetpath)
2170 elif tarinfo.ischr() or tarinfo.isblk():
2171 self.makedev(tarinfo, targetpath)
2172 elif tarinfo.islnk() or tarinfo.issym():
2173 self.makelink(tarinfo, targetpath)
2174 elif tarinfo.type not in SUPPORTED_TYPES:
2175 self.makeunknown(tarinfo, targetpath)
2176 else:
2177 self.makefile(tarinfo, targetpath)
2178
2179 self.chown(tarinfo, targetpath)
2180 if not tarinfo.issym():
2181 self.chmod(tarinfo, targetpath)
2182 self.utime(tarinfo, targetpath)
2183
2184
2185
2186
2187
2188
def makedir(self, tarinfo, targetpath):
    """Create the directory targetpath.

       The directory is created with mode 0700; `tarinfo' is not
       consulted here. An already existing path is not treated as an
       error, any other failure is re-raised.
    """
    try:
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as exc:
        if exc.errno == errno.EEXIST:
            return
        raise
2199
def makefile(self, tarinfo, targetpath):
    """Make a file called targetpath.

       The member's data is obtained through extractfile() and copied
       into a newly created file at `targetpath'. Both file objects are
       closed again even when opening the target or copying fails.
    """
    source = self.extractfile(tarinfo)
    try:
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2208
2210 """Make a file from a TarInfo object with an unknown type
2211 at targetpath.
2212 """
2213 self.makefile(tarinfo, targetpath)
2214 self._dbg(1, "tarfile: Unknown file type %r, " \
2215 "extracted as regular file." % tarinfo.type)
2216
def makefifo(self, tarinfo, targetpath):
    """Create a fifo at targetpath.

       Raises ExtractError on platforms where os.mkfifo does not exist.
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
2224
def makedev(self, tarinfo, targetpath):
    """Create the character or block device node targetpath.

       Mode bits and device numbers are taken from `tarinfo'. Raises
       ExtractError if os.mknod or os.makedev is not available.
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the permission bits with the matching file type flag.
    mode = tarinfo.mode
    mode |= stat.S_IFBLK if tarinfo.isblk() else stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2239
def makelink(self, tarinfo, targetpath):
    """Create the symbolic or hard link targetpath.

       Where the platform offers no links, or where the target of a
       hard link is not present in the file system, the archive member
       the link refers to is extracted to targetpath instead.
    """
    links_available = hasattr(os, "symlink") and hasattr(os, "link")

    if not links_available:
        # No link support at all: fall back to a copy of the member.
        try:
            self._extract_member(self._find_link_target(tarinfo), targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
        return

    if tarinfo.issym():
        os.symlink(tarinfo.linkname, targetpath)
    elif os.path.exists(tarinfo._link_target):
        # Hard link whose target is already on disk.
        os.link(tarinfo._link_target, targetpath)
    else:
        # Hard link whose target is missing on disk: extract the
        # referenced member under the link's name.
        self._extract_member(self._find_link_target(tarinfo), targetpath)
2260
def chown(self, tarinfo, targetpath):
    """Give targetpath the owner and group recorded in tarinfo.

       Nothing happens unless the pwd module is available and the
       effective user id is 0. Names are preferred over numeric ids;
       if neither is known to the system the ids of the current
       process are used. Raises ExtractError if changing the owner
       fails.
    """
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Resolve the group: name first, then numeric id, then our own gid.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        try:
            gid = grp.getgrgid(tarinfo.gid)[2]
        except KeyError:
            gid = os.getgid()

    # Resolve the user the same way.
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        try:
            uid = pwd.getpwuid(tarinfo.uid)[2]
        except KeyError:
            uid = os.getuid()

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            # Change the link itself, not the file it points to.
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
2288
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.

       Does nothing where os.chmod is missing. Raises ExtractError if
       the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
2297
def utime(self, tarinfo, targetpath):
    """Stamp targetpath with the modification time stored in tarinfo.

       The same value is used for access and modification time. Does
       nothing where os.utime is missing. Raises ExtractError if the
       time cannot be set.
    """
    if hasattr(os, 'utime'):
        try:
            os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
2307
2308
2310 """Return the next member of the archive as a TarInfo object, when
2311 TarFile is opened for reading. Return None if there is no more
2312 available.
2313 """
2314 self._check("ra")
2315 if self.firstmember is not None:
2316 m = self.firstmember
2317 self.firstmember = None
2318 return m
2319
2320
2321 self.fileobj.seek(self.offset)
2322 tarinfo = None
2323 while True:
2324 try:
2325 tarinfo = self.tarinfo.fromtarfile(self)
2326 except EOFHeaderError, e:
2327 if self.ignore_zeros:
2328 self._dbg(2, "0x%X: %s" % (self.offset, e))
2329 self.offset += BLOCKSIZE
2330 continue
2331 except InvalidHeaderError, e:
2332 if self.ignore_zeros:
2333 self._dbg(2, "0x%X: %s" % (self.offset, e))
2334 self.offset += BLOCKSIZE
2335 continue
2336 elif self.offset == 0:
2337 raise ReadError(str(e))
2338 except EmptyHeaderError:
2339 if self.offset == 0:
2340 raise ReadError("empty file")
2341 except TruncatedHeaderError, e:
2342 if self.offset == 0:
2343 raise ReadError(str(e))
2344 except SubsequentHeaderError, e:
2345 raise ReadError(str(e))
2346 break
2347
2348 if tarinfo is not None:
2349 self.members.append(tarinfo)
2350 else:
2351 self._loaded = True
2352
2353 return tarinfo
2354
2355
2356
2357
def _getmember(self, name, tarinfo=None, normalize=False):
    """Look up an archive member by name, searching backwards.

       The last matching member wins. If `tarinfo' is given, only the
       members stored before it are searched. With `normalize' set,
       both `name' and the member names are passed through
       os.path.normpath before they are compared. Returns None if no
       member matches.
    """
    # Make sure the complete member list is available.
    candidates = self.getmembers()

    # Restrict the search to everything in front of `tarinfo'.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        wanted = os.path.normpath(name)
        name_of = lambda member: os.path.normpath(member.name)
    else:
        wanted = name
        name_of = operator.attrgetter("name")

    for candidate in reversed(candidates):
        if wanted == name_of(candidate):
            return candidate
    return None
2380
2382 """Read through the entire archive file and look for readable
2383 members.
2384 """
2385 while True:
2386 tarinfo = self.next()
2387 if tarinfo is None:
2388 break
2389 self._loaded = True
2390
def _check(self, mode=None):
    """Verify that the TarFile may be used for an operation.

       Raises IOError if the TarFile is closed, or if `mode' is given
       and the TarFile's own mode is not contained in it.
    """
    if self.closed:
        owner = self.__class__.__name__
        raise IOError("%s is closed" % owner)
    if mode is None or self.mode in mode:
        return
    raise IOError("bad operation for mode %r" % self.mode)
2399
2401 """Find the target member of a symlink or hardlink member in the
2402 archive.
2403 """
2404 if tarinfo.issym():
2405
2406 linkname = os.path.dirname(tarinfo.name) + "/" + tarinfo.linkname
2407 limit = None
2408 else:
2409
2410
2411 linkname = tarinfo.linkname
2412 limit = tarinfo
2413
2414 member = self._getmember(linkname, tarinfo=limit, normalize=True)
2415 if member is None:
2416 raise KeyError("linkname %r not found" % linkname)
2417 return member
2418
2420 """Provide an iterator object.
2421 """
2422 if self._loaded:
2423 return iter(self.members)
2424 else:
2425 return TarIter(self)
2426
def _dbg(self, level, msg):
    """Write debugging output to sys.stderr.

       `msg' is printed only if `level' does not exceed the TarFile's
       `debug' attribute.
    """
    if level <= self.debug:
        print >> sys.stderr, msg
2432
2434 self._check()
2435 return self
2436
def __exit__(self, type, value, traceback):
    """Leave the runtime context of a `with' statement.

       Without an exception the archive is closed regularly. After an
       exception close() is bypassed: only a file object that is not
       flagged as external is closed, and the TarFile is marked closed.
    """
    if type is None:
        self.close()
        return

    # An exception is propagating; do not go through close() here.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
2446
2449 """Iterator Class.
2450
2451 for tarinfo in TarFile(...):
2452 suite...
2453 """
2454
2461 """Return iterator object.
2462 """
2463 return self
2465 """Return the next item using TarFile's next() method.
2466 When all members have been read, set TarFile as _loaded.
2467 """
2468
2469
2470
2471 if not self.tarfile._loaded:
2472 tarinfo = self.tarfile.next()
2473 if not tarinfo:
2474 self.tarfile._loaded = True
2475 raise StopIteration
2476 else:
2477 try:
2478 tarinfo = self.tarfile.members[self.index]
2479 except IndexError:
2480 raise StopIteration
2481 self.index += 1
2482 return tarinfo
2483
2486 """Base class for _data and _hole.
2487 """
2489 self.offset = offset
2490 self.size = size
2492 return self.offset <= offset < self.offset + self.size
2493
2495 """Represent a data section in a sparse file.
2496 """
2497 - def __init__(self, offset, size, realpos):
2500
2502 """Represent a hole section in a sparse file.
2503 """
2504 pass
2505
2507 """Ringbuffer class which increases performance
2508 over a regular list.
2509 """
def find(self, offset):
    """Return the item that contains `offset', or None.

       The search starts at the position of the previous hit
       (self.idx) and wraps around once. On success self.idx is moved
       to the item found; on failure, or for an empty buffer, None is
       returned and self.idx is left untouched.
    """
    count = len(self)
    if not count:
        # Nothing to search; indexing below would raise IndexError.
        return None

    start = self.idx
    idx = start
    while True:
        item = self[idx]
        if offset in item:
            self.idx = idx
            return item
        idx += 1
        if idx == count:
            idx = 0
        if idx == start:
            # Wrapped around completely without a hit.
            return None
2526
2527
2528
2529
# Compression selectors: the TarFileCompat code below compares its
# `compression' argument against these to choose between
# TarFile.taropen (plain) and TarFile.gzopen (gzipped).
TAR_PLAIN = 0
TAR_GZIPPED = 8
2533 """TarFile class compatible with standard module zipfile's
2534 ZipFile class.
2535 """
2537 from warnings import warnpy3k
2538 warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2539 stacklevel=2)
2540 if compression == TAR_PLAIN:
2541 self.tarfile = TarFile.taropen(file, mode)
2542 elif compression == TAR_GZIPPED:
2543 self.tarfile = TarFile.gzopen(file, mode)
2544 else:
2545 raise ValueError("unknown compression constant")
2546 if mode[0:1] == "r":
2547 members = self.tarfile.getmembers()
2548 for m in members:
2549 m.filename = m.name
2550 m.file_size = m.size
2551 m.date_time = time.gmtime(m.mtime)[:6]
2563 - def read(self, name):
2565 - def write(self, filename, arcname=None, compress_type=None):
2568 try:
2569 from cStringIO import StringIO
2570 except ImportError:
2571 from StringIO import StringIO
2572 import calendar
2573 tinfo = TarInfo(zinfo.filename)
2574 tinfo.size = len(bytes)
2575 tinfo.mtime = calendar.timegm(zinfo.date_time)
2576 self.tarfile.addfile(tinfo, StringIO(bytes))
2579
2585 """Return True if name points to a tar archive that we
2586 are able to handle, else return False.
2587 """
2588 try:
2589 t = open(name)
2590 t.close()
2591 return True
2592 except TarError:
2593 return False
2594
# Keep a reference to the builtin open() under another name, because the
# module-level name `open' is rebound to TarFile.open on the next line.
bltn_open = open
open = TarFile.open
2597