The tarfile module reads and writes TAR archives, the standard Unix format for bundling files. It supports gzip, bzip2, and xz compression.

Reading TAR Files

import tarfile
 
# List contents
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    for member in tar.getmembers():
        print(f"{member.name} ({member.size} bytes)")
 
# Simple list
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    print(tar.getnames())

Extracting Files

import tarfile
 
# Extract all
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    tar.extractall('output_dir')
 
# Extract single file
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    tar.extract('file.txt', 'output_dir')
 
# Extract with the 'data' filter (Python 3.12+): rejects absolute paths, path traversal, unsafe links, and special files
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    tar.extractall('output_dir', filter='data')

Reading File Contents

import tarfile
 
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    # Get file object
    f = tar.extractfile('file.txt')
    if f:
        content = f.read().decode('utf-8')
        print(content)

Creating TAR Files

import tarfile
 
# Uncompressed TAR
with tarfile.open('archive.tar', 'w') as tar:
    tar.add('file1.txt')
    tar.add('file2.txt')
    tar.add('folder/')
 
# With gzip compression
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
    tar.add('file.txt')
 
# With bzip2 compression
with tarfile.open('archive.tar.bz2', 'w:bz2') as tar:
    tar.add('file.txt')
 
# With xz compression (best ratio)
with tarfile.open('archive.tar.xz', 'w:xz') as tar:
    tar.add('file.txt')

Rename Files in Archive

import tarfile
 
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
    # Add with different name
    tar.add('local/path/file.txt', arcname='renamed.txt')
    
    # Add directory with different name
    tar.add('src/', arcname='source/')

Filter Files

import tarfile
 
def exclude_pyc(tarinfo):
    """Filter callback for ``TarFile.add`` that leaves out ``.pyc`` files.

    Returns ``None`` when the member's name ends in ``.pyc`` and the
    unchanged ``tarinfo`` object for every other member.
    """
    is_bytecode = tarinfo.name.endswith('.pyc')
    return None if is_bytecode else tarinfo
 
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
    tar.add('project/', filter=exclude_pyc)

Add String Content

import tarfile
import io
 
with tarfile.open('archive.tar.gz', 'w:gz') as tar:
    # Create TarInfo
    data = b"File content here"
    info = tarfile.TarInfo(name='generated.txt')
    info.size = len(data)
    
    # Add from bytes
    tar.addfile(info, io.BytesIO(data))

Open Modes

import tarfile
 
# Read modes
'r'     # Auto-detect compression
'r:'    # No compression
'r:gz'  # Gzip
'r:bz2' # Bzip2
'r:xz'  # XZ/LZMA
 
# Write modes
'w'     # No compression
'w:gz'  # Gzip
'w:bz2' # Bzip2
'w:xz'  # XZ/LZMA
 
# Append (uncompressed only)
'a'     # Append to existing

Practical Examples

Backup with Exclusions

import tarfile
import os
from datetime import datetime
 
def backup_project(source_dir, backup_dir,
                   excludes=('.git', '__pycache__', 'node_modules', '.env')):
    """Write a timestamped ``.tar.gz`` backup of ``source_dir``.

    Args:
        source_dir: File or directory to archive. Its contents are stored
            under the top-level archive name ``backup``.
        backup_dir: Directory that receives the archive. It is created if it
            does not exist yet.
        excludes: Names of files or directories to leave out. A member is
            skipped when any single component of its archive path equals one
            of these names exactly; skipping a directory skips everything
            below it. Variants such as ``.env.local`` must be listed
            explicitly.

    Returns:
        The path of the archive that was written.
    """
    excluded_names = frozenset(excludes)
    timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
    backup_path = os.path.join(backup_dir, f'backup_{timestamp}.tar.gz')

    def exclude(tarinfo):
        # Compare whole path components rather than substrings, so that
        # ".env" does not also drop ".environment.md" and ".git" does not
        # drop "my.gitignore_notes.txt". Archive names always use "/".
        if excluded_names.intersection(tarinfo.name.split('/')):
            return None
        return tarinfo

    # An empty backup_dir means "current directory"; makedirs('') would fail.
    if backup_dir:
        os.makedirs(backup_dir, exist_ok=True)

    with tarfile.open(backup_path, 'w:gz') as tar:
        tar.add(source_dir, arcname='backup', filter=exclude)

    return backup_path

Stream TAR to Response

import tarfile
import io
 
def create_tar_response(files):
    """Build a gzip-compressed TAR archive in memory and return its bytes.

    ``files`` maps archive member names to text content. Each value is
    encoded as UTF-8 and stored as a member under its key.
    """
    payload = io.BytesIO()

    with tarfile.open(fileobj=payload, mode='w:gz') as archive:
        for member_name, text in files.items():
            encoded = text.encode('utf-8')
            header = tarfile.TarInfo(name=member_name)
            # The header must carry the exact byte length of the payload.
            header.size = len(encoded)
            archive.addfile(header, io.BytesIO(encoded))

    # getvalue() returns the whole buffer regardless of the stream position.
    return payload.getvalue()

Extract Specific Files

import tarfile
 
def extract_matching(tar_path, pattern, output_dir):
    """Extract the archive members whose names match a shell-style pattern.

    Opens ``tar_path`` with transparent compression detection, tests every
    member name against ``pattern`` with ``fnmatch``, extracts the matches
    into ``output_dir``, and prints one line per extracted member.
    """
    from fnmatch import fnmatch as name_matches

    with tarfile.open(tar_path, 'r:*') as archive:
        selected = [
            entry for entry in archive.getmembers()
            if name_matches(entry.name, pattern)
        ]
        for entry in selected:
            archive.extract(entry, output_dir)
            print(f"Extracted: {entry.name}")
 
extract_matching('archive.tar.gz', '*.py', './extracted')

Safe Extraction

import tarfile
import os
 
def safe_extract(tar_path, output_dir):
    """Extract with path traversal protection.

    Every member is validated before anything is written to disk, so a
    rejected archive leaves ``output_dir`` untouched.

    Args:
        tar_path: Path of the archive; compression is detected automatically.
        output_dir: Directory that all extracted paths must stay inside.

    Raises:
        ValueError: If a member's path, or the target of a symbolic or hard
            link member, resolves outside ``output_dir``.
        tarfile.FilterError: On Python versions that provide extraction
            filters, the ``data`` filter may additionally reject members
            (for example device files) during extraction.
    """
    output_dir = os.path.abspath(output_dir)

    def is_inside(path):
        # commonpath compares whole path components, so a sibling such as
        # "/tmp/out_evil" is not mistaken for being inside "/tmp/out" the
        # way a plain string-prefix test would be.
        try:
            return os.path.commonpath([output_dir, path]) == output_dir
        except ValueError:
            # Raised when the paths cannot be compared, e.g. different
            # drives on Windows; such a path is never inside output_dir.
            return False

    with tarfile.open(tar_path, 'r:*') as tar:
        members = tar.getmembers()
        for member in members:
            # Check for path traversal
            member_path = os.path.abspath(
                os.path.join(output_dir, member.name)
            )
            if not is_inside(member_path):
                raise ValueError(f"Path traversal detected: {member.name}")

            # Links are a second escape route: a link that points outside
            # lets later members be written through it.
            if member.issym():
                # Symlink targets are relative to the link's own directory.
                link_base = os.path.dirname(member_path)
            elif member.islnk():
                # Hard-link targets are relative to the archive root.
                link_base = output_dir
            else:
                continue
            link_target = os.path.abspath(
                os.path.join(link_base, member.linkname)
            )
            if not is_inside(link_target):
                raise ValueError(
                    f"Link escapes output directory: "
                    f"{member.name} -> {member.linkname}"
                )

        # Defense in depth: the stdlib 'data' filter resolves real paths at
        # extraction time, which the lexical checks above cannot do.
        extract_kwargs = {}
        if hasattr(tarfile, 'data_filter'):
            extract_kwargs['filter'] = 'data'
        tar.extractall(output_dir, members=members, **extract_kwargs)

TarInfo Attributes

import tarfile
 
# Print the main metadata fields of every member in the archive.
with tarfile.open('archive.tar.gz', 'r:gz') as tar:
    for info in tar.getmembers():
        print(f"Name: {info.name}")
        print(f"Size: {info.size}")
        print(f"Mode: {oct(info.mode)}")
        print(f"UID/GID: {info.uid}/{info.gid}")
        print(f"Is file: {info.isfile()}")
        print(f"Is dir: {info.isdir()}")
        # A tar link is either symbolic (issym) or hard (islnk); issym()
        # alone would report hard links as "not a link".
        print(f"Is link: {info.issym() or info.islnk()}")
        print()

Quick Reference

import tarfile
 
# Open
tar = tarfile.open('file.tar.gz', 'r:gz')
tar = tarfile.open('file.tar.gz', 'w:gz')
 
# Read operations
tar.getnames()          # List file names
tar.getmembers()        # List TarInfo objects
tar.extractfile(name)   # Get file object
tar.extract(name, path) # Extract one file
tar.extractall(path)    # Extract all
 
# Write operations
tar.add(path)                    # Add file/directory
tar.add(path, arcname='new')     # Add with different name
tar.addfile(tarinfo, fileobj)    # Add from TarInfo + data
 
# Always close
tar.close()
 
# Or use context manager
with tarfile.open(...) as tar:
    ...

| Mode  | Extension | Compression |
|-------|-----------|-------------|
| w:    | .tar      | None        |
| w:gz  | .tar.gz   | Gzip        |
| w:bz2 | .tar.bz2  | Bzip2       |
| w:xz  | .tar.xz   | XZ/LZMA     |

tarfile is Python's interface to TAR, the standard Unix archive format. Use it for backups, deployment packages, and cross-platform file bundles.

React to this post: