Incremental Rsync backup script to take daily, weekly and monthly backups automatically

The script makes daily backups of the target folders with rsync, preserving user rights and the folder structure. It keeps three consecutive daily backups; after three days only weekly backups are retained and the other daily backups are removed. After a few weeks only the first weekly backup of each month is kept. This saves a lot of space, because recent changes are kept daily while older changes are stored only weekly and then monthly. Backups are stored in separate folders on the backup drive. Each of these folders contains a subfolder for every target folder, holding its backed-up contents. The backup structure can be mounted as a read-only drive, which allows users to retrieve missing content without any administrative operations.

Rsync uses hard links to store unchanged files, which saves a huge amount of backup space because only modified files are written to the backup drive. All unmodified files are hard-linked between the backup instances.

The script is designed to be run daily using cron.

Download the script here


#!/usr/bin/python
# -*- coding: utf-8 -*-

# Copyright (C) 2014, Heikki Hyyti
#
# Permission is hereby granted, free of charge, to any person obtaining a
# copy of this software and associated documentation files (the "Software"),
# to deal in the Software without restriction, including without limitation
# the rights to use, copy, modify, merge, publish, distribute, sublicense,
# and/or sell copies of the Software, and to permit persons to whom the
# Software is furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
# THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
# DEALINGS IN THE SOFTWARE.

# --- configurations --- #

# Programs used for the file operations. Each value is placed at the start
# of a command line that is run through a shell.
rsync_prog = '/usr/bin/rsync'
move_prog = '/bin/mv'
remove_prog = '/bin/rm'
mkdir_prog = '/bin/mkdir'
# Shell fragment that prints the current date; it ends with a redirection
# ('>') because the path of the timestamp file is appended right after it.
timestamp_prog = '/bin/date \'+%Y-%m-%d %H:%M\' >'

# File listing the rsync exclude rules (configure this to your exclude file).
# When the file does not exist, no exclude options are passed to rsync.
exclude_file = '/etc/rsync_exclude'

# Mountpoint of the backup disk (configure this to your backup device mountpoint)
backup_mountpoint = '/backup'

# Name base is the path of the backup structure below the mountpoint
# (i.e. mountpoint/name_base.01/data_directories). The newest snapshot is
# stored in mountpoint/name_base and older ones in name_base.01, name_base.02, ...
# (Configure this to the base name on your backup device)
backup_name_base = 'backupdir/snapshot'

# Number of monthly, weekly and daily backups to keep.
# Negative monthly/weekly values are treated as 0 and the number of daily
# backups is raised to at least 1 when the script starts.
num_of_monthly_backups = 6
num_of_weekly_backups = 4
num_of_daily_backups = 4

# Locations to back up (configure these with full paths).
# The default value is only a placeholder and must be replaced.
target_directories = ['list of','directories here', 'with full paths']

# Name of the file, written into the backup directory, that holds the date of the backup
timestamp_filename = 'timestamp.txt'

# Target weekday on which weekly backups are taken, compared against
# datetime.isoweekday() (Monday is 1 and Sunday is 7).
# Monthly backups are taken when the target weekday falls within the first
# seven days of the month.
target_weekday = 6

# Debug level 0...2
# (above 0: print every executed command, above 1: also print each operation)
DEBUG = 0


# --- Code starts, function declarations --- #

import subprocess
from commands import getstatusoutput
from os import path
from sys import exit
from pipes import quote
import datetime

# Function to backup given directory to backup structure
def backup_directory(backup_source):
    """Back up one source directory into the newest snapshot using rsync.

    The snapshot is written to '<backup_mountpoint>/<backup_name_base>/',
    which is created first when it does not exist. When the previous
    snapshot '<backup_mountpoint>/<backup_name_base>.01/' exists, it is
    passed to rsync as --link-dest. After a successful rsync run the current
    date is written to timestamp_filename inside the snapshot directory.

    Arguments:
        backup_source: path of the directory to back up. It is inserted into
            the shell command line as is, so the caller is expected to have
            shell-quoted it already (the main script passes it through
            quote()).

    Returns:
        The (exit status, output) tuple of the rsync command as returned by
        getstatusoutput().
    """
    if DEBUG > 1:
        print('Backup \'%s\'' % backup_source)

    # rsync copies the *contents* of a source given with a trailing '/'
    # instead of the directory itself, so the slash is removed to get one
    # subfolder per source directory inside the snapshot.
    backup_source = _strip_trailing_slash(backup_source)

    # Set backup target and link target
    backup_target = '%s/%s/' % (backup_mountpoint, backup_name_base)
    backup_links = '%s/%s.01/' % (backup_mountpoint, backup_name_base)

    # Link against the previous snapshot only when there is one
    links_string = ' '
    if path.isdir(backup_links):
        links_string = ' --link-dest=%s ' % quote(backup_links)

    # Exclude rules are optional
    exclude_string = ''
    if path.exists(exclude_file):
        exclude_string = ' --exclude-from=%s --delete-excluded' % quote(exclude_file)

    # Create the folder for the new backup if it doesn't exist. All paths are
    # shell-quoted because the commands are executed through a shell.
    if not path.isdir(backup_target):
        mkdir_command = '%s %s' % (mkdir_prog, quote(backup_target))
        if DEBUG > 0:
            print(mkdir_command)

        mkdir_exec = getstatusoutput(mkdir_command)
        if mkdir_exec[0] != 0:
            print(mkdir_exec)

    # Build and execute the backup command
    backup_command = '%s -a --delete%s%s%s %s' % (
        rsync_prog, exclude_string, links_string, backup_source, quote(backup_target))
    if DEBUG > 0:
        print(backup_command)

    backup_exec = getstatusoutput(backup_command)
    if backup_exec[0] != 0:
        print(backup_exec)
        return backup_exec

    # On success, write the timestamp file into the snapshot folder
    timestamp_path = '%s%s' % (backup_target, timestamp_filename)
    timestamp_command = '%s %s' % (timestamp_prog, quote(timestamp_path))
    if DEBUG > 0:
        print(timestamp_command)

    timestamp_exec = getstatusoutput(timestamp_command)
    if timestamp_exec[0] != 0:
        print(timestamp_exec)

    return backup_exec


def _strip_trailing_slash(source):
    """Return source without one trailing '/', also when it is shell-quoted."""
    if len(source) > 3 and source.startswith("'") and source.endswith("/'"):
        # The path was wrapped in single quotes by quote(): the slash to
        # drop sits just before the closing quote.
        return source[:-2] + "'"
    if source.endswith('/'):
        return source[:-1]
    return source
    
    
# Move old backup from source to target
def move_old_backup_to(source, target):
    """Rename a backup directory below the backup mountpoint.

    Arguments:
        source: existing backup directory, relative to backup_mountpoint.
        target: new name of the directory, relative to backup_mountpoint.
            It must not exist as a directory yet.

    Returns:
        The (exit status, output) tuple of the move command when the move
        was attempted, -1 when the target directory already exists, or
        False when the source directory does not exist.
    """
    if DEBUG > 1:
        print('Move \'%s\' to \'%s\'' % (source, target))

    # Both paths are always built below the backup mountpoint
    sd = '%s/%s' % (backup_mountpoint, source)
    td = '%s/%s' % (backup_mountpoint, target)

    if not path.isdir(sd):
        print('ERROR: source path \'%s\' does not exist' % sd)
        return False

    # Never move a backup on top of another one
    if path.isdir(td):
        print('ERROR: target path \'%s\' already exists' % td)
        return -1

    # The command is executed through a shell, so the paths are quoted to
    # keep names with spaces or shell metacharacters intact.
    move_command = '%s %s %s' % (move_prog, quote(sd), quote(td))
    if DEBUG > 0:
        print(move_command)

    move_exec = getstatusoutput(move_command)
    if move_exec[0] != 0:
        print(move_exec)

    return move_exec

	
# Remove target directory
def remove_directory(target):
    """Recursively remove a directory below the backup mountpoint.

    Arguments:
        target: directory to remove, relative to backup_mountpoint.

    Returns:
        The (exit status, output) tuple of the remove command when the
        directory exists, or False when it does not exist.
    """
    if DEBUG > 1:
        print('Remove \'%s\'' % target)

    # The path is always built below the backup mountpoint
    td = '%s/%s' % (backup_mountpoint, target)

    if not path.isdir(td):
        print('ERROR: target path \'%s\' does not exist' % td)
        return False

    # The command is executed through a shell; without quoting, a path
    # containing spaces would make the recursive remove hit other paths.
    remove_command = '%s -r %s' % (remove_prog, quote(td))
    if DEBUG > 0:
        print(remove_command)

    remove_exec = getstatusoutput(remove_command)
    if remove_exec[0] != 0:
        print(remove_exec)

    return remove_exec

	
# Remove backup with index
def remove_backup(idx):
    """Remove the backup stored under the given index, if there is one.

    Arguments:
        idx: index of the backup; the directory name is backup_name_base
            followed by '.' and the index as at least two digits.

    Returns:
        The result of remove_directory() when the backup directory exists,
        otherwise False.
    """
    backup_name = '%s.%02d' % (backup_name_base, idx)
    backup_dir = '%s/%s' % (backup_mountpoint, backup_name)

    # A missing backup is not an error here, so report it quietly
    if not path.isdir(backup_dir):
        return False

    return remove_directory(backup_name)

    
# Remove all too old backups (if number of backups is decreased smaller, this removes all older backups)
def remove_all_old_backups():
    """Remove the backup with index N_max and all following indices.

    The indices are walked upwards from N_max until remove_backup() returns
    a false value, which happens at the first index without a backup
    directory.
    """
    idx = N_max
    while True:
        removed = remove_backup(idx)
        if not removed:
            break
        idx += 1
    
	
# Algorithm to move and remove backups in daily, weekly and monthly fashion
# it takes current_date as an argument for testing purposes, usually it should be used using default argument
def store_old_backups(current_date=None):
    """Rotate the existing backups to make room for a new snapshot.

    Backups are numbered from 1 upwards: indices up to N_d are the daily
    slots, indices above N_d up to N_w the weekly slots and indices above
    N_w up to N_max the monthly slots.

    - On the target weekday the weekly slots are shifted by one. If the day
      of the month is at most 7, the monthly slots are shifted first;
      otherwise backup N_w is removed before the weekly shift.
    - On any other day backup N_d is removed.
    - Finally the daily slots are shifted by one and the newest snapshot
      (backup_name_base without index) becomes backup 1.

    Arguments:
        current_date: date used for the weekday and day-of-month decisions,
            mainly for testing. Defaults to the date at the time of the
            call.
    """
    # Resolve the default at call time; a default of datetime.today() in the
    # signature would be evaluated only once, when the function is defined.
    if current_date is None:
        current_date = datetime.datetime.today()

    if current_date.isoweekday() == target_weekday:
        if current_date.day <= 7:
            # Monthly backups (only at first week of a month)
            for i in range(N_max, N_w, -1):
                _shift_backup(i)
        else:
            # Remove last weekly backup as it was not moved to monthly backups
            remove_backup(N_w)

        # Weekly backups
        for i in range(N_w, N_d, -1):
            _shift_backup(i)
    else:
        # Remove last daily backup as it was not moved to weekly backups
        remove_backup(N_d)

    # Daily backups
    for i in range(N_d, 1, -1):
        _shift_backup(i)

    # Move last backup to first daily archive
    newest = backup_name_base
    if path.isdir('%s/%s' % (backup_mountpoint, newest)):
        move_old_backup_to(newest, '%s.%02d' % (backup_name_base, 1))


def _shift_backup(idx):
    """Move the backup with index idx - 1 to index idx when it exists."""
    source = '%s.%02d' % (backup_name_base, idx - 1)
    target = '%s.%02d' % (backup_name_base, idx)
    if path.isdir('%s/%s' % (backup_mountpoint, source)):
        move_old_backup_to(source, target)

	    
# A function to check if backup disk is mounted
def is_backup_disk_mounted(mountpoint):
    """Check from the output of 'df' whether mountpoint is a mounted file system.

    Arguments:
        mountpoint: mount point path to look for, without trailing '/'.

    Returns:
        True when a line of the df output names mountpoint as its mount
        point, otherwise False.
    """
    # -P selects the POSIX output format, which prints every file system on
    # a single line; the default format may wrap long device names onto two
    # lines, which would hide the mount point from the column check below.
    # universal_newlines makes the output text instead of bytes.
    df = subprocess.Popen(['df', '-P'], stdout=subprocess.PIPE,
                          universal_newlines=True)
    output = df.communicate()[0]

    # The first line is the column header
    for line in output.split('\n')[1:]:
        lineparts = line.split()
        # The mount point is the sixth column; re-join the rest of the line
        # so that mount points containing a space are compared as a whole.
        if len(lineparts) > 5 and ' '.join(lineparts[5:]) == mountpoint:
            return True

    return False
	    
    
# --- Backup script starts --- #

# Compute the size of the backup history: negative counts are treated as
# zero and at least one daily backup is always kept
num_of_monthly_backups = max(num_of_monthly_backups, 0)
num_of_weekly_backups = max(num_of_weekly_backups, 0)
num_of_daily_backups = max(num_of_daily_backups, 1)

# Highest backup index of the daily, weekly and monthly slots
N_d = num_of_daily_backups
N_w = N_d + num_of_weekly_backups
N_max = N_w + num_of_monthly_backups


# Make sure that the backup mountpoint doesn't have '/' at the end (this
# restricts backing up to '/' and other mountpoints don't end with it).
# endswith() is used so that an empty setting does not raise an IndexError.
if backup_mountpoint.endswith('/'):
    backup_mountpoint = backup_mountpoint[:-1]

# Make sure that the base name doesn't have '/' at the end
if backup_name_base.endswith('/'):
    backup_name_base = backup_name_base[:-1]

# Check that the mountpoint and the backup directory structure exist.
# The backup root is the directory that contains the snapshot folders.
backup_path = '%s/%s' % (backup_mountpoint, backup_name_base)
backup_root = backup_path[:backup_path.rindex('/')]

if not is_backup_disk_mounted(backup_mountpoint):
    print('ERROR: backup mountpoint \'%s\' does not exist, backup not completed!' % backup_mountpoint)
    exit(-1)

if not path.isdir(backup_root):
    print('ERROR: backup root path \'%s\' does not exist, backup not completed!' % backup_root)
    exit(-1)


# Algorithm starts: drop backups beyond the configured history, rotate the
# remaining ones and then take a new snapshot of every target directory
remove_all_old_backups()
store_old_backups()

for target_directory in target_directories:
    # Paths are shell-quoted here because backup_directory() inserts them
    # into a shell command line unchanged
    backup_directory(quote(target_directory))