diff options
author | Milo Casagrande <milo@ubuntu.com> | 2013-03-15 15:27:00 +0100 |
---|---|---|
committer | Milo Casagrande <milo@ubuntu.com> | 2013-03-15 15:27:00 +0100 |
commit | 077f78f01b5222e771b8d39fc0d3dc4161580260 (patch) | |
tree | 74e7ae8deef7e75175ab59a90b4c35190b06f711 | |
parent | fbe283747f3fc122680ce03851511e18d25191a8 (diff) |
Reworked rsync script in python.
-rw-r--r-- | scripts/rsync-mirror | 316 |
1 files changed, 255 insertions, 61 deletions
diff --git a/scripts/rsync-mirror b/scripts/rsync-mirror index 56b5ae8..1cbad1d 100644 --- a/scripts/rsync-mirror +++ b/scripts/rsync-mirror @@ -1,5 +1,5 @@ -#!/bin/bash - +#!/usr/bin/env python +# # Copyright (C) 2013 Linaro Ltd. # # This program is free software: you can redistribute it and/or modify @@ -15,63 +15,257 @@ # You should have received a copy of the GNU General Public License # along with this program. If not, see <http://www.gnu.org/licenses/> -# Script to ssh-rsync repositories between machines. - -RHODECODE_USER=rhodecode -SSH_USER=git-linaro-staging -SSH_KEY=/home/ubuntu/.ssh/id_rsa -GIT_SERVER=git.linaro.org -LINARO_PRIVATE_REMOTE=/srv/linaro-private.git.linaro.org/ -LANDING_TEAMS_REMOTE=/srv/landing-teams.git.linaro.org/ -PUBLIC_REPO_PATH=/srv/git.linaro.org/git/ -LOCAL_REPO_PATH=/opt/rhodecode/git_repos -PRIVATE_PATH=$LOCAL_REPO_PATH/private -LINARO_PRIVATE_PATH=$PRIVATE_PATH/linaro-private -LANDING_TEAMS_PATH=$PRIVATE_PATH/landing-teams - -# TODO: when we have groups and user set up correctly both on the staging -# and production instance, we need to change the rsync command in oder to -# preserve also users, groups and access levels. With that, the function -# can be removed and not used anymore. - -function set_own_perm { - # At the moment defaults everything to the rhodecode user and group. - # Might change in the future. - chown -R $RHODECODE_USER:$RHODECODE_USER $LOCAL_REPO_PATH - - # chmod-correctly directories and files. - find $LOCAL_REPO_PATH -type d -exec chmod 775 {} \; - find $LOCAL_REPO_PATH -type f -exec chmod 664 {} \; -} - -# Always set the permission/ownership. -trap set_own_perm EXIT HUP QUIT TERM KILL - -# Create the necessaries directories to hold private repositories. -# We store everything under a single 'private' directory as a subdir -# of RhodeCode git repositories one. -if [ ! -d $LINARO_PRIVATE_PATH ] -then - mkdir -p $LINARO_PRIVATE_PATH -fi -if [ ! 
-d $LANDING_TEAMS_PATH ] -then - mkdir -p $LANDING_TEAMS_PATH -fi - -# Sync private repositories, following symlinks and deleting locally. -rsync -e "ssh -i $SSH_KEY -l $SSH_USER" -r -l --copy-unsafe-links -z -t \ - --compress-level=8 --delete \ - $SSH_USER@$GIT_SERVER:$LINARO_PRIVATE_REMOTE $LINARO_PRIVATE_PATH - -rsync -e "ssh -i $SSH_KEY -l $SSH_USER" -r -l --copy-unsafe-links -z -t \ - --compress-level=8 --delete \ - $SSH_USER@$GIT_SERVER:$LANDING_TEAMS_REMOTE $LANDING_TEAMS_PATH - -# Sync normal repositories, following symlinks and deleting locally. -# Filter on the 'private' directory, otherwise it will be deleted since -# we copy everything in the top directories of RhodeCode git. -rsync -e "ssh -i $SSH_KEY -l $SSH_USER" -r -l --copy-unsafe-links -z -t \ - --compress-level=8 --delete --filter=-r_/private/ \ - $SSH_USER@$GIT_SERVER:$PUBLIC_REPO_PATH $LOCAL_REPO_PATH +# This script needs to be run as root, or at least as the user that has +# access to the underlying SSH key used for rsync connection. +# How to run it: +# sudo rsync-mirror --rescan-repos --api-key KEY + +import atexit +import argparse +import logging +import os +import pwd +import subprocess +from pwd import getpwnam + +from logging.handlers import TimedRotatingFileHandler +from tempfile import gettempdir + +# Default user for operations. +DEFAULT_USER = "rhodecode" +# Default read-only git URL. +BASE_PATH = "http://git.linaro.org/git-ro/" +# Path to local bin directory, %s is the user name. +LOCAL_BIN_DIR = "/home/%s/.local/bin" +# Default API host for RhodeCode. +DEFAULT_API_HOST = "http://0.0.0.0:5000" +# Name for a lock file. +LOCK_FILE_NAME = "rsync-mirror.lock" +LOCK_FILE = os.path.join(gettempdir(), LOCK_FILE_NAME) + +FILE_NAME = os.path.basename(__file__) +# Default log directory and log file. +DEFAULT_LOG_DIR = "/var/log/rhodecode" +LOG_FILE_NAME = FILE_NAME + ".log" +# When to rotate logs. +DEFAULT_ROTATING_TIME = 'midnight' +# How many old logs to keep. 
KEEP_MAX_LOGS = 10

# Default logger.
logger = logging.getLogger(FILE_NAME)

# Variables needed for rsync.
SSH_USER = "git-linaro-staging"
SSH_KEY = "/home/ubuntu/.ssh/id_rsa"
GIT_SERVER = "git.linaro.org"

# Paths on the remote end.
LINARO_PRIVATE_REMOTE = "/srv/linaro-private.git.linaro.org/"
LANDING_TEAMS_REMOTE = "/srv/landing-teams.git.linaro.org/"
PUBLIC_REPO_PATH = "/srv/git.linaro.org/git/"

# Local paths for the repositories.
PRIVATE_DIR_NAME = "private"
LOCAL_REPO_PATH = "/opt/rhodecode/git_repos"
# Paths on the local end, where everything will be stored.
PRIVATE_PATH = os.path.join(LOCAL_REPO_PATH, PRIVATE_DIR_NAME)
LINARO_PRIVATE_PATH = os.path.join(PRIVATE_PATH, "linaro-private")
LANDING_TEAMS_PATH = os.path.join(PRIVATE_PATH, "landing-teams")


@atexit.register
def set_own_perm():
    """Set ownership and permissions of everything under LOCAL_REPO_PATH.

    The function is registered with atexit, so it runs on every normal
    interpreter exit. Everything is handed to DEFAULT_USER (user and
    group); real directories become 775 and real files 664.

    Symbolic links only get their own ownership changed: they are never
    chmod-ed and never followed, so dangling links do not abort the walk
    and files outside the tree are left alone.

    :raises KeyError: If DEFAULT_USER does not exist on this system.
    :raises OSError: If ownership or permissions cannot be changed.
    """
    dir_perm = 0o775
    file_perm = 0o664

    # A single lookup gives both the uid and the gid.
    pw_entry = getpwnam(DEFAULT_USER)
    usr = pw_entry.pw_uid
    grp = pw_entry.pw_gid

    def _apply(path, mode):
        # lchown acts on the link itself instead of on its target.
        os.lchown(path, usr, grp)
        if not os.path.islink(path):
            os.chmod(path, mode)

    if not os.path.isdir(LOCAL_REPO_PATH):
        # Nothing has been synced yet, nothing to fix.
        return

    # os.walk never lists the top directory among 'dirs': do it by hand.
    _apply(LOCAL_REPO_PATH, dir_perm)
    for root, dirs, files in os.walk(LOCAL_REPO_PATH):
        for name in dirs:
            _apply(os.path.join(root, name), dir_perm)
        for name in files:
            _apply(os.path.join(root, name), file_perm)


def args_parser():
    """Sets up the argument parser.

    :return The configured argparse.ArgumentParser instance.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--user",
                        default=DEFAULT_USER,
                        help="User to run the commands as.")
    parser.add_argument("--rescan-repos",
                        action="store_true",
                        help="If the directory containing repositories "
                             "should be re-scanned when adding new ones.")
    parser.add_argument("--api-key",
                        help="The RhodeCode API key to use for re-scanning "
                             "the repositories.")
    parser.add_argument("--api-host",
                        default=DEFAULT_API_HOST,
                        help="The host URL where API interface is located. "
                             "Defaults to '%s'." % DEFAULT_API_HOST)
    parser.add_argument("--log-dir",
                        default=DEFAULT_LOG_DIR,
                        help="Directory to store logs. Defaults to '%s'." %
                             DEFAULT_LOG_DIR)
    parser.add_argument("--debug",
                        action="store_true",
                        help="Print debugging statements.")
    return parser


def setup_logging(debug, log_dir):
    """Sets up logging on a daily-rotated file.

    The log directory is created when missing, since the file handler
    cannot open its file otherwise.

    :param debug: If the level should be set to DEBUG.
    :type bool
    :param log_dir: Where to store file based logs.
    :type str
    """
    th_formatter = "%(asctime)s %(levelname)-8s %(message)s"
    check_paths([log_dir])
    log_file = os.path.join(log_dir, LOG_FILE_NAME)

    timed_handler = TimedRotatingFileHandler(log_file,
                                             when=DEFAULT_ROTATING_TIME,
                                             backupCount=KEEP_MAX_LOGS)
    timed_handler.setFormatter(logging.Formatter(th_formatter))

    level = logging.DEBUG if debug else logging.INFO
    logger.setLevel(level)
    timed_handler.setLevel(level)

    logger.addHandler(timed_handler)


def check_paths(paths):
    """Check that the paths are on the filesystem, creating missing ones.

    :param paths: A list of directory paths.
    :raises OSError: If a directory cannot be created, or if the path
        exists but is not a directory.
    """
    import errno

    for path in paths:
        # Create first and inspect the error afterwards: testing for
        # existence beforehand leaves a window for another process.
        try:
            os.makedirs(path)
        except OSError as err:
            if err.errno != errno.EEXIST or not os.path.isdir(path):
                raise


def rsync_repo(what, where, r_filter=None):
    """Syncs locally remote repositories.

    :param what: The remote repository.
    :param where: The local path.
    :param r_filter: A filter to apply to rsync.
    :return The exit status of the rsync command.
    """
    # The remote shell command is one argv element. It must not carry
    # quotes of its own: no shell is involved to strip them, and rsync
    # would take the whole quoted string as the name of the program.
    remote_shell = "ssh -i %s -l %s" % (SSH_KEY, SSH_USER)

    cmd_args = ["rsync", "-e", remote_shell]
    cmd_args += ["-r", "-l", "--copy-unsafe-links", "-z", "-t"]
    cmd_args += ["--compress-level=8", "--delete"]
    if r_filter:
        cmd_args += ["--filter=%s" % r_filter]
    cmd_args += ["%s@%s:%s" % (SSH_USER, GIT_SERVER, what)]
    cmd_args += [where]
    return execute_command(cmd_args)


def rescan_git_directory(api_key, api_host, user=None):
    """Rescans git directories for new repositories added.

    The 'rhodecode-api' executable is looked up in the local bin
    directory of 'user', or of the current user when 'user' is not
    given. 'sudo' is used only when the command has to run as somebody
    other than the current user.

    :param api_key: The RhodeCode API key.
    :type str
    :param api_host: The RhodeCode host where to run the remote command.
    :type str
    :param user: The user to run the command as.
    :type str
    :return The exit status of the command.
    """
    actual_user = pwd.getpwuid(os.getuid())[0]
    # Without an explicit user, guess the one running the script.
    bin_dir = LOCAL_BIN_DIR % (user or actual_user)

    api_key_cmd = "--apikey=%s" % api_key
    api_host_cmd = "--apihost=%s" % api_host

    api_cmd = os.path.join(bin_dir, "rhodecode-api")
    cmd_args = [api_cmd, api_key_cmd, api_host_cmd, "rescan_repos"]

    if not user or user == actual_user:
        # Already running as the right user, no need to switch.
        return execute_command(cmd_args)
    return execute_command(cmd_args, as_sudo=True, user=user)


def execute_command(cmd_args, as_sudo=False, user=None, work_dir=None):
    """Executes the command using Popen.

    A non-zero exit status is logged as an error; the full command line
    and the standard error of the command are logged at debug level.

    :param cmd_args: The list of command and parameters to run.
    :param as_sudo: If the command has to be run with 'sudo'.
    :param user: Runs the command as the specified user.
    :param work_dir: Where the command should be run from. Defaults to
        the working directory at the time of the call.
    :return The exit status of the command.
    :raises OSError: If the command cannot be started.
    """
    exec_args = []
    if not isinstance(cmd_args, list):
        cmd_args = [cmd_args]

    if as_sudo:
        exec_args = ["sudo"]
        if user:
            exec_args += ["-u", user, "-H"]

    exec_args += cmd_args
    # cwd=None makes Popen use the current working directory, resolved
    # now rather than when the module was loaded.
    process = subprocess.Popen(exec_args,
                               cwd=work_dir,
                               stdout=subprocess.PIPE,
                               stderr=subprocess.PIPE)
    p_out, p_err = process.communicate()

    if process.returncode != 0:
        # NOTE(review): the command line may include the API key, which
        # then ends up in the log file - confirm this is acceptable.
        logger.error("Error executing the following command: %s",
                     " ".join(cmd_args))
        logger.debug("The full command line is: %s", " ".join(exec_args))
        logger.debug(p_err)

    return process.returncode


if __name__ == '__main__':
    import errno

    parser = args_parser()
    args = parser.parse_args()

    if args.rescan_repos and not args.api_key:
        # Without a key the API would be called with '--apikey=None'.
        parser.error("--rescan-repos requires --api-key")

    # O_EXCL makes creation fail if the file is already there: testing
    # and taking the lock happen in a single, atomic step.
    try:
        lock_fd = os.open(LOCK_FILE, os.O_CREAT | os.O_EXCL | os.O_WRONLY)
    except OSError as err:
        if err.errno != errno.EEXIST:
            raise
        print("Another process is still running: cannot acquire lock.")
    else:
        try:
            setup_logging(args.debug, args.log_dir)
            check_paths([LINARO_PRIVATE_PATH, LANDING_TEAMS_PATH])
            # First private, then the public ones
            rsync_repo(LINARO_PRIVATE_REMOTE, LINARO_PRIVATE_PATH)
            rsync_repo(LANDING_TEAMS_REMOTE, LANDING_TEAMS_PATH)

            # We need to filter on the private directory, otherwise
            # '--delete' would remove it from the local top directory.
            r_filter = "-r_/%s/" % PRIVATE_DIR_NAME
            rsync_repo(PUBLIC_REPO_PATH, LOCAL_REPO_PATH, r_filter)

            if args.rescan_repos:
                print("Re-scanning git repositories directory...")
                rescan_git_directory(args.api_key,
                                     args.api_host,
                                     user=args.user)
        finally:
            os.close(lock_fd)
            os.unlink(LOCK_FILE)