diff options
Diffstat (limited to 'Tools/portbuild/scripts/pollmachine')
-rwxr-xr-x | Tools/portbuild/scripts/pollmachine | 301 |
1 files changed, 0 insertions, 301 deletions
diff --git a/Tools/portbuild/scripts/pollmachine b/Tools/portbuild/scripts/pollmachine deleted file mode 100755 index ddc7100a94d1..000000000000 --- a/Tools/portbuild/scripts/pollmachine +++ /dev/null @@ -1,301 +0,0 @@ -#!/usr/bin/env python -# -# pollmachine -# -# Monitors build machines and notifies qmgr of changes - -# -# pollmachine [options] [arch] ... -# - update every machine in the mlist file for [arch] -# -# pollmachine [options] [arch/mach] ... -# - update individual machine(s) for specified architecture -# -# options are: -# -daemon : poll repeatedly - -# -# TODO: -# XXX qmgr notification of new/removed machines -# XXX counter before declaring a machine as dead -# Declares a machine as online if it reports 0 data from infoseek? - -# * Deal with machines change OS/kernel version -# - ACL list might change! -# - take machine offline, update ACL/arch/etc, reboot, bring online - -import sys, threading, socket -from time import sleep -import os, subprocess, logging - -if len(sys.argv) < 1: - print "Usage: %s <arch> [<arch> ...]" % sys.argv[0] - sys.exit(1) - -arches=set() -mlist={} -polldelay=0 -for i in sys.argv[1:]: - if i == "-daemon": - polldelay = 180 - continue - - if "/" in i: - item=i.partition("/") - arch=item[0] - mach=item[2] - arches.add(arch) - try: - mlist[arch].add(mach) - except KeyError: - mlist[arch] = set((mach,)) - else: - arches.add(i) - -pb="/var/portbuild" - -# set of machines for each arch -machines={} -for i in arches: - machines[i]=set() - -# Mapping from machine names to monitor threads -pollthreads={} - -class MachinePoll(threading.Thread): - """ Poll a machine regularly """ - - mach = None # Which machine name to poll - arch = None # Which arch is this assigned to - - # Which host/port to poll for this machine status (might be SSH - # tunnel endpoint) - host = None - port = 414 - - timeout = None # How often to poll - shutdown = False # Exit at next poll wakeup - - # State variables tracked - online = False - - # Dictionary of variables reported by the client - vars = None - - def __init__(self, mach, arch, timeout, host, port): - super(MachinePoll, self).__init__() - self.mach = mach - self.arch = arch - self.timeout = timeout - self.host = host - self.port = port - - # How many times the connection timed out since last success - self.timeouts = 0 - - self.vars = {} - - self.setDaemon(True) - - def run(self): - while True: - if self.shutdown: - break - - self.poll() - - if not self.timeout: - break - else: - sleep(self.timeout) - - def poll(self): - """ Poll the status of this machine """ - - nowonline = False - lines = [] - try: - s = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - s.settimeout(60) - s.connect((self.host, self.port)) - - data = "" - while len(data) < 65536: - chunk = s.recv(8192) - if not chunk: - break - data += chunk - - nowonline = True - self.timeouts = 0 - lines = data.split("\n") - except socket.timeout: - if self.online: - logging.info("[%s] Connection timeout" % self.mach) - self.timeouts += 1 - if self.timeouts < 3: - nowonline = self.online - except: - pass - finally: - try: - s.close() - except: - pass - - if nowonline != self.online: - logging.info("[%s] Now %s" % (self.mach, "online" if nowonline else "OFFLINE")) - self.online = nowonline - if self.online: - self.timeouts = 0 - # XXX inform qmgr of state change - - if self.online and not lines and not self.timeouts: - # reportload script is missing - dosetup=1 - else: - dosetup=0 - - for line in lines: - if line == "": - continue - line=line.rstrip() - part=line.partition('=') - if part[1] != '=' or not part[0]: -# if "No such file or directory" in line: -# # Client may require setting up post-boot -# dosetup=1 - logging.info("[%s] Bad input: %s" % (self.mach, line)) - # Assume client needs setting up - dosetup=1 - try: - old = self.vars[part[0]] - except KeyError: - old = "" - if old != part[2]: - self.vars[part[0]] = part[2] -# logging.info("%s@%s: \"%s\" -> \"%s\"" % (part[0], self.mach, old, part[2])) - # XXX update qmgr - - try: - envs = self.vars['buildenvs'] - for e in envs.split(): - (arch, branch, buildid) = e.split("/") - f = "/var/portbuild/%s/%s/builds/%s/.active" % \ - (arch, branch, buildid) - if os.path.exists(f): - continue - # Clean up a stale buildenv - logging.info("[%s] Cleaning up stale build: %s" % (self.mach, e)) - (err, out) = self.setup(branch, buildid, "-nocopy -full") - if err: - logging.info("[%s] Error from cleanup" % (self.mach)) - for l in out.split("\n"): - if l == "": - continue - logging.info("[%s] %s" % (self.mach, l)) - - except KeyError: - pass - - if dosetup: - logging.info("[%s] Setting up machine" % (self.mach)) - (err, out) = self.setup("-", "-") - if err: - logging.info("[%s] Error from setup" % (self.mach)) - for l in out.split("\n"): - if l == "": - continue - logging.info("[%s] %s" % (self.mach, l)) - logging.info("[%s] Setup complete" % (self.mach)) - - # Validate that arch has not changed (e.g. i386 -> amd64) - try: - if self.arch != self.vars['arch']: - logging.info("[%s] Unexpected arch: %s -> %s" % \ - (self.mach, self.arch, self.vars['arch'])) - except KeyError: - pass - - # Record current system load - try: - f = file("%s/%s/loads/%s" % (pb, self.arch, self.mach), "w") - except: - return - try: - f.write("%s %s\n" % (self.vars['jobs'], self.vars['load'])) - except: - pass - f.close() - - def setup(self, branch, buildid, args = ""): - cmd = "su ports-%s -c \"/var/portbuild/scripts/dosetupnode %s %s %s %s %s\""\ - % (self.arch, self.arch, branch, buildid, self.mach, args) - child = subprocess.Popen(cmd, shell=True, stderr = subprocess.STDOUT, - stdout = subprocess.PIPE) - err = child.wait() - out = "".join(child.stdout.readlines()) - return (err, out) - -logging.basicConfig(level=logging.INFO, - format='[%(asctime)s] %(message)s', - datefmt='%d %b %Y %H:%M:%S', - filename='/var/log/pollmachine.log', filemode='w') - -log_console = logging.StreamHandler() -log_console.setLevel(logging.INFO) -formatter = logging.Formatter('[%(asctime)s] %(message)s', - datefmt = '%d %b %Y %H:%M:%S') -log_console.setFormatter(formatter) -logging.getLogger('').addHandler(log_console) - -while True: - for arch in arches: - try: - now = mlist[arch] - except KeyError: - mlistfile="%s/%s/mlist" % (pb, arch) - try: - f = file(mlistfile, "r") - except OSError, error: - raise - - now=set(mach.rstrip() for mach in f.readlines()) - f.close() - - gone = machines[arch].difference(now) - new = now.difference(machines[arch]) - - machines[arch]=now - - for mach in gone: - logging.info("Removing machine %s/%s" % (arch, mach)) - # XXX disable from qmgr - pollthreads[mach].shutdown=True - del pollthreads[mach] - - for mach in new: - logging.info("Adding machine %s/%s" % (arch, mach)) - # XXX set up qmgr - - pc="%s/%s/portbuild.conf" % (pb, arch) - pch="%s/%s/portbuild.%s" % (pb, arch, mach) - cmd = "test -f %s && . %s; test -f %s && . %s; echo $infoseek_host; echo $infoseek_port" % (pc, pc, pch, pch) - config = subprocess.Popen(cmd, shell = True, - stdout = subprocess.PIPE) - host=config.stdout.readline().rstrip() - if not host: - host = mach - port=config.stdout.readline().rstrip() - try: - port = int(port) - except (TypeError, ValueError): - port = 414 - - pollthreads[mach] = MachinePoll(mach, arch, polldelay, host, port) - pollthreads[mach].start() - - if not polldelay: - break - - sleep(polldelay) |