aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author kris <kris@FreeBSD.org> 2004-12-28 13:40:51 +0800
committer kris <kris@FreeBSD.org> 2004-12-28 13:40:51 +0800
commit 7aab54f4d80c0a9a9dc065a273024a9bfa8a83ed (patch)
tree 51d4a1eb4feefaac06a7ce0262480157de3328b3
parent 0402e8269f1d60eb9bfffecc2cbff97bc38a01d8 (diff)
download freebsd-ports-gnome-7aab54f4d80c0a9a9dc065a273024a9bfa8a83ed.tar.gz
freebsd-ports-gnome-7aab54f4d80c0a9a9dc065a273024a9bfa8a83ed.tar.zst
freebsd-ports-gnome-7aab54f4d80c0a9a9dc065a273024a9bfa8a83ed.zip
Overhaul of the job scheduler. The new scheduler runs builds synchronously instead of probabilistically scheduling jobs, which means that the job load on a machine never exceeds a desired threshold, and we can preferentially use faster machines when they are available. This has a dramatic effect on package build throughput, although I don't yet have precise measurements of the performance improvements.

Specifically, the changes are:

* Introduce the new variable maxjobs in portbuild. This replaces the build scheduling weights previously listed in the mlist file, whose format now changes to list the build machines only, ranked in order of preference for job dispatches (i.e. faster machines first).

* The ${arch}/queue directory is used to list machines available for jobs (the content of each file is the number of jobs currently running on that machine). Changes to files in this directory are serialized using lockf on the .lock file.

* Claim a machine with the getmachine script, with the .lock held. This picks the machine with the fewest jobs running; if multiple machines have equal load, the one listed highest in the mlist file is chosen. The job counter is incremented, and the file is removed if the counter reaches ${maxjobs} for that machine. If all machines are busy, sleep for 15 seconds and retry.

* After we have claimed a machine, we run claim-chroot on it to claim an empty chroot, as before. If the claim fails, release the job from the queue with the releasemachine script and retry after a 15 second wait.

* When the build is finished, decrement the job counter with the releasemachine script, with .lock held.

* The checkmachines script now exists only to poll the load averages for admin convenience (every 2 minutes), and to ping for unreachable machines. When a machine cannot be reached, remove its entry in the queue directory to stop further job dispatches to it. This needs more work to deal with reinitialization of machines after they become available again.
-rwxr-xr-x Tools/portbuild/scripts/checkmachines 33
-rwxr-xr-x Tools/portbuild/scripts/getmachine 53
-rwxr-xr-x Tools/portbuild/scripts/pollmachine 33
-rwxr-xr-x Tools/portbuild/scripts/releasemachine 29
4 files changed, 119 insertions, 29 deletions
diff --git a/Tools/portbuild/scripts/checkmachines b/Tools/portbuild/scripts/checkmachines
index 7f32bac93128..c6108d63b0a4 100755
--- a/Tools/portbuild/scripts/checkmachines
+++ b/Tools/portbuild/scripts/checkmachines
@@ -24,37 +24,12 @@ while true; do
for i in ${arches}; do
mlist=${buildroot}/${i}/mlist
- unset DISPLAY
-
- min=99
set $(cat $mlist)
- while [ $# -gt 1 ]; do
+ while [ $# -gt 0 ]; do
m=$1
- l=$2
- if (/usr/local/bin/nc -w 5 $m infoseek > ${buildroot}/${i}/loads/$m < /dev/null); then
- num=$(awk '{print $1}' ${buildroot}/${i}/loads/$m)
- if [ "x$num" = "x" ]; then
- # logger "checkmachines: file ${buildroot}/${i}/loads/$m is empty"
- num=99
- fi
- else
- # Don't ever want to list machines we couldn't connect to
- num=999
- fi
-
- num=$(($num / $l))
-
- if [ $num -lt $min ]; then
- mach=$m
- min=$num
- elif [ $num = $min ]; then
- mach="$mach $m"
- fi
-
- shift 2
-
+ lockf ${buildroot}/${i}/queue/.lock ${buildroot}/scripts/pollmachine ${i} ${m}
+ shift 1
done
- echo "$mach" > ${buildroot}/${i}/ulist
-
done
+ sleep 120
done
diff --git a/Tools/portbuild/scripts/getmachine b/Tools/portbuild/scripts/getmachine
new file mode 100755
index 000000000000..849d300e6270
--- /dev/null
+++ b/Tools/portbuild/scripts/getmachine
@@ -0,0 +1,53 @@
+#!/bin/sh
+#
+# Choose the least-loaded machine in the queue dir
+# Called with lock held
+# We know that everything in this directory has space for another job
+
+pb=$1
+arch=$2
+branch=$3
+
+qdir=${pb}/${arch}/queue
+
+cd $qdir
+set *
+if [ "$1" = "*" ]; then
+ echo ""
+ exit 1
+fi
+
+min=9999
+while [ $# -gt 0 ]; do
+ m=$1
+ num=$(cat $m)
+ if [ $num -lt $min ]; then
+ mach=$m
+ min=$num
+ elif [ $num -eq $min ]; then
+ mach="${mach} ${m}"
+ fi
+ shift
+done
+
+if [ "$min" = 9999 -o -z "${mach}" ]; then
+ echo ""
+ exit 1
+fi
+
+# Choose highest-priority machine that is free
+mach=$(echo ${mach} | tr -s ' ' '\n' | grep -F -f - ${pb}/${arch}/mlist | head -1)
+
+. ${pb}/${arch}/portbuild.conf
+test -f ${pb}/${arch}/portbuild.${mach} && . ${pb}/${arch}/portbuild.${mach}
+
+# Now that we've found a machine, register our claim in the queue
+if [ "$((${min}+1))" -ge "${maxjobs}" ]; then
+ rm ${mach}
+else
+ echo $(($min+1)) > ${mach}
+fi
+
+# Report to caller
+echo ${mach}
+exit 0
diff --git a/Tools/portbuild/scripts/pollmachine b/Tools/portbuild/scripts/pollmachine
new file mode 100755
index 000000000000..75cd9885ddee
--- /dev/null
+++ b/Tools/portbuild/scripts/pollmachine
@@ -0,0 +1,33 @@
+#!/bin/sh
+
+buildroot=/var/portbuild
+
+i=$1
+m=$2
+if [ "$3" = "-queue" ]; then
+ queue=1
+else
+ queue=0
+fi
+
+infoseek_host=$m
+infoseek_port=infoseek
+. ${buildroot}/${i}/portbuild.conf
+if [ -f ${buildroot}/${i}/portbuild.${m} ]; then
+ . ${buildroot}/${i}/portbuild.${m}
+fi
+if (/usr/local/bin/nc -w 5 ${infoseek_host} ${infoseek_port} > ${buildroot}/${i}/loads/$m < /dev/null); then
+ if [ "${queue}" = 1 ]; then
+ num=$(awk '{print $1}' ${buildroot}/${i}/loads/$m)
+ if [ "$num" -lt "${maxjobs}" ]; then
+ echo ${num} > ${buildroot}/${i}/queue/$m
+ chown ports-${i} ${buildroot}/${i}/queue/$m
+ else
+ rm -f ${buildroot}/${i}/queue/$m
+ fi
+ fi
+else
+ rm -f ${buildroot}/${i}/queue/$m
+ exit 1
+fi
+exit 0
diff --git a/Tools/portbuild/scripts/releasemachine b/Tools/portbuild/scripts/releasemachine
new file mode 100755
index 000000000000..66d23b193845
--- /dev/null
+++ b/Tools/portbuild/scripts/releasemachine
@@ -0,0 +1,29 @@
+#!/bin/sh
+
+buildroot=/var/portbuild
+
+i=$1
+m=$2
+
+infoseek_host=$m
+infoseek_port=infoseek
+. ${buildroot}/${i}/portbuild.conf
+if [ -f ${buildroot}/${i}/portbuild.${m} ]; then
+ . ${buildroot}/${i}/portbuild.${m}
+fi
+
+q=${buildroot}/${i}/queue/$m
+if [ -f ${q} ]; then
+ num=$(cat $q)
+else
+ num=${maxjobs}
+fi
+
+if [ "$num" -gt 0 ]; then
+ echo $((${num}-1)) > $q
+else
+ echo 0 > $q
+fi
+
+chown ports-${i} ${q}
+exit 0