[37] | 1 | #!/usr/bin/perl |
---|
| 2 | # |
---|
| 3 | # loadbalance: |
---|
| 4 | # This script tries to limit the load on a computer. |
---|
| 5 | # It is applicable to large batches of small jobs. |
---|
| 6 | # It runs only a few jobs at a time, with low priority, and |
---|
| 7 | # watches the uptime to make sure it stays below the load limit. |
---|
| 8 | # |
---|
| 9 | # Usage: loadbalance <maxloadfile> <commandfile> [options] |
---|
| 10 | # Options: |
---|
| 11 | # -logdir logdir saves the cmd,stdout,stderr files |
---|
| 12 | # to another logdir (default is |
---|
| 13 | # /n/<hostname>/tmp/loadbalance_PID) |
---|
| 14 | # -xload Do pop up xload windows |
---|
| 15 | # -noxload Don't pop up xload windows (default) |
---|
| 16 | # |
---|
| 17 | # Where: <maxloadfile> is a file that lists the maximum load for |
---|
| 18 | # each machine. It should list each machine on a separate |
---|
| 19 | # line, e.g: |
---|
| 20 | # |
---|
| 21 | # radiance 6 |
---|
| 22 | # lambert 6 |
---|
| 23 | # wavelet 2 |
---|
| 24 | # snell 1 |
---|
| 25 | # |
---|
| 26 | # You can change this file while it's running, and it will |
---|
| 27 | # try to match the new numbers. It will not run any processes |
---|
| 28 | # on machines unlisted here. Only change the numbers while it runs: keep the same machines, in the same order. |
---|
| 29 | # |
---|
| 30 | # <commandfile> Is a list of commands to be executed, one |
---|
| 31 | # command per line. They should not depend on being executed |
---|
| 32 | # within the initial directory; e.g: |
---|
| 33 | # |
---|
| 34 | # cd /usr/data/raytrace; myrt -o f0.rgb f0.iv |
---|
| 35 | # cd /usr/data/raytrace; myrt -o f1.rgb f1.iv |
---|
| 36 | # cd /usr/data/raytrace; myrt -o f2.rgb f2.iv |
---|
| 37 | # cd /usr/data/raytrace; myrt -o f3.rgb f3.iv |
---|
| 38 | # .... |
---|
| 39 | # |
---|
| 40 | # You can use a single dash ("-") instead of commandfile, |
---|
| 41 | # and it will read from stdin. |
---|
| 42 | # |
---|
| 43 | # |
---|
| 44 | |
---|
# Write the complete usage/help text to STDERR, one line at a time, and
# then terminate the script with exit(-1).  This routine never returns.
sub printUsage {
    my @usage_lines = (
        "Usage: loadbalance <maxloadfile> <commandfile> [options]",
        "Options:",
        " -logdir logdir saves the cmd,stdout,stderr files",
        " to another logdir (default is",
        " /tmp/loadbalance_PID",
        " -xload Do pop up xload windows",
        " -noxload Don't pop up xload windows (default)",
        "",
        "Where: <maxloadfile> is a file that lists the maximum load for",
        " each machine. An optional 3rd word lists the minimum",
        " number of jobs (not load!) for each machine.",
        " maxloadfile should list each machine on a separate line, e.g:",
        "",
        " radiance 6 3",
        " lambert 6 2",
        " wavelet 2",
        " snell 1",
        " aegean 0",
        " ",
        " You can change this file while it's running, and it will",
        " try to match the new numbers. However, you can only change",
        " the numbers, NOT THE MACHINES. Please keep the same machines",
        " in the same order, to aid bookkeeping.",
        "",
        " <commandfile> Is a list of commands to be executed, one",
        " command per line. They should not depend on being executed",
        " within the initial directory; e.g:",
        "",
        " cd /usr/data/raytrace; myrt -o f0.rgb f0.iv",
        " cd /usr/data/raytrace; myrt -o f1.rgb f1.iv",
        " cd /usr/data/raytrace; myrt -o f2.rgb f2.iv",
        " cd /usr/data/raytrace; myrt -o f3.rgb f3.iv",
        " ....",
        "",
        " You can use a single dash (\"-\") instead of commandfile,",
        " and it will read from stdin.",
    );

    foreach my $usage_line (@usage_lines) {
        print STDERR $usage_line . "\n";
    }
    exit(-1);
}
---|
| 84 | |
---|
# Polling interval, in seconds.  This is how often the scheduler checks
# whether it should spawn a new process.
$TIMEINTERVAL = 6.0;
# Factor applied to every host's PENDING estimate at each time step.
# This decay compensates for uptime's 1-min lag.
$STEPDECAY = 0.8;

# Option defaults
$DOXLOAD = 0;

# Default directory for log files: <automount prefix><this host>/tmp/loadbalance_<pid>
$AUTOMOUNTPREFIX = "/n/";
$BASEHOST = `hostname`;
# chomp (rather than chop) only strips a trailing newline, so a hostname
# that arrives without one is not truncated.
chomp $BASEHOST;
$LOGDIR = $AUTOMOUNTPREFIX.$BASEHOST."/tmp/loadbalance_$$";

# Make STDOUT autoflush, so the "Uptimes:" status line (which ends in a
# carriage return, not a newline) shows up immediately.
use IO::Handle;
STDOUT->autoflush(1);
---|
| 104 | |
---|
# Validate the two mandatory arguments, probe every host listed in the
# maxload file, and open the command stream on the COMMAND handle.
if ($#ARGV < 1) {
    print STDERR "Wrong number of arguments...\n";
    &printUsage();      # prints the usage text and exits
} else {
    # Figure out and open maxload file...
    $maxloadfile = $ARGV[0];
    if (substr($maxloadfile,0,1) eq "-") {
        print STDERR "Unknown flag: $ARGV[0]...\n";
        &printUsage();
    } else {
        if (!open(MAXLOAD, '<', $maxloadfile)) {
            print STDERR "Unable to open maxload file $maxloadfile...\n";
            &printUsage();
        }

        # Initialize the per-host bookkeeping arrays, and then close
        # maxload for now... it is reopened every time it is checked,
        # so that it can be updated on-the-fly.
        print STDERR "Host check: Making sure all listed hosts are reachable,\n";
        print STDERR " and have proper ssh permissions...\n";
        # defined() keeps a final line consisting of "0" (no newline)
        # from ending the loop early.
        for ($n = 0; defined($line = <MAXLOAD>); $n++) {
            ($host[$n], $limit[$n]) = split(' ', $line);
            $pending[$n] = 0;
            $guessload[$n] = 0;

            # A blank line yields no host.  Keep its slot, because the
            # arrays are indexed by line number, but do not run ssh
            # with an empty host name.
            if (!defined($host[$n]) || $host[$n] eq "") {
                $load[$n] = 0;
                $oldload[$n] = 0;
                next;
            }

            # First just run a simple remote command, to verify the
            # host is usable (ssh replaced rsh here).
            if ($limit[$n] >= 1) {
                $errstat = `ssh $host[$n] date`;
                if ($?) {
                    die "Error: `ssh $host[$n] date` failed. Fix ssh settings,\n".
                        "or remove host $host[$n] from loadlimit file: $maxloadfile.\n";
                }
            } else {
                print STDERR "$host[$n]: Skipping ssh check, limit less than 1.\n";
            }

            # Get initial uptime info (also done for hosts whose ssh
            # check was skipped above).
            $load[$n] = &uptime($host[$n]);
            $oldload[$n] = $load[$n];
            print STDERR "Using host $host[$n], load $load[$n], limit $limit[$n].\n";
        }
        close(MAXLOAD);
    }

    # Figure out and open command file...
    $commandfile = $ARGV[1];
    if ($commandfile eq '-') {
        # Duplicate STDIN with an explicit read mode ("<&"); the
        # documented dup forms all carry a mode character.
        if (!open(COMMAND, "<&STDIN")) {
            print STDERR "Unable to read commands from stdin: $!\n";
            exit(-1);
        }
        # The number of commands is unknown when reading a stream.
        $ncommands = "???";
    } elsif (substr($commandfile,0,1) eq "-") {
        print STDERR "Unknown flag: $ARGV[1]...\n";
        &printUsage();
    } else {
        # Count the commands for the "N of M" progress message.  Done
        # in Perl on a separate handle, so the file name is never
        # passed through a shell; only regular files are counted.
        $ncommands = "???";
        if (-f $commandfile && open(my $count_fh, '<', $commandfile)) {
            $ncommands = 0;
            $ncommands++ while (<$count_fh>);
            close($count_fh);
        }
        if (!open(COMMAND, '<', $commandfile)) {
            print STDERR "Unable to open command file $commandfile...\n";
            &printUsage();
        }
    }
}
---|
| 172 | |
---|
# Walk the optional arguments that follow <maxloadfile> and <commandfile>.
# $currarg always indexes the next unprocessed element of @ARGV.
$currarg = 2;
while ($currarg <= $#ARGV) {
    my $flag = $ARGV[$currarg];

    if ($flag eq "-xload") {
        # Pop up xload windows
        $DOXLOAD = 1;
        $currarg += 1;
    } elsif ($flag eq "-noxload") {
        # Do not pop up xload windows
        $DOXLOAD = 0;
        $currarg += 1;
    } elsif ($flag eq "-logdir") {
        # -logdir consumes the following argument as well
        $LOGDIR = $ARGV[$currarg + 1];
        $currarg += 2;

        # A missing value is a usage error (printUsage exits)
        if ($LOGDIR eq "") {
            print STDERR "Error: no logdir???\n\n";
            &printUsage();
        }

        # Turn a relative logdir into an absolute path
        unless ($LOGDIR =~ m{^/}) {
            $PWD = `pwd`;
            chop $PWD;
            $LOGDIR = $PWD . "/" . $LOGDIR;
        }

        # Prepend <automount prefix><basehost> unless the path already
        # starts with the automount prefix
        if (index($LOGDIR, $AUTOMOUNTPREFIX) != 0) {
            $LOGDIR = join("", $AUTOMOUNTPREFIX, $BASEHOST, $LOGDIR);
        }
    } else {
        print STDERR "Error: Unhandled arg $ARGV[$currarg].\n\n";
        &printUsage();
    }
}
---|
| 207 | |
---|
# Make sure logdir is usable: either an existing, writable directory
# (whose old loadbalance files are removed), or a directory we create.
if (-e $LOGDIR) {
    if (-d $LOGDIR) {
        -x $LOGDIR || die "Error: logdir $LOGDIR does not have execute permissions\n";
        -w $LOGDIR || die "Error: logdir $LOGDIR does not have write permissions\n";

        # Clear any old loadbalance logfiles.  The former shell pipeline
        # used egrep '$loadbalance_' (the \$ was consumed by Perl's
        # string quoting), a pattern that can never match, so nothing
        # was ever removed.  Stale loadbalance_ps_* markers left in the
        # directory would be counted as running jobs.
        print STDERR "Clearing old log files....";
        if (opendir(my $logdir_dh, $LOGDIR)) {
            foreach my $entry (readdir($logdir_dh)) {
                next unless $entry =~ /^loadbalance_/;
                my $old_file = "$LOGDIR/$entry";
                next unless -f $old_file;
                unlink($old_file)
                    or print STDERR "\nWarning: could not remove $old_file: $!\n";
            }
            closedir($logdir_dh);
        } else {
            print STDERR "\nWarning: could not read logdir $LOGDIR: $!\n";
        }
        print STDERR "Done.\n";

    } else {
        print STDERR "Error: loadbalance: logdir $LOGDIR exists, \n".
            "and is not a directory.\n";
        printUsage();
    }
} else {
    # make logdir (builtin mkdir; no subprocess, and $! explains failures)
    mkdir($LOGDIR) or die "Error: Could not mkdir $LOGDIR: $!\n";
}
---|
| 234 | |
---|
# When -xload was given, launch one background xload monitor (through
# ssh) for every host whose limit is at least 1.  Commented-out code
# that used rsh as the transport has been dropped.
if ($DOXLOAD) {
    print STDERR "Starting xloads...";
    for ($n = 0; $n <= $#host; $n++) {
        # Hosts with a limit below 1 get no window
        next unless $limit[$n] >= 1;

        # Graph scale is one above the integer limit; the window
        # height is derived from that scale.
        $scale     = 1 + int($limit[$n]);
        $geomscale = 20 * ($scale + 1);

        $cmd = join(" ",
                    "ssh", $host[$n],
                    "xload -fg green -hl blue -bg black",
                    "-scale", $scale,
                    "-geom", "300x" . $geomscale,
                    "&") . "\n";
        system($cmd);
    }
    print STDERR "Done!\n";
}
---|
| 252 | |
---|
| 253 | |
---|
| 254 | ###################################################################### |
---|
| 255 | ########## |
---|
| 256 | ########## Main Loop |
---|
| 257 | ########## |
---|
| 258 | ###################################################################### |
---|
| 259 | |
---|
| 260 | |
---|
# Loop until commands are exhausted.  For every command line: wait for
# a host, register a ps marker file, write the command into a csh
# script in the logdir, and start it in the background through ssh with
# its output redirected to per-job log files.
#
# defined() keeps a final line consisting of "0" from ending the loop.
for ($cmdno = 1; defined($commandline = <COMMAND>); $cmdno++) {
    # chomp only removes a trailing newline; chop would eat the last
    # character of a final command that has no newline.
    chomp($commandline);

    # Find a host for ssh. Wait, if necessary...
    $host = &findHost();
    $psfile = &addps($host, $cmdno);

    # Derive the sibling file names from the marker's file name.  The
    # substitution is anchored to the last path component, so a "_ps_"
    # elsewhere in the logdir path is left alone.
    $stdoutname = $psfile; $stdoutname =~ s{loadbalance_ps_([^/]*)\z}{loadbalance_stdout_$1};
    $stderrname = $psfile; $stderrname =~ s{loadbalance_ps_([^/]*)\z}{loadbalance_stderr_$1};
    $cmdname    = $psfile; $cmdname    =~ s{loadbalance_ps_([^/]*)\z}{loadbalance_cmd_$1};

    # Set up the command script: one statement per line, followed by
    # the removal of the ps marker that signals completion.
    if (!open(CMD, '>', $cmdname)) {
        # Without a script the job cannot run; drop its marker so it is
        # not counted as a running process forever.
        print STDERR "Error: cannot write command file $cmdname: $!\n";
        unlink($psfile);
        next;
    }
    $cmdlines = $commandline;
    $cmdlines =~ s/;/\n/g;
    print CMD "$cmdlines\n";
    print CMD "/bin/rm $psfile\n";
    close(CMD);

    # The command must be run in the background, or else this script
    # would never run more than 1 at a time.
    # NOTE(review): csh -e presumably aborts the script at the first
    # failing command, in which case the trailing /bin/rm never runs
    # and the marker lingers -- confirm and consider removing the
    # marker outside the script.
    $cmd = "ssh $host /bin/nice -20 csh -ef $cmdname &\n";
    print STDERR $cmd;

    # Redirect STDOUT, STDERR to log files, keeping duplicates of the
    # originals so they can be restored afterwards.
    open(SAVEOUT, ">&STDOUT");
    open(SAVEERR, ">&STDERR");
    open(STDOUT, '>', $stdoutname)
        or print SAVEERR "Warning: cannot open $stdoutname: $!\n";
    open(STDERR, '>', $stderrname)
        or print SAVEERR "Warning: cannot open $stderrname: $!\n";
    select(STDERR); $| = 1;
    select(STDOUT); $| = 1;

    # Actually run the ssh command
    system($cmd);

    # Restore STDOUT, STDERR and release the saved duplicates
    open(STDOUT, ">&SAVEOUT");
    open(STDERR, ">&SAVEERR");
    close(SAVEOUT);
    close(SAVEERR);
}
---|
| 311 | |
---|
# Once all the commands are started, wait for them all to finish.
# Polls the number of remaining ps marker files and reports every
# change.  The count is normalised with int() before it is printed,
# because countallps may hand back raw `wc -l` output (padding plus a
# trailing newline), which would break the message across two lines.
$nleft = int(&countallps());
print STDERR "loadbalance waiting for $nleft processes to finish...\n";
while ($nleft > 0) {
    $oleft = $nleft;
    $nleft = int(&countallps());

    if ($nleft != $oleft) {
        print STDERR "$nleft...";
    }
    # No need to sleep once the last job has gone
    sleep 4 if ($nleft > 0);
}
print STDERR " Done!\n";
---|
| 327 | |
---|
| 328 | |
---|
| 329 | |
---|
| 330 | |
---|
| 331 | |
---|
| 332 | ###################################################################### |
---|
| 333 | ########## |
---|
| 334 | ########## Helper functions |
---|
| 335 | ########## |
---|
| 336 | ###################################################################### |
---|
| 337 | |
---|
| 338 | |
---|
# Find a host for ssh.  Wait, if necessary.
#
# Repeatedly scans the hosts of the maxload file until one can take
# another job, and returns that host's name.  A host qualifies when
#   guessed load + pending + 1 <= limit  and  running jobs + 1 <= limit,
# or when it runs fewer jobs than its optional minimum (3rd column).
#
# Reads/updates the globals @host, @limit, @minlimit, @load, @oldload,
# @pending, $icommand and $midmaxloadfile; reads $maxloadfile,
# $ncommands, $TIMEINTERVAL and $STEPDECAY.
sub findHost {
    while (1) {
        # Reload the maxload file when a new scan cycle starts, so that
        # edits made while running are picked up.
        if (!$midmaxloadfile) {
            if (open(MAXLOAD, '<', $maxloadfile)) {
                my $row = 0;
                while (defined(my $line = <MAXLOAD>)) {
                    my @words = split(' ', $line);
                    $host[$row]     = $words[0];
                    $limit[$row]    = $words[1];
                    $minlimit[$row] = $words[2];
                    $row++;
                }
                close(MAXLOAD);
            } else {
                # Keep working with the limits already in memory
                print STDERR "WARNING! Unable to reopen maxload file $maxloadfile: $!\n";
            }
            $midmaxloadfile = 1;
        }

        # Run through the list, checking uptime...
        for (my $n = 0; $n <= $#host; $n++) {
            # Skip machines with limit 0
            if ($limit[$n] == 0) {
                next;
            }

            # Compute guessload, our guess of what the load "should"
            # be: the measured load plus, while the load is rising, its
            # recent change extrapolated over a full minute.
            $load[$n] = &uptime($host[$n]);
            $oldload[$n] = $load[$n] unless defined($oldload[$n]);
            my $guessload = $load[$n];
            if ($load[$n] > $oldload[$n]) {
                # Previously this was added to an unrelated scalar
                # ($load), so the correction never took effect.
                $guessload += (60.0 / $TIMEINTERVAL) * ($load[$n] - $oldload[$n]);
            }
            # Remember the measured load (not the guess) so that the
            # next difference is again between two measurements.
            $oldload[$n] = $load[$n];

            # Count number of processes still running on this host
            my $pscount = 0 + &countps($host[$n]);

            # Reduce pending to pscount, if it's too big, since the
            # pending load (from our jobs) cannot be larger than the
            # total number of (our) jobs.
            $pending[$n] = $pscount if ($pending[$n] > $pscount);

            if (($guessload + $pending[$n] + 1 <= $limit[$n] &&
                 $pscount + 1 <= $limit[$n]) ||
                $minlimit[$n] > $pscount) {
                # We found a processor to use....
                print STDERR "========\n";
                $icommand++;
                print STDERR "Using $host[$n] ($icommand of $ncommands): load: $load[$n], guess: $guessload,".
                    " pending: $pending[$n], limit: $limit[$n], ".
                    "pscount: $pscount ...\n";
                # add 1.1, to be cautious...
                $pending[$n] += 1.1;
                return($host[$n]);
            }
        }

        # Print waiting message so people know it's still alive...
        print STDOUT "Uptimes: ";
        for (my $n = 0; $n <= $#host; $n++) {
            print STDOUT $host[$n]." ".$load[$n].", ";
        }
        print "\r";

        # If we get here, we ran through the whole list.
        # Wait $TIMEINTERVAL seconds, decay pending, and loop again
        # (re-reading the maxload file first).
        $midmaxloadfile = 0;
        sleep $TIMEINTERVAL;
        for (my $n = 0; $n <= $#pending; $n++) {
            $pending[$n] *= $STEPDECAY;
        }
    }
}
---|
| 411 | |
---|
# Add a marker file to LOGDIR, to record that a process is running.
# The process will remove it when it's done.
#
# Arguments: the host the job runs on, and the job's command number.
# Returns the name of the marker file,
#   $LOGDIR/loadbalance_ps_<host>_<cmdno>
# The name is returned even if the file could not be created (a warning
# is printed in that case).
sub addps {
    my ($host, $cmdno) = @_;
    my $psfile = "$LOGDIR/loadbalance_ps_".$host."_$cmdno";

    # Create the (empty) marker directly instead of running `touch`
    # through a shell; append mode never truncates an existing file.
    if (open(my $marker_fh, '>>', $psfile)) {
        close($marker_fh);
    } else {
        print STDERR "WARNING! Could not create $psfile: $!\n";
    }
    return $psfile;
}
---|
| 424 | |
---|
# Count the marker files in LOGDIR for a particular host, to know
# how many processes we are running there.
#
# Argument: the host name.  Returns the count as a plain integer
# (0 when LOGDIR cannot be read).
#
# Only names of the exact form loadbalance_ps_<host>_<number> are
# counted.  The host is matched literally and in full, so "snell" does
# not also count the markers of "snell2", and dots in a host name are
# not treated as wildcards.
sub countps {
    my ($host) = @_;
    opendir(my $logdir_dh, $LOGDIR) or return 0;
    my $pscount = grep { /\Aloadbalance_ps_\Q$host\E_\d+\z/ } readdir($logdir_dh);
    closedir($logdir_dh);
    return $pscount;
}
---|
| 435 | |
---|
# Count the marker files in LOGDIR for all hosts, to know the total
# number of running processes.
#
# Returns the number of directory entries whose name starts with
# "loadbalance_ps_", as a plain integer (0 when LOGDIR cannot be read).
# The directory is read directly, so no shell pipeline is involved and
# the result carries no padding or trailing newline.
sub countallps {
    opendir(my $logdir_dh, $LOGDIR) or return 0;
    my $pscount = grep { /\Aloadbalance_ps_/ } readdir($logdir_dh);
    closedir($logdir_dh);
    return $pscount;
}
---|
| 445 | |
---|
| 446 | |
---|
# Get the load average of a remote system.
#
# Runs `uptime` on the given host through ssh (rup is not part of the
# default Linux install) and returns the first number that follows
# "average:" or "averages:" in its output.  Both the comma-separated
# form ("load average: 0.10, 0.20, 0.30") and the space-separated form
# ("load averages: 0.10 0.20 0.30") are understood; a decimal comma is
# accepted as well.
#
# Argument: the host name.  Returns the load as a number, or 99999999
# (after printing a warning) when no load figure could be obtained.
sub uptime {
    my ($host) = @_;
    my $loadstr = "";

    if (defined($host) && $host ne "") {
        # List-form pipe open: ssh is executed directly, so the host
        # name is never interpreted by a local shell.
        if (open(my $ssh_fh, '-|', 'ssh', $host, 'uptime')) {
            local $/;                   # read everything ssh prints
            $loadstr = <$ssh_fh>;
            close($ssh_fh);
        }
        $loadstr = "" unless defined($loadstr);
    } else {
        $host = "" unless defined($host);
    }

    # Find the number after "average:" / "averages:"
    if ($loadstr =~ /averages?:\s*(\d+(?:[.,]\d+)?)/) {
        my $upt = $1;
        $upt =~ tr/,/./;                # normalise a decimal comma
        return($upt + 0);
    }

    print STDERR "WARNING! ssh $host uptime failed!\n";
    return(99999999);
}
---|
| 474 | |
---|
| 475 | |
---|