#!/bin/sh
##
## A monitoring agent for the netguard server.
##
## Copyright (c) 1993, 1994
##
## S. Schoek, J. Schoenwaelder
## TU Braunschweig, Germany
## Institute for Operating Systems and Computer Networks
##
## Permission to use, copy, modify, and distribute this
## software and its documentation for any purpose and without
## fee is hereby granted, provided that this copyright
## notice appears in all copies.  The University of Braunschweig
## makes no representations about the suitability of this
## software for any purpose.  It is provided "as is" without
## express or implied warranty.
##

# Bootstrap trick: this file is at the same time a valid sh script and a
# valid Tcl script.
#
# Tcl sees the next lines as an assignment to variable `kludge'; the
# braces quote the whole shell fragment, so Tcl never executes it.
# For sh, `set' replaces the positional parameters by two extra words
# followed by the original arguments; the two shifts cancel the effect
# of the set, and then we run scotty on this script. A scotty binary in
# the parent directory is preferred if it is executable, otherwise the
# one found via PATH is used.

set kludge { $*
    shift
    shift
    if test -x ../scotty ; then
      exec ../scotty -nf $0 $*
    else
      exec scotty -nf $0 $*
    fi
}

##
## The following global variables are used to control the monitoring
## script. Some of them are initialized at the end of this script.
##

# Address of the netguard server; assigned by the main script below and
# passed to connect.
set server ""
# Port of the netguard server; taken from the first command line argument.
set port ""
# Name of this agent; taken from the second command line argument. It
# prefixes every syslog message and is sent to the server as "name ...".
set tool_name ""
# Length of one measurement period. It is subtracted from the remaining
# time of every variable on each pass and multiplied by 1000 when the
# loop job is created -- presumably seconds; TODO confirm the unit
# expected by "job create".
set interval 60

##
## Report a background error on stderr.
##
## msg - the error message; it is printed first, followed on the next
##       line by the contents of the global errorInfo.
##

proc scottyerror { msg } {
    global errorInfo
    puts stderr [format "%s\n%s" $msg $errorInfo]
}

##
## Return a copy of a list with every occurrence of an element removed.
##
## list    - NAME of the list variable in the caller's scope (not its
##           value); the variable is only read, never modified
## element - the value to remove
##
## Elements are compared as strings. A numeric comparison would treat
## look-alikes such as "1" and "1.0" as equal and remove both.
##

proc ldelete { list element } {
    upvar $list mylist
    set result ""
    foreach e $mylist {
        # string compare, not !=, which compares numerically when both
        # operands happen to look like numbers
        if {[string compare $e $element] != 0} { lappend result $e }
    }
    return $result
}

##
## Open a TCP connection to the netguard server.
##
## host - address of the server
## port - port of the server
##
## On success the connection handle is stored in the global variable
## socket, readserver is registered as the input handler for it, and
## the handle is returned. On failure the error is written to syslog,
## the global variable socket is left unset, and an empty string is
## returned.
##

proc connect { host port } {
    global socket tool_name

    if {[catch {tcp connect $host $port} handle]} {
        syslog error "$tool_name: $handle"
        # make sure no stale handle survives a failed connect
        catch {unset socket}
        return ""
    }
    set socket $handle

    syslog info "$tool_name: connect"
    addinput $socket "readserver $socket"
    return $socket
}

##
## Shut down the connection to the netguard server and terminate the
## agent. The input handler is removed, the connection is closed and
## the global variable socket is deleted. This proc does not return
## to its caller because it ends with exit.
##

proc disconnect {} {
    global tool_name socket

    # Best effort cleanup: the first step that fails (e.g. because
    # socket does not exist) silently skips the remaining ones.
    catch {removeinput $socket; tcp close $socket; unset socket}

    syslog info "$tool_name: disconnect"
    exit
}

##
## Read one message from the server and do what the master requests
## us to do. This proc gets called from scotty's event manager.
##
## socket - the connection handle the message is read from
##
## A message is a Tcl list of the form "action cmd result". Only
## messages with action "ok" are processed:
##   accepted - answer with "name <tool_name>"
##   close    - log and disconnect (which terminates the agent)
##   var_data - hand the configuration in result over to prepare
## Any other cmd is logged as unknown. Messages with a different
## action are ignored. A read error or end of file leads to a
## disconnect; a line that is not a well-formed list is logged and
## dropped.
##

proc readserver { socket } {

    global tool_name

    if {[catch {gets $socket} answer]} {
        syslog error "$tool_name: $answer"
        disconnect
        return
    }

    if {[eof $socket]} {
        syslog error "$tool_name: server is gone"
        disconnect
        return
    }

    # The line comes from the network. Check that it parses as a list
    # before using lindex, so that a garbled line does not raise an
    # error inside the event handler.
    if {[catch {llength $answer}]} {
        syslog error "$tool_name: malformed message: $answer"
        return
    }

    set action [lindex $answer 0]
    set cmd    [lindex $answer 1]
    set result [lindex $answer 2]

    if {[string compare $action "ok"] != 0} {
        return
    }

    # The -- keeps a cmd that starts with a dash from being taken as
    # an option of switch.
    switch -- $cmd {
        accepted {
            catch {
                puts $socket "name $tool_name"
                flush $socket
            }
            return
        }
        close {
            syslog debug "$tool_name: closed connection to server"
            # disconnect exits the process, nothing more to do here
            disconnect
        }
        var_data {
            prepare $result
            syslog debug "$tool_name: $action $cmd $result"
            return
        }
        default {
            syslog debug "$tool_name: unknown cmd: $action $cmd $result"
        }
    }
}

##
## Send the pending measurement values to the control daemon.
##
## For every host/variable pair listed in node_list that has a value in
## variable_list(host,var,data), an entry made of the variable's tag
## followed by the value is built and the value is removed from
## variable_list. If at least one entry was found, the time stamp of
## the period is appended and a single message of the form
##     data values {{entry} .. {entry} timestamp}
## is written to the socket. A write error is logged and leads to a
## disconnect. Finally the time stamp is deleted, so nothing is
## collected across calls.
##

proc send_data {} {
    global tool_name variable_list node_list trouble_result socket time

    set entries ""
    foreach node $node_list {
        set host [lindex $node 0]
        foreach var [lindex $node 1] {
            set key $host,[lindex $var 0]
            # only variables with a pending value are reported
            if {![info exists variable_list($key,data)]} continue
            set entry $variable_list($key,tag)
            lappend entry $variable_list($key,data)
            unset variable_list($key,data)
            lappend entries $entry
        }
    }

    if {$entries != ""} {
        lappend entries $time(time_stamp)
        set mess [list data values $entries]
        if {[catch {puts $socket $mess; flush $socket} err]} {
            syslog error "$tool_name: $err"
            disconnect
        }
    }

    # the next call of load creates a fresh time stamp
    catch {unset time(time_stamp)}
}

##
## Store the information about variables and hosts in the global data
## structures used by the measurement procs.
##
## list - the configuration; every element describes one node. Index 0
##        of a node is the host name and index 1 a list of variables,
##        each of the form "name interval tag".
##
## For every host/variable pair the interval, the remaining time until
## the next report (initially the interval) and the tag are stored in
## variable_list. Every node is appended to node_list and its state in
## error_list is set to no_error. Only the first configuration is
## accepted: once node_list exists the call is ignored.
##

proc prepare {list} {
    global variable_list node_list error_list

    if {[info exists node_list]} {return}

    foreach node $list {
        set host [lindex $node 0]
        foreach var [lindex $node 1] {
            set var_name [lindex $var 0]
            set period   [lindex $var 1]
            # Plain assignments: appending would turn a variable that is
            # configured twice into "60 60", which breaks the arithmetic
            # on interval and rem_time. The last entry wins.
            set variable_list($host,$var_name,interval) $period
            set variable_list($host,$var_name,rem_time) $period
            # The tag is kept as a one element list because send_data
            # appends the value to it to build an entry.
            set variable_list($host,$var_name,tag) [list [lindex $var 2]]
        }
        lappend node_list $node
        set error_list($host) no_error
    }
}

##
## Procedures for doing the measurement job
## -- get data and calculate data (aggregate)
##

##
## Compute the difference between two rstat results of one host and
## store the values of all variables whose interval has expired.
##
## l1     - the newer result, a list of "name type value" triples,
##          or -1 if the host was not reachable
## l2     - the older result in the same format, or -1
## period - the time elapsed between the two results
## host   - the host name used as key into variable_list
##
## On every call the global interval is subtracted from the remaining
## time of each configured variable. When the remaining time drops to
## zero or below, it is reset to the variable's interval and a value is
## stored in variable_list(host,var,data):
##   - 0 if one of the results is -1 (host not reachable)
##   - (new - old) / period for type Counter, 0 if period <= 0
##   - value / 256.0 for type Gauge (presumably the fixed point scale
##     of the rstat load averages -- TODO confirm)
##   - the unchanged new value for every other type, e.g. TimeTicks
##

proc rstat_diff {l1 l2 period host} {
    global variable_list
    global node_list
    global interval

    set len [llength $l1]

    ## If one argument is -1 then the host was not reachable. The
    ## variables of this host are reported as 0 whenever their
    ## interval expires, until the host is reachable again.
    if {($l1 == -1) || ($l2 == -1)} {
        foreach node $node_list {
            set host1 [lindex $node 0]
            if {$host1 == $host} {
                foreach var [lindex $node 1] {
                    set var [lindex $var 0]
                    set left \
                        [expr {$variable_list($host1,$var,rem_time) - $interval}]
                    set variable_list($host1,$var,rem_time) $left
                    if {$left <= 0} {
                        set variable_list($host1,$var,rem_time) \
                            $variable_list($host1,$var,interval)
                        set variable_list($host1,$var,data) 0
                    }
                }
                break
            }
        }
        return
    }

    ## Here we calculate the result for one period, but only for the
    ## configured variables that are due in this period.
    for {set i 0} {$i < $len} {incr i} {
        set el1 [lindex $l1 $i]
        set el2 [lindex $l2 $i]
        set var  [lindex $el1 0]
        set type [lindex $el1 1]

        if {![info exists variable_list($host,$var,tag)]} continue

        set left [expr {$variable_list($host,$var,rem_time) - $interval}]
        set variable_list($host,$var,rem_time) $left
        if {$left > 0} continue

        set variable_list($host,$var,rem_time) \
            $variable_list($host,$var,interval)

        # default for TimeTicks and unknown types: the raw new value
        set tmp [lindex $el1 2]
        if {$type == "Counter"} {
            set tmp [expr {[lindex $el1 2]-[lindex $el2 2]}]
            if {$period <= 0} {
                set tmp 0
            } else {
                # Multiply by 1.0 to force a floating point division.
                # Gluing ".0" onto the value breaks as soon as the
                # difference already is a floating point number.
                set tmp [expr {($tmp * 1.0) / $period}]
            }
        } elseif {$type == "Gauge"} {
            set tmp [expr {[lindex $el1 2] / 256.0}]
        }
        set variable_list($host,$var,data) $tmp
    }
}

##
## Take the first sample of a measurement period.
##
## A time stamp for the period is created unless one already exists.
## Then every host in node_list is queried with "sunrpc stat" using
## index 2 of the node as address. The result, or -1 if the query
## failed, is stored in stat(ip) and the current clock value in
## time(ip). Changes between reachable and not reachable are tracked
## in error_list and written to syslog once per change.
##

proc load {} {
    global tool_name socket node_list error_list stat interval time

    ## set a readable timestamp
    if {![info exists time(time_stamp)]} {
        set time(time_stamp) [getdate]
    }

    foreach node $node_list {
        set host [lindex $node 0]
        set ip   [lindex $node 2]

        set failed [catch {sunrpc stat $ip} res]
        if {$failed} {
            set res -1
            # report only the transition, not every failed attempt
            if {$error_list($host) == "no_error"} {
                set message "$host $ip at [getdate] not reachable interval: \
                             $time(time_stamp)"
                syslog debug "$tool_name: $message"
                set error_list($host) error
            }
        } else {
            if {$error_list($host) == "error"} {
                set message "$host $ip at [getdate] reachable interval: \
                             $time(time_stamp)"
                syslog debug "$tool_name: $message"
            }
            set error_list($host) no_error
        }

        set stat($ip) $res
        set time($ip) [getclock]
    }
}

##
## Take the second sample of a measurement period and evaluate it.
##
## Every host in node_list whose first sample in stat(ip) is not -1 is
## queried again with "sunrpc stat"; hosts whose first sample failed
## are not queried and count as not reachable. Changes between
## reachable and not reachable are tracked in error_list and written
## to syslog. The new result (or -1), the first sample and the time
## elapsed since the first sample are then passed to rstat_diff.
##

proc load1 {} {
    global tool_name error_list socket node_list stat time

    foreach node $node_list {
        set host [lindex $node 0]
        set ip   [lindex $node 2]

        if {$stat($ip) == -1} {
            # first sample already failed, do not try again
            set res -1
        } elseif {[catch {sunrpc stat $ip} res]} {
            set res -1
            if {$error_list($host) == "no_error"} {
                set message "$host $ip at [getdate] not reachable \
                             interval: $time(time_stamp)"
                syslog debug "$tool_name: $message"
                set error_list($host) error
            }
        } else {
            if {$error_list($host) == "error"} {
                set message "$host $ip at [getdate] reachable interval: \
                             $time(time_stamp)"
                syslog debug "$tool_name: $message"
            }
            set error_list($host) no_error
        }

        set elapsed [expr {[getclock] - $time($ip)}]
        rstat_diff $res $stat($ip) $elapsed $host
    }
}

##
## An asynchronous version of the ugly measure loop. This will be
## removed when we get real jobs for all kinds of intervals. The jobs
## will be created when a prepare message is received, not here!
##
## The proc runs as a job. If the global variable socket does not exist
## anymore, the job is killed and the agent exits. On the very first
## run only the first sample is taken with load. On every later run the
## previous period is finished with load1 and send_data (errors in
## these two are ignored) before load starts the next period.
##

proc loop {} {

    global socket
    # notfirst is declared with static so that it survives between
    # calls -- presumably a command provided by scotty; TODO confirm
    static notfirst

    if {![info exists socket]} {
        job kill [job current]
        exit
    }

    if {[info exists notfirst]} {
        catch {load1; send_data}
    } else {
        set notfirst 1
    }

    load
}

##
## Here starts the main script. After some initialization, we fall
## into the event loop processing our jobs every few seconds.
##
## Usage: ip_netguard port name
##
## Every fatal start-up error terminates the agent with exit status 1,
## so that the process that started it can detect the failure.
##

if {[llength $argv] < 2} {
    puts stderr "Usage: ip_netguard port name"
    flush stderr
    exit 1
}

set port      [lindex $argv 0]
set tool_name [lindex $argv 1]

# NOTE(review): the host name is looked up here, but the result is
# overwritten with the loopback address right below, so the lookup
# only serves as a check that "hostname" can be executed. Confirm
# whether the server is meant to be reached via its host name.
if {[catch {exec hostname} server]} {
    syslog error "$tool_name: $server"
    exit 1
}
set server 127.0.0.1

if {[connect $server $port] == ""} {
    syslog error "$tool_name: can not connect to server"
    exit 1
}

# Wait until the next full minute of the clock, then create the job
# that runs loop once per interval (converted by the factor 1000).
set delay [expr {(60 - ([getclock] % 60)) * 1000}]
after $delay "job create loop [expr {$interval * 1000}]"
