#!/bin/sh
##
## A monitoring agent for the netguard server.
##
## Copyright (c) 1993, 1994
##
## S. Schoek, J. Schoenwaelder
## TU Braunschweig, Germany
## Institute for Operating Systems and Computer Networks
##
## Permission to use, copy, modify, and distribute this
## software and its documentation for any purpose and without
## fee is hereby granted, provided that this copyright
## notice appears in all copies.  The University of Braunschweig
## makes no representations about the suitability of this
## software for any purpose.  It is provided "as is" without
## express or implied warranty.
##

# Tcl sees the next lines as an assignment to variable `kludge'.
# For sh, the two shifts cancel the effect of the set, and then we
# run scotty on this script.

# An executable ../scotty is preferred over a scotty found via PATH.
# Either way the interpreter is started with -nf, the path of this
# script and the remaining arguments (passed on unquoted).
set kludge { $*
    shift
    shift
    if test -x ../scotty ; then
      exec ../scotty -nf $0 $*
    else
      exec scotty -nf $0 $*
    fi
}

##
## The following global variables are used to control the monitoring
## script. Some of them are initialized at the end of this script.
##

# Address of the netguard server; assigned by the main script.
set server ""
# Port of the netguard server; taken from the first command line argument.
set port ""
# Name of this tool (second command line argument); it prefixes every
# syslog message and is sent to the server as "name <tool_name>".
set tool_name ""
# Period of the measurement job. It is multiplied by 1000 when the job
# is created and subtracted from each variable's remaining time on every
# pass -- presumably seconds; TODO confirm.
set interval 60

# Defaults handed to "snmp default" for every session opened by prepare.
set community   public
set snmp_port   161

##
## Write background error messages to stderr.
##

proc scottyerror { msg } {
    # Print the message followed by the contents of the global
    # errorInfo variable on stderr.
    global errorInfo
    puts stderr [format "%s\n%s" $msg $errorInfo]
}

##
## Delete an element from a list.
##
 
proc ldelete { list element } {
    # Return a copy of a list with every occurrence of element removed.
    #
    #   list     name of the list variable in the caller's scope
    #   element  value to remove
    #
    # The caller's variable is not modified. Elements are compared as
    # strings: the != operator of expr compares numerically whenever
    # both operands look like numbers, which would treat distinct
    # elements such as 1 and 1.0 as the same.
    upvar $list mylist
    set result ""
    foreach e $mylist {
        if {[string compare $e $element] != 0} {
            lappend result $e
        }
    }
    return $result
}

##
## Connect back to the netguard server. Returns a file descriptor
## or an empty string if it fails for some reason. The file descriptor
## is also saved in the global variable socket.
##

proc connect { host port } {
    # Open a TCP connection to host/port. On success the descriptor is
    # kept in the global variable socket, readserver is registered as
    # its input handler and the descriptor is returned. On failure the
    # error is logged, the global is removed again and an empty string
    # is returned.
    global socket tool_name

    set failed [catch {tcp connect $host $port} socket]
    if {!$failed} {
        syslog info "$tool_name: connect"
        addinput $socket "readserver $socket"
        return $socket
    }

    # socket holds the error message at this point.
    syslog error "$tool_name: $socket"
    catch {unset socket}
    return ""
}

##
## Disconnect from the netguard server. This proc also removes the
## global variable socket.
##

proc disconnect {} {
    # Tear down the server connection and terminate the script.
    global socket tool_name

    # Best effort: the first failing step ends the cleanup silently
    # (e.g. when no connection exists).
    catch {removeinput $socket; tcp close $socket; unset socket}

    syslog info "$tool_name: disconnect"
    exit
}

##
## Read a message from the server and do what the master requests
## us to do. This proc gets called from scottys event manager.
##

proc readserver { socket } {

    # Input handler for the server connection (connect registers it via
    # addinput). One line is read from socket and interpreted as a list
    # of the form: action cmd result.
    #
    # Only messages whose action is "ok" are acted upon:
    #   accepted  - answer with "name <tool_name>"
    #   close     - log and disconnect
    #   var_data  - hand the result element to prepare
    #   other     - logged as an unknown command
    # Messages with any other action are ignored. A read error or
    # end-of-file leads to disconnect.

    global tool_name

    if {[catch {gets $socket} answer]} {
        syslog error "$tool_name: $answer"
        disconnect
        return
    }

    if {[eof $socket]} {
        syslog error "$tool_name: server is gone"
        disconnect
        return
    }

    set action [lindex $answer 0]
    set cmd    [lindex $answer 1]
    set result [lindex $answer 2]

    if {$action == "ok"} {
        # cmd comes from the server: "--" keeps a command that starts
        # with "-" from being parsed as an option of switch.
        switch -- $cmd {
            accepted {
                catch {
                    puts $socket "name $tool_name"
                    flush $socket
                }
                return
            }
            close {
                syslog debug "$tool_name: closed connection to server"
                disconnect
                exit
            }
            var_data {
                prepare $result
                syslog debug "$tool_name: $action $cmd $result"
                return
            }
            default {
                syslog debug "$tool_name: unknown cmd: $action $cmd $result"
            }
        }
    }

}

##
## try to send data from hosts to control_daemon
## format data vars {{data} .. {data} {timestamp}}
## there is only one result written to socket, no data is collected
##

proc send_data {} {
    # Collect the pending data value of every configured variable and
    # send all of them to the server in a single "data values" message.
    # Each entry consists of the variable's tag followed by its value;
    # the time stamp is appended as the last element. Nothing is sent
    # when no variable has data. The time stamp is removed in any case.
    global tool_name variable_list node_list trouble_result socket time

    set records ""
    foreach node $node_list {
        set host [lindex $node 0]
        foreach var [lindex $node 1] {
            set name [lindex $var 0]
            if {![info exists variable_list($host,$name,data)]} {
                continue
            }
            set entry $variable_list($host,$name,tag)
            lappend entry $variable_list($host,$name,data)
            # A value is reported only once.
            unset variable_list($host,$name,data)
            lappend records $entry
        }
    }

    if {[llength $records] > 0} {
        lappend records $time(time_stamp)
        set mess [list data values $records]
        if {[catch {puts $socket $mess; flush $socket} err]} {
            syslog error "$tool_name: $err"
            disconnect
        }
    }
    catch {unset time(time_stamp)}
}

##
##  store information about variables and hosts in datastructures
##

##
## prepare the config-list for measurement
##

proc prepare { list } {
    # Turn the configuration received from the server into the global
    # measurement tables.
    #
    #   list  one element per node: {host vars ip}, where vars is a
    #         list of {name interval tag} and name is assumed to have
    #         the form <ifEntry column>.<n> (TODO confirm with server)
    #
    # For every node an snmp session is opened and, per variable, the
    # entries table_index, interval, rem_time and tag are stored in
    # variable_list. Nodes that could be set up completely are appended
    # to node_list as {host vars ip snmp_handle} and marked no_error in
    # error_list. Nodes whose session cannot be opened or whose bulk
    # request fails are logged and skipped.
    global tool_name snmp_port community
    global variable_list node_list error_list

    # A configuration is accepted only once.
    if {[info exists node_list]} {return}

    foreach node $list {
        set host [lindex $node 0]
        set ip   [lindex $node 2]

        # Site specific: this host is always queried on port 161.
        if {$host == "ciscobs.rz"} {
            set port 161
        } else {
            set port $snmp_port
        }

        if {[catch {snmp open $ip} snmp_handle]} {
            syslog error "$tool_name: snmp open: $snmp_handle"
            continue
        }
        catch {snmp default $snmp_handle community $community}
        catch {snmp default $snmp_handle port $port}

        set error 0
        foreach var [lindex $node 1] {
            set get_bulk_vars ""
            set var_name [lindex $var 0]
            set name_parts [split $var_name .]
            # The part after the dot is a 1-based position within the
            # collected instances of the column.
            set ext [expr {[lindex $name_parts 1] - 1}]
            set snmp_var_name \
                "interfaces.ifTable.ifEntry.[lindex $name_parts 0]"
            if {[catch {
                snmp getbulk $snmp_handle $snmp_var_name x {
                    set type [lindex $x 1]
                    if {[lsearch -exact "Gauge Counter INTEGER" $type] >= 0} {
                        lappend get_bulk_vars [lindex $x 0]
                    }
                }
            } msg ]} {
                set error 1
                syslog error "$tool_name: $msg for $host"
                catch {snmp close $snmp_handle}
                break
            }
            set variable_list($host,$var_name,table_index) \
                [lindex $get_bulk_vars $ext]
            set variable_list($host,$var_name,interval)    [lindex $var 1]
            set variable_list($host,$var_name,rem_time)    [lindex $var 1]
            set variable_list($host,$var_name,tag)         [lindex $var 2]
        }
        if {$error} { continue }

        # The handle is stored exactly once, as element 3 of the node,
        # independent of the number of variables.
        lappend node $snmp_handle
        lappend node_list $node
        set error_list($host) no_error
    }
}

##
## procedures for doing the measurementjob
## -- get data and calculate data (aggregate)
##

##
## Compute the diff between two rstat calls.
## and calculate and compare the interval time of the specified variable
##

proc rstat_diff {l1 l2 period host} {
    # Derive the values to report for one host from two samples.
    #
    #   l1      newer sample: list of {name type value}, or -1
    #   l2      older sample in the same order, or -1
    #   period  time elapsed between the two samples
    #   host    host the samples belong to
    #
    # Every call reduces the remaining time of the host's variables by
    # the global interval. When the remaining time of a variable drops
    # to zero or below, it is reset to the variable's own interval and
    # a value is stored in variable_list(host,name,data):
    #   - 0 if either sample is -1 (host was not reachable)
    #   - Counter: (new - old) / period, or 0 if period <= 0
    #   - Gauge:   new value divided by 256.0
    #   - any other type (e.g. TimeTicks, INTEGER): the new value as is
    global variable_list node_list interval

    if {($l1 == -1) || ($l2 == -1)} {
        # No sample available: all variables configured for this host
        # are aged and report 0 when due.
        foreach node $node_list {
            set host1 [lindex $node 0]
            if {$host1 == $host} {
                foreach var [lindex $node 1] {
                    set var [lindex $var 0]
                    set time \
                        [expr {$variable_list($host1,$var,rem_time) - $interval}]
                    set variable_list($host1,$var,rem_time) $time
                    if {$time <= 0} {
                        set variable_list($host1,$var,rem_time) \
                            $variable_list($host1,$var,interval)
                        set variable_list($host1,$var,data) 0
                    }
                }
                break
            }
        }
        return
    }

    set i 0
    foreach el1 $l1 {
        set el2 [lindex $l2 $i]
        incr i

        set var [lindex $el1 0]
        # Only variables known for this host are evaluated.
        if {![info exists variable_list($host,$var,tag)]} { continue }

        set time [expr {$variable_list($host,$var,rem_time) - $interval}]
        set variable_list($host,$var,rem_time) $time
        # Not due in this period.
        if {$time > 0} { continue }
        set variable_list($host,$var,rem_time) \
            $variable_list($host,$var,interval)

        set type  [lindex $el1 1]
        set value [lindex $el1 2]
        if {$type == "Counter"} {
            if {$period <= 0} {
                set tmp 0
            } else {
                set delta [expr {$value - [lindex $el2 2]}]
                set tmp [expr {double($delta) / $period}]
            }
        } elseif {$type == "Gauge"} {
            set tmp [expr {$value / 256.0}]
        } else {
            # Every remaining type gets a defined value, so that no
            # result of a previously handled variable is reported.
            set tmp $value
        }
        set variable_list($host,$var,data) $tmp
    }
}

##
## try to get information about load of hosts and store them
##

proc snmp_measure {} {
    # Take the first sample of a measurement period.
    #
    # For every node in node_list all configured variables are fetched
    # with "snmp get". The sample, a list of "name type value" entries,
    # is stored in stat(ip) and the current clock value in time(ip).
    # If a request fails the sample of that node is -1 and its
    # remaining variables are skipped. Changes between reachable and
    # unreachable are logged once per host via error_list.
    global tool_name socket node_list variable_list
    global error_list stat interval time

    ## set a readable timestamp
    if {![info exists time(time_stamp)]} {
        set time(time_stamp) [getdate]
    }

    foreach node $node_list {
        set host        [lindex $node 0]
        set ip          [lindex $node 2]
        set snmp_handle [lindex $node 3]

        # Every node starts with an empty sample; otherwise the values
        # of the nodes handled before would end up in this node's
        # sample as well.
        set result ""

        foreach var [lindex $node 1] {
            set var_name [lindex $var 0]
            set snmp_var_name $variable_list($host,$var_name,table_index)
            if {[catch {snmp get $snmp_handle $snmp_var_name} \
                 snmp_var_value]}  {
                set result -1
                if {$error_list($host) == "no_error"} {
                    set message "$host $var_name [getdate] no information \
                                 available: $time(time_stamp)"
                    syslog debug "$tool_name: $message"
                    set error_list($host) error
                }
                break
            } else {
                if {$error_list($host) == "error"} {
                    set message "$host $var_name [getdate] information \
                                 available: $time(time_stamp)"
                    syslog debug "$tool_name: $message"
                }
                lappend result "$var_name [lindex $snmp_var_value 1] \
                                [lindex $snmp_var_value 2]"
                set error_list($host) no_error
            }
        }
        set stat($ip) $result
        set time($ip) [getclock]
    }
}

##
## take the second sample of every host and pass it, together with
## the sample stored by snmp_measure, to rstat_diff
##

proc snmp_measure1 {} {
    # Take the second sample of a measurement period and evaluate it.
    #
    # For every node in node_list the configured variables are fetched
    # again, unless the sample stored in stat(ip) is -1. The new sample
    # is passed to rstat_diff together with stat(ip), the difference
    # between the current clock value and time(ip), and the host name.
    # A failed request, or a stored sample of -1, makes the new sample
    # -1.
    global tool_name error_list socket node_list
    global variable_list stat time

    foreach node $node_list {
        set host        [lindex $node 0]
        set ip          [lindex $node 2]
        set snmp_handle [lindex $node 3]

        # Every node starts with an empty sample; otherwise the values
        # of the nodes handled before would be evaluated for this host
        # as well.
        set result ""

        foreach var [lindex $node 1] {
            set var_name [lindex $var 0]
            set snmp_var_name $variable_list($host,$var_name,table_index)

            # Without a first sample there is nothing to compare with.
            if {$stat($ip) == -1} {
                set result -1
                continue
            }

            if {[catch {snmp get $snmp_handle $snmp_var_name} \
                 snmp_var_value]} {
                set result -1
                if {$error_list($host) == "no_error"} {
                    set message "$host $var_name [getdate] no \
                                 information available: $time(time_stamp)"
                    syslog debug "$tool_name: $message"
                    set error_list($host) error
                }
                break
            } else {
                if {$error_list($host) == "error"} {
                    set message "$host $var_name [getdate] information \
                                 available: $time(time_stamp)"
                    syslog debug "$tool_name: $message"
                }
                lappend result "$var_name [lindex $snmp_var_value 1] \
                                [lindex $snmp_var_value 2]"
                set error_list($host) no_error
            }
        }
        set now [getclock]
        rstat_diff $result $stat($ip) [expr {$now - $time($ip)}] $host
    }
}

##
## An asynchronous version of the ugly measure loop. This will be removed
## when we get real jobs for all kind of intervals. The jobs will be
## created when a prepare message is received, not here!
##

proc loop {} {
    # Body of the periodic measurement job. Without a server
    # connection the current job is killed and the script exits.
    # Every run except the first one evaluates the sample taken by the
    # previous run and sends the results; every run then takes a new
    # first sample.

    global socket
    static notfirst

    if {![info exists socket]} {
        job kill [job current]
        exit
    }

    if {[info exists notfirst]} {
        # Errors during evaluation or sending must not stop the job.
        catch {
            snmp_measure1
            send_data
        }
    } else {
        set notfirst 1
    }

    snmp_measure
}

##
## Here starts the main script. After some initialization, we fall
## into the event loop processing our jobs every few seconds.
##

# Both the server port and the tool name are required.
# NOTE(review): exit is called without a status in all error paths
# below -- confirm that the parent process does not rely on it.
if {[llength $argv] < 2} {
    puts stderr "Usage: snmp_netguard port name"
    flush stderr
    exit
}

set port      [lindex $argv 0]
set tool_name [lindex $argv 1]

# The script gives up if hostname cannot be executed, but the name it
# returns is overwritten right away: the server is always contacted on
# 127.0.0.1.
if {[catch {exec hostname} server]} {
    syslog error "$tool_name: $server"
    exit
}
set server 127.0.0.1

# connect returns an empty string on failure.
if {[connect $server $port] == ""} {
    syslog error "$tool_name: can not connect to server"
    exit
}

# Wait until the getclock value reaches the next multiple of 60
# (presumably the next full minute -- assumes getclock counts seconds),
# then create the job that runs loop with a period of interval * 1000.
set delay [expr {(60 - ([getclock] % 60)) * 1000}]
after $delay "job create loop [expr {$interval * 1000}]"
