#!/usr/bin/perl

# oh-usage-monitor.pl
# $Date: 2008/04/03 12:48:33 $ $Revision: 1.38 $

# Use this script to monitor the resource usage of your OpenHosting
# VPS on an ongoing basis.

# OpenHosting will send you email if your usage looks like it's going
# to exceed your commit level.  There are two reasons why this level
# of monitoring isn't sufficient:

# 1. If your typical usage is between two commit levels, you'll
#    probably set your commit level to the lower one on purpose, which
#    means that you'll always exceed it, so the notification that
#    OpenHosting provides doesn't tell you anything useful.

# 2. If there is an unexpected spike in your usage at any point during
#    the month after you've already been notified by OpenHosting, you
#    won't find out about it.  For example, if a process in your VPS
#    starts spinning and consuming all available CPU, you could end up
#    paying a huge amount of money for weeks of useless CPU usage
#    without even noticing what's going on until you get your bill at
#    the end of the month.

# This script periodically polls the usage page for your VPS on the
# OpenHosting Web site, stores the resulting data, and produces
# expected usage estimates for the current month, alerts when usage
# seems to have spiked, a recommendation for what your commit level
# should be for next month, and graphs of usage trends for various
# time periods.

# This script requires a number of Perl modules which you can download
# and install from CPAN if you don't have them already (if you are
# missing any, Perl will tell you when you try to run the script).

# This script also requires a MySQL database into which to save the
# usage data that it collects.

# Here's how to use the script:

# 1. Save it into a file and make it executable.

# 2. Run "oh-usage-monitor.pl --config-template", which will create
#    the file $HOME/.oh-web-auth.pl.  Load this file into an editor
#    and make the appropriate changes to it.

# 3. Run "oh-usage-monitor.pl --db-template > oh-usage.sql".  This
#    will generate the SQL code you need to create the MySQL user and
#    database.

# 4. Execute the resulting file as a SQL script against your MySQL
#    server, e.g.: "mysql -u root -p --batch < oh-usage.sql".

# Now, you should be ready to go.  If you run the script with no
# arguments, it will fetch your current usage data from OpenHosting
# and store it in the database.  The first time you run it, it will
# average your cumulative usage for the month back to the start of the
# month.  After that, you should run it either once per hour at the
# half hour mark, or twice per hour at 15 and 45 minutes past the
# hour, to update the database with new usage data.

# Note that running the script with no arguments doesn't produce any
# reports or recommendations; it just fetches usage data and inserts
# it into the database.  Here are the command-line options you can
# specify to make the script actually do stuff:

# --help	Print a usage message and exit.
# --verbose	Generate verbose output (doesn't really do much).
# --config-file	Use a configuration file other than ~/.oh-web-auth.pl.
# --mail-to	Specify the email address to which reports should be
# 		sent.  Defaults to $MAILTO or $USER or $LOGNAME or
#		"root".
# --history	Up to how many time intervals of history to fetch for
# 		the usage graphs and comparisons
# --nofetch	Don't try to fetch new data from the Web site -- just
#		operate on existing data.
# --report	Generate a usage report by email.
# --report-if-new Generate a usage report if new data was fetched.
# --check	Check if usage appears to have spiked, and generate a
#		report if it has.
# --threshold	Threshold for usage spikes (standard deviations above
#		average, default is 2).
# --[no]average	If there appears to be a gap in the data, then average
#		the new usage hourly between the last fetched data and
#		the current fetched data, rather than putting all the
#		new usage at the current time (or don't average if
#		"no" prefix is specified).  Defaults to true.
# --recommend	Generate recommendations for next month's commit
#		level.  Defaults to true if it's less than a week
#		before the end of the month.
# --as-of	Generate a report as of the specified time (parsed
#		using Date::Parse, so the format is pretty flexible).
#		Fetch is turned off automatically when --as-of is
#		used.
# --[no]randomize If true, then pause for a random interval of up to
#		five minutes before fetching data from the server.
#		Defaults to false if script is running in a terminal,
#		true otherwise.  This is to prevent the OpenHosting
#		Web site from being overwhelmed if a bunch of people
#		all call this script from their crontab files at the
#		same time.

# As a sample configuration, here's what I have in my crontab file:

# 15 0 * * *     ~/scripts/oh-usage-monitor.pl --report
# 15 1-23 * * *  ~/scripts/oh-usage-monitor.pl --check
# 45 * * * *     ~/scripts/oh-usage-monitor.pl

# The first line says, "Generate a report once per day at 12:15am no
# matter what."  The second line says, "Fetch new usage data hourly at
# 15 minutes after the hour and generate a report if there's a usage
# spike."  The third line says, "Fetch new usage data hourly at 45
# minutes after the hour, in case the Web site wasn't updated yet a
# half hour ago."

# Please contact me at jik@kamens.brookline.ma.us if you have any
# questions, comments or suggestions!  If you email me and let me know
# you're using the script, I'll email you when I release a new
# version.

######################################################################
#
# Copyright (C) 2008 Jonathan Kamens
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# General Public License for more details.
#
# The GNU General Public License may be viewed at
# http://www.gnu.org/licenses/.
#
######################################################################

use strict;
use warnings;

use DBI;
use Data::Dumper;
use Date::Parse;
use English;
use File::Basename;
use File::Slurp;
use GD::Graph::bars;
use Getopt::Long;
use HTML::FormatText;
use HTML::TreeBuilder;
use MIME::Entity;
use POSIX qw(strftime);
use Statistics::Descriptive;
use Time::Local;
use WWW::Mechanize;

# For sendmail
$ENV{PATH} = "/usr/sbin:$ENV{PATH}";
my $HOME = $ENV{"HOME"};

my $config_file = "$HOME/.oh-web-auth.pl";
my $url = "https://www.openhosting.com/cp/vps_usage?vps=%s";
my $start_recommending = 60 * 60 * 24 * 7; # a week before the end of the month

my($verbose, $help, $fetch, $report, $report_if_new, $recommend,
   $as_of, $mail_to, $check, $config_template, $db_template, $randomize);
my($history) = 60;
my($threshold) = 2; # standard deviations
my($average) = 1;
my($average_threshold) = 60 * 90; # seconds = 90 minutes
my $whoami = basename $0;
my $usage = "Usage: $whoami [--help] [--verbose]
	[--config-template] [--config-file=filename]
	[--mail-to=address] [--history=number] [--report]
	[--report-if-new] [--check] [--threshold=std-devs] [--noaverage]
	[--[no]recommend [--as-of=date | --nofetch] [--[no]randomize]\n";

die $usage if (! GetOptions("help" => \$help,
			    "verbose" => \$verbose,
			    "config-template" => \$config_template,
			    "db-template" => \$db_template,
			    "config-file=s" => \$config_file,
			    "mail-to" => \$mail_to,
			    "fetch!" => \$fetch,
			    "history=i" => \$history,
			    "report!" => \$report,
			    "report-if-new!" => \$report_if_new,
			    "check!" => \$check,
			    "threshold=i" => \$threshold,
			    "average!" => \$average,
			    "recommend!" => \$recommend,
			    "as-of=s" => \$as_of,
			    "randomize!" => \$randomize,
		     ));

if ($help) {
    print $usage;
    exit;
}

if ($config_template) {
    if (-f $config_file) {
	die("$whoami: $config_file already exists.\n",
	    "\tPlease delete it before creating a new template,\n",
	    "\tconfiguration, or specify --config-file to save the template\n",
	    "\tin a different location.\n");
    }
    open(CONFIG, ">", $config_file) or
	die "$whoami: open(>$config_file): $!\n";
    print(CONFIG "
\$username = '<OpenHosting control panel username>'; # Required
\$password = '<OpenHosting control panel password>'; # Required
# \$vps = '<OpenHosting VPS name to monitor>'; # Optional, defaults to username
# \$db_host = '<MySQL database host name>';  # Optional, defaults to localhost
# \$db_name = '<MySQL database name>'; # Optional, defaults to 'oh_usage'
# \$db_user = '<MySQL user name>'; # Optional, defaults to 'oh_usage'
# \$db_password = '<MySQL password>'; # Optional defaults to same as \$db_user

1;
") or die "$whoami: write(>$config_file): $!\n";
    close(CONFIG) or die "$whoami: close($config_file): $!\n";
    chmod((stat $config_file)[2] & ~077, $config_file) or
	die "$whoami: chmod($config_file): $!\n";
    exit;
}

my($username, $password, $vps, $db_host, $db_name, $db_user, $db_password);

{
    die "$whoami: Can't read configuration file $config_file\n"
	if (! -f $config_file);
    my $mode = (stat(_))[2];
    die "$whoami: $config_file is not protected\n" if ($mode & 044);
}

eval read_file $config_file;
die $@ if $@;

die "$whoami: No \$username in $config_file\n" if (! $username);
die "$whoami: No \$password in $config_file\n" if (! $password);
$vps = $username if (! $vps);
$db_host = "localhost" if (! $db_host);
$db_name = "oh_usage" if (! $db_name);
$db_user = "oh_usage" if (! $db_user);
$db_password = $db_user if (! $db_password);

if ($db_template) {
    print "
CREATE DATABASE IF NOT EXISTS $db_name;

GRANT ALL PRIVILEGES on $db_name.* TO '$db_user'\@'$db_host'
IDENTIFIED BY '$db_password';

USE $db_name;

CREATE TABLE oh_resources (
	resource_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
	resource_name VARCHAR(20) NOT NULL,
	UNIQUE INDEX (resource_name),
	resource_rate INT NOT NULL
);

INSERT INTO oh_resources (resource_name, resource_rate) VALUES
	('CPU', 44444),
	('VM', 222222222),
	('RSS', 44444444),
	('Bandwidth', 16666667),
	('Disk', 83333333);

CREATE TABLE oh_usage (
	usage_id INT NOT NULL AUTO_INCREMENT PRIMARY KEY,
	resource_id INT NOT NULL,
	FOREIGN KEY (resource_id) REFERENCES oh_resources (resource_id),
	INDEX (resource_id),
	usage_time DATETIME NOT NULL,
	INDEX (usage_time),
	usage_amount BIGINT NOT NULL
);

CREATE TABLE oh_latest (
	resource_id INT NOT NULL,
	INDEX (resource_id),
	FOREIGN KEY (resource_id) REFERENCES oh_resources (resource_id),
	usage_amount BIGINT NOT NULL
);

CREATE TABLE oh_commit_levels (
       commit_level FLOAT(5,2) NOT NULL,
       discount FLOAT(3,2) NOT NULL
);

INSERT INTO oh_commit_levels (commit_level, discount) VALUES
       (19.95, 0.00),
       (29.95, 0.05),
       (49.95, 0.10),
       (79.95, 0.15),
       (119.95, 0.20);

CREATE VIEW oh_commit_thresholds (commit_level, discount, threshold) AS
SELECT commit_level, discount, (1 - discount) * commit_level
FROM oh_commit_levels;

CREATE TABLE oh_status (
       commit_level FLOAT(5,2) NOT NULL
);

INSERT INTO oh_status (commit_level) VALUES (19.95);
";

    exit;
}

if (! defined $fetch) {
    $fetch = ! $as_of;
}
elsif ($fetch && $as_of) {
    die "$whoami: Can't specify both --fetch and --as-of\n";
}

if ($as_of) {
    my($time) = str2time($as_of);
    if (! $time) {
	die "$whoami: Can't parse --as-of time \"$as_of\"\n";
    }
    $as_of = $time;
}

if (! defined $randomize) {
    $randomize = ! -t STDERR;
}

$url = sprintf($url, $vps);

my $dsn = "DBI:mysql:host=$db_host;database=$db_name";
my $dbh = DBI->connect($dsn, $db_user, $db_password) or
    die "$whoami: ", DBI::errstr;

my $sth = $dbh->prepare("SELECT * FROM oh_resources") or
    die "$whoami: ", $dbh->errstr;
$sth->execute or die "$whoami: ", $sth->errstr;
my %resource_id_to_name;
while (my $ref = $sth->fetchrow_hashref()) {
    $resource_id_to_name{$ref->{resource_id}} = $ref->{resource_name};
}
my(%name_to_resource_id) = reverse %resource_id_to_name;

$sth = $dbh->prepare("SELECT * FROM oh_commit_thresholds ORDER BY threshold")
    or die "$whoami: ", $dbh->errstr;
$sth->execute or die "$whoami: ", $sth->errstr;
my(@thresholds);
while (my $ref = $sth->fetchrow_hashref()) {
    push(@thresholds, $ref);
}

$sth = $dbh->prepare("SELECT * FROM oh_latest") or 
    die "$whoami: ", $dbh->errstr;
$sth->execute or die "$whoami: ", $sth->errstr;
my %old_latest;
while (my $ref = $sth->fetchrow_hashref()) {
    $old_latest{$ref->{resource_id}} = $ref->{usage_amount};
}

$sth = $dbh->prepare("SELECT UNIX_TIMESTAMP(MIN(usage_time)), UNIX_TIMESTAMP(MAX(usage_time)) FROM oh_usage")
    or die "$whoami: ", $dbh->errstr;
$sth->execute or die "$whoami: ", $sth->errstr;
my($first_usage_time, $last_usage_time) = $sth->fetchrow_array;

my($start_of_month, $end_of_month, $seconds_in_month);
my($end_of_next_month, $seconds_in_next_month);
my($current_commit_value, $current_commit_level);

sub start_end_of_month {
    my($timestamp) = @_;
    my(@localtime) = localtime($timestamp);
    my($start_of_month, $end_of_month, $seconds_in_month);

    $start_of_month = timelocal(0, 0, 0, 1, $localtime[4], $localtime[5]);
    $end_of_month = timelocal(0, 0, 0, 1, ($localtime[4] == 11) ? (0, $localtime[5]+1) :  ($localtime[4]+1, $localtime[5])) - 1;
    $seconds_in_month = $end_of_month - $start_of_month + 1;
    ($start_of_month, $end_of_month, $seconds_in_month);
}
    
my($timestamp);

if ($fetch) {
    if ($randomize) {
	my $secs = int(rand(301));
	print "Sleeping for $secs seconds\n" if ($verbose);
	sleep($secs);
    }

    my $mech = WWW::Mechanize->new();
    my($tmp, @resource_names, @resource_values,
       %resource_id_to_column, %column_to_resource_id, %latest, %last_month);

    $mech->get($url);
    $mech->set_fields("userid" => $username,
		      "passwd" => $password);
    $mech->click();

    $_ = $mech->content();

    # Figure out when last updated

    if (m,<b>Last updated: (.*)</b>,) {
	my $last_updated = $1;
	$timestamp = str2time($last_updated);
	if (! $timestamp) {
	    warn "Could not parse last_updated time \"$last_updated\", using current time\n";
	}
	else {
	    print "Last updated: $last_updated\n" if ($verbose);
	}
    }
    else {
	warn "Could not find last updated time; aborting fetch\n";
	$fetch = 0;
	goto no_fetch;
    }
    $timestamp = time if (! $timestamp);
    ($start_of_month, $end_of_month, $seconds_in_month) =
	&start_end_of_month($timestamp);

    # find the commit level
    (m,<tr>\s*<th[^<]+Usage commitment</th>\s*<th[^>]*>\$(\d+\.\d+)<,s)
	or die "$whoami: Couldn't find current commit level on Web page\n";
    $current_commit_value = $1;
    $dbh->do("UPDATE oh_status SET commit_level=? WHERE commit_level <> ?",
	     {}, $current_commit_value, $current_commit_value)
	or die("$whoami: Error updating commit level in database: ",
	       $dbh->errstr);

    # Find the column headers

    m/<tr>\s*<th.*?<\/th>\s*((?:<th>.*?<\/th>\s*)+)<th>Total<\/th>\s*<\/tr>/s or die;
    $_ = $POSTMATCH;
    $tmp = $1;
    $tmp =~ s/^\s*<th>\s*//s;
    $tmp =~ s/\s*<\/th>\s*$//s;
    @resource_names = split(/\s*<\/th>\s*<th>\s*/s, $tmp);
    foreach my $i (0..@resource_names-1) {
	my $name = $resource_names[$i];
	my $id = $name_to_resource_id{$name};
	die "Unrecognized resource: $name\n" if (! $id);
	$resource_id_to_column{$id} = $i;
    }
    %column_to_resource_id = reverse %resource_id_to_column;

    # Find the latest values

    m/<tr>\s*<th.*?<\/th>\s*((?:<td>\s*\d+\s*<\/td>\s*)+)\s*<\/tr>/s or die;
    $_ = $POSTMATCH;
    $tmp = $1;
    $tmp =~ s/^\s*<td>\s*//s;
    $tmp =~ s/\s*<\/td>\s*$//s;
    @resource_values = split(/\s*<\/td>\s*<td>\s*/s, $tmp);
    foreach my $i (0..@resource_values-1) {
	my $id = $column_to_resource_id{$i};
	if (! $id) {
	    die "Too many resource values!\n";
	}
	my $value = $resource_values[$i];
	$latest{$id} = $value;
    }

    # Find last month's values, in case there's a rollover
    # (same code as above, different variable)
    m/<tr>\s*<th.*?<\/th>\s*((?:<td>\s*\d+\s*<\/td>\s*)+)\s*<\/tr>/s or die;
    $tmp = $1;
    $tmp =~ s/^\s*<td>\s*//s;
    $tmp =~ s/\s*<\/td>\s*$//s;
    @resource_values = split(/\s*<\/td>\s*<td>\s*/s, $tmp);
    foreach my $i (0..@resource_values-1) {
	my $id = $column_to_resource_id{$i};
	if (! $id) {
	    die "Too many resource values!\n";
	}
	my $value = $resource_values[$i];
	$last_month{$id} = $value;
    }

    # Insert new values into the database

    if (! %old_latest) {
	# There's no prior data in the database, so spread out all of
	# the current usage over the entire month so that the
	# estimates will be more accurate than if we inserted them all
	# at one point in time.
	warn "No prior data!  Averaging back to start of month.\n";
	&average($start_of_month-1, $timestamp, %latest);
    }	    
    else {
	my($key) = keys %latest;
	if ($latest{$key} < $old_latest{$key}) {
	    warn "Rollover!\n";
	    if (%last_month) {
		&insert_new_usage($start_of_month - 1, \%last_month);
	    }
	    else {
		warn "No data from last month to back-fill rollover!\n";
	    }
	    map { $old_latest{$_} = 0; } keys %old_latest;
	}
	if ($average &&
	    ($timestamp - $last_usage_time > $average_threshold)) {
	    warn "Data gap!  Averaging between " .
		localtime($last_usage_time)
		. " and " . localtime($timestamp) . ".\n";
	    &average($last_usage_time, $timestamp, %latest);
	}
	else {
	    &insert_new_usage($timestamp, \%latest);
	}
    }
}
else {
  no_fetch:
    if ($as_of) {
	$last_usage_time = $as_of;
    }
    $timestamp = $last_usage_time || time;
    ($start_of_month, $end_of_month, $seconds_in_month) =
	&start_end_of_month($timestamp);

    $sth = $dbh->prepare("SELECT commit_level FROM oh_status")
	or die $dbh->errstr;
    $sth->execute or die $sth->errstr;
    ($current_commit_value) = $sth->fetchrow_array;
    die "Could not fetch current commit level from database\n"
	if (! $current_commit_value);
}

($current_commit_level) = grep($_->{commit_level} == $current_commit_value,
			       @thresholds);
die "Could not find commit level $current_commit_value in database\n"
    if (! $current_commit_level);

(undef, $end_of_next_month, $seconds_in_next_month) =
    &start_end_of_month($end_of_month + 1);

if (! defined($recommend)) {
    if ($end_of_month - $timestamp <= $start_recommending) {
	$recommend = 1;
    }
}

sub average {
    my($from, $to, %latest) = @_;
    my($old_timestamp) = $to;
    my($intervals) = int(($to - $from) / (60 * 60));
    $old_timestamp -= ($intervals - 1) * 60 * 60;
    my(%difference);
    map { $difference{$_} = $latest{$_} - ($old_latest{$_} || 0) } keys %latest;
    my(%oldest_latest) = %old_latest;
    foreach my $i (1..$intervals) {
	my(%this_latest);
	map {
	    $this_latest{$_} = ($oldest_latest{$_} || 0) +
		int($i / $intervals * $difference{$_});
	} keys %difference;
	&insert_new_usage($old_timestamp, \%this_latest);
	%old_latest = %this_latest;
	$old_timestamp += 60 * 60;
    }
}

sub insert_new_usage {
    my($timestamp, $latest_ref) = @_;
    my(%latest) = %{$latest_ref};

    foreach my $id (keys %latest) {
	my $value = $latest{$id};
	my $old_value = $old_latest{$id} || 0;
	my $new_usage = 0;
	my $new_latest = 0;
	if ($value > $old_value) {
	    print "Increase (id $id)!\n" if ($verbose);
	    $new_usage = $value - $old_value;
	    $new_latest = $value;
	}
	if ($new_usage) {
	    if ($report_if_new) {
		$report = 1;
		$report_if_new = 0;
	    }
	    $dbh->do("INSERT INTO oh_usage (resource_id, usage_time, usage_amount)
VALUES (?, FROM_UNIXTIME(?), ?)", {}, $id, $timestamp, $new_usage)
		or die $dbh->errstr;
	    $first_usage_time = $timestamp if (! $first_usage_time);
	    # The "if" below should always be true, but it doesn't hurt to
	    # be cautious.
	    $last_usage_time = $timestamp if (! $last_usage_time ||
					      $last_usage_time < $timestamp);
	}
	if ($new_latest) {
	    ($dbh->do("UPDATE oh_latest SET usage_amount=? where resource_id=?",
		      {}, $new_latest, $id) > 0 or
	     $dbh->do("INSERT INTO oh_latest (resource_id, usage_amount)
VALUES (?, ?)", {}, $id, $new_latest))
		or die $sth->errstr;
	}
    }
}

# Calculate predicted usages for the month based on hourly, daily and weekly
# usage.

# First need to figure out the cost so far this month.
if ($as_of) {
    my($sql) = "SELECT u.resource_id, SUM(u.usage_amount/r.resource_rate)/100
FROM oh_usage u
INNER JOIN oh_resources r ON u.resource_id = r.resource_id
WHERE u.usage_time >= FROM_UNIXTIME($start_of_month) AND
      u.usage_time <= FROM_UNIXTIME($as_of)
GROUP BY u.resource_id";
    $sth = $dbh->prepare($sql) or die $dbh->errstr;
}
else {
    $sth = $dbh->prepare("
SELECT l.resource_id, SUM(l.usage_amount/r.resource_rate)/100
FROM oh_latest l
INNER JOIN oh_resources r ON l.resource_id = r.resource_id
GROUP BY l.resource_id")
	or die $dbh->errstr;
}

$sth->execute or die $sth->errstr;
my($so_far_usage, %so_far_usage);
while (my($resource_id, $usage) = $sth->fetchrow_array) {
    $so_far_usage += $usage;
    $so_far_usage{$resource_id} = $usage;
}

my(@estimate_intervals) = ("Monthly", "Two Week", "Weekly", "Daily", "Hourly");
my(%estimate_intervals) =
    (
     "Hourly" => 60 * 60,
     "Daily" => 60 * 60 * 24,
     "Weekly" => 60 * 60 * 24 * 7,
     "Two Week" => 60 * 60 * 24 * 14,
     "Monthly" => $seconds_in_month,
     );

if ($last_usage_time - $first_usage_time < $seconds_in_month) {
    unshift(@estimate_intervals, "This Month's Data");
    $estimate_intervals{"This Month's Data"} =
	[ $last_usage_time - $first_usage_time - 1,
	  60 * 60 ];
}
my(%estimates, %high_estimates, %detail_estimates, %next_month_estimates);

foreach my $interval (@estimate_intervals) {
    ($estimates{$interval},
     $detail_estimates{$interval},
     $next_month_estimates{$interval})
	= &make_estimates($first_usage_time,
			  $last_usage_time,
			  $estimate_intervals{$interval});
}

my($expanding_estimates, $expanding_details) =
    &make_expanding_estimates($first_usage_time, $last_usage_time,
			      60 * 60 * 24);

sub make_expanding_estimates {
    my($first_time, $last_time, $seconds) = @_;
    my(%estimates, %details);
    my($count) = 0;

    while ($count++ < $history &&
	   $last_time - $seconds * $count > $first_time) {
	($estimates{$count}, $details{$count}, undef) =
	    &make_estimate($last_time, $seconds * $count, 1);
    }

    return(\%estimates, \%details);
}
    

sub make_estimates {
    my($first_time, $last_time, $seconds) = @_;
    my($min_seconds, %estimates, %details);
    my($count) = 0;
    my($next_month);

    if (ref $seconds eq 'ARRAY') {
	$min_seconds = $seconds->[1];
	$seconds = $seconds->[0];
    }
    else {
	$min_seconds = $seconds;
    }

    my $gap = $seconds;
    if ($last_time - $history * $gap < $first_time) {
	$gap = int(($last_time - $first_time - $seconds) / $history);
	if ($gap < 60 * 60) {
	    $gap = 60 * 60;
	}
    }
    while ($last_time - $min_seconds > $first_time && $count++ < $history) {
	my($tmp);
	($estimates{$last_time}, $details{$last_time}, $tmp) =
	    &make_estimate($last_time, 
			   ($last_time - $first_time < $seconds) ?
			   ($last_time - $first_time) : $seconds);
	$next_month = $tmp if (! defined $next_month);
	$last_time -= $gap;
    }

    return(\%estimates, \%details, $next_month);
}
    

sub make_estimate {
    my($last, $seconds, $all_next_month) = @_;
    my(%detail);
    my($next_month);

    $sth = $dbh->prepare("
SELECT u.resource_id, SUM(u.usage_amount/r.resource_rate)/100
FROM oh_usage u
INNER JOIN oh_resources r ON u.resource_id = r.resource_id
WHERE u.usage_time <= FROM_UNIXTIME($last) AND
      u.usage_time > DATE_SUB(FROM_UNIXTIME($last), INTERVAL $seconds SECOND)
GROUP BY u.resource_id")
	or die $dbh->errstr;
    $sth->execute or die $sth->errstr;
    my $total_usage = 0;
    while (my($resource_id, $usage) = $sth->fetchrow_array) {
	$detail{$resource_id} = $usage;
	$total_usage += $usage;
    }
    # Note: the $next_month value is NOT scaled by the discount,
    # because we use it for estimating what our commit level should be
    # next month, and the thresholds we've calculated for doing that
    # are based on a 0% discount.
    $next_month = $seconds_in_next_month / $seconds * $total_usage;
    my($factor);
    if ($all_next_month) {
	$factor = $seconds_in_next_month / $seconds;
    }
    else {
	$factor = ($end_of_month - $last + 1) / $seconds;
    }
    $total_usage = ($all_next_month ? 0 : $so_far_usage) +
	$factor * $total_usage;
    $total_usage *= (1 - $current_commit_level->{discount});
    map {
	$detail{$_} =($all_next_month ? 0 : $so_far_usage{$_}) +
	    $factor * $detail{$_};
	$detail{$_} *= (1 - $current_commit_level->{discount});
    } keys %detail;
    map {
	$_ = sprintf("%.2f", $_);
    } ($total_usage, values %detail);
    return($total_usage, \%detail, sprintf("%.2f", $next_month));
}

foreach my $interval (@estimate_intervals) {
    &check($interval);
}

sub check {
    my($interval) = @_;
    my $estimates = $estimates{$interval};
    my(@times) = sort { $b <=> $a } keys %{$estimates};
    return if (@times < 2);
    my($t1) = $times[0];
    my $stat = Statistics::Descriptive::Sparse->new();
    $stat->add_data(values %{$estimates});
    my $mean = $stat->mean;
    my $dev = $stat->standard_deviation;
    if ($estimates->{$t1} - $mean >= $threshold * $dev) {
	$report = 1 if ($check);
	$high_estimates{$interval} = 1;
    }
}

if ($report) {
    my $html = '';
    my $next_month_graph = "Next month expanding time scale";

    $html .= "<html>
<head>
<title>VPS $vps OpenHosting Usage Report</title>
</head>
<body>
<h1>VPS $vps OpenHosting Usage Report</h1>
";

    my(@texts);
    foreach my $set (@estimate_intervals) {
	if (my $t = &last_estimate_text($set)) {
	    push(@texts, $t);
	}
    }
    $html .= "<p>" . join("<br/>\n", @texts) . "</p>\n";

    if ($recommend) {
	$html .= "<h2>Commit level recommendations for next month</h2>\n";
	$html .= "<p>Current commit level: $current_commit_value</p>\n";
	@texts = ();
	foreach my $set (@estimate_intervals) {
	    next if (! $next_month_estimates{$set});
	    my(@this) = @thresholds;
	    my $usage = $next_month_estimates{$set};
	    my $r = shift @this;
	    my $discounted;
	    while (@this && $this[0]->{threshold} < $usage) {
		$r = shift @this;
		$discounted = 1;
	    }
	    my $t = "$set: $r->{commit_level}";
	    if ($discounted) {
		$t .= " (undiscounted $usage >= threshold $r->{threshold})";
	    }
	    else {
		$t .= " (undiscounted $usage < threshold $this[0]->{threshold})";
	    }
	    if ($r->{commit_level} != $current_commit_value) {
		$t = "<b>$t</b>";
	    }
	    push(@texts, $t);
	}
	$html .= "<p>" . join("<br/>\n", @texts) . "</p>\n";

	$html .= "<p><a href=\"#" . &tag($next_month_graph) . "\">Next month estimates by expanding time scale</a></p>\n";
    }
	
    $html .= "<br/>\n";

    $html .= "<p>Current as of: <b>" . localtime($timestamp) . "</b><br/>\n";
    $html .= "<p>Current usage statistics available at <a href=\"$url\">$url</a>.</p>\n";

    $html .= "<hr/>
<center>
";

    my(@image_ids, @image_data, $tag, $set);
    $html .= "<p>";
    foreach $set (@estimate_intervals) {
	$tag = &tag($set);
	if (my $data = &graph("History of $set Estimates",
			      $estimates{$set},
			      $detail_estimates{$set},
			      1)) {
	    $html .= "<a name=\"$tag\"><img src=\"cid:$tag\" alt=\"$set graph\"/></a>\n";
	    push(@image_ids, $set);
	    push(@image_data, $data);
	}
    }
    $html .= "</p>\n";

    if ($recommend) {
	$html .= "<p>";
	$set = $next_month_graph;
	$tag = &tag($set);
	if (my $data = &graph("Next month estimates by expanding time scale",
			      $expanding_estimates,
			      $expanding_details,
			      0, "Days")) {
	    $html .= "<a name=\"$tag\"><img src=\"cid:$tag\" alt=\"$set graph\"/></a>\n";
	    push(@image_ids, $set);
	    push(@image_data, $data);
	}
	$html .= "</p>\n";
    }

    $html .= "</body>
</html>
";

    my $top = MIME::Entity->build(Type    => "multipart/related",
				  To      => $mail_to || $ENV{"MAILTO"} || $ENV{"USER"} || $ENV{"LOGNAME"} || "root",
				  Subject => "OpenHosting usage report for VPS $vps") or die;

    my $alt = MIME::Entity->build(Type => "multipart/alternative") or die;

    my $tree = HTML::TreeBuilder->new_from_content($html);
    my $text = HTML::FormatText->new()->format($tree);

    $alt->attach(Data     => $text,
		 Type     => "text/plain",
		 Encoding => "7bit");
    $alt->attach(Data     => $html,
		 Type     => "text/html",
		 Disposition => undef,
		 Encoding => "7bit") or die;
    
    $top->add_part($alt);

    foreach my $i (0..@image_ids-1) {
	my $set = $image_ids[$i];
	my $data = $image_data[$i];
	my $tag = &tag($set);
	$top->attach(Data        => $data,
		     Type        => "image/png",
		     Encoding    => "base64",
		     Id          => $tag,
		     Filename    => "$set.png") or die;
    }

    # XXX Should really use Mail::Mailer or something
    open(MAIL, "|-", "sendmail -t -oi -oem") or die;
#    open(MAIL, "|-", "cat") or die;
    $top->print(\*MAIL) or die;
    close(MAIL) or die;
}

sub last_estimate_text {
    my($which) = @_;
    my $eref = $estimates{$which};
    return '' if (! ($eref && %{$eref}));
    my(%e) = %{$eref};
    my(%d) = %{$detail_estimates{$which}};
    my(@times) = sort { $b <=> $a } keys %e;
    my $t1 = $times[0];
    my $total = $e{$t1};
    my %details = %{$d{$t1}};
    my($t2, $old_total, %old_details);
    my $extra = '';
    if (@times > 1) {
	($t2) = (grep($_ <= $t1 - $estimate_intervals{$which},
		     @times), $times[1]);
	$old_total = $e{$t2};
	%old_details = %{$d{$t2}};
	my $date1 = strftime("%m/%d", localtime($t1));
	my $date2 = strftime("%m/%d", localtime($t2));
	if ($date2 ne $date1) {
	    $date2 = "$date2 ";
	}
	else {
	    $date2 = "";
	}
	$extra .= " (" . &pct_change($total, $old_total) . " since " .
	    strftime("$date2%H:%M", localtime($t2)) . ")";
    }
    # Commented out since the graphs are now stacked and do a better job
    # of representing this data.
#     my(@detail_strings);
#     foreach my $resource_id (sort { $a <=> $b } keys %details) {
# 	my($dstr) = $resource_id_to_name{$resource_id} . " " .
# 	    $details{$resource_id};
# 	if ($t2) {
# 	    $dstr .= " (" . &pct_change($details{$resource_id},
# 					$old_details{$resource_id}) . ")";
# 	}
# 	push(@detail_strings, $dstr);
#     }
#     $extra .= " (" . join(", ", @detail_strings) . ")";
    my $t = "$which estimate: <a href=\"#" . &tag($which) . "\"><b>$total</b></a><font size=\"-2\">$extra</font>";
    if ($high_estimates{$which}) {
	$t = "<b><font color=\"red\">$t</font></b>";
    }
    return $t;
}

sub pct_change {
    my($new, $old) = @_;
    my $pct = int(($new / $old - 1) * 100);
    if (! $pct) {
	$pct = "unchanged";
    }
    elsif ($pct > 0) {
	$pct = "+$pct%";
    }
    else {
	$pct = "$pct%";
    }
    $pct;
}

sub graph {
    my($title, $estimates_ref, $details_ref, $format_dates,
	$x_label) = @_;
    my(%estimates) = %{$estimates_ref};
    my(%detail_estimates) = %{$details_ref};

    return undef if (! %estimates);

    my(@timestamps) = sort { $a <=> $b } keys %estimates;
    my(@timestrings);
    if ($format_dates) {
	@timestrings = map(strftime("%m/%d %H:%M", localtime($_)),
			   @timestamps);
    }
    else {
	@timestrings = @timestamps;
    }

    my(@values, @legend, $key_stamp);

    for ($key_stamp = 0;
	 $key_stamp < @timestamps && ! %{$detail_estimates{$timestamps[$key_stamp]}};
	 $key_stamp++) {}
	 
    foreach my $resource_id (sort { $a <=> $b } 
			     keys %{$detail_estimates{$timestamps[$key_stamp]}}) {
	push(@values,
	     [map($detail_estimates{$_}->{$resource_id}, @timestamps)]);
	push(@legend, $resource_id_to_name{$resource_id});
    }

    my $skip;
    if (($skip = @timestamps / 10) > 1) {
	$skip = int($skip + 1);
    }
    else {
	$skip = 1;
    }
    my $graph = GD::Graph::bars->new(400, 300) or die;
    $graph->set(y_label           => 'Cost',
		x_label		  => $x_label,
		title             => $title,
		x_labels_vertical => 1,
		x_label_skip      => $skip,
		cumulate          => 1,
		) or die $graph->error;
    $graph->set_legend(@legend);
    my $gd = $graph->plot([\@timestrings, @values]) or die $graph->error;
    return $gd->png;
}

sub tag {
    local($_) = @_;
    s/(\W)/_/g;
    $_;
}