#!/usr/bin/ruby

# Copyright (c) 2008, 2009 Peter Palfrader
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

require "yaml"

require 'db'
require 'db-config'
require 'optparse'
require 'find'
require 'fileutils'
require 'digest/sha1'
require "ftools"

# Print the usage line and the parser's option summary, then terminate.
#
# parser - object whose #summarize yields the per-option help lines
# code   - process exit status (default 0)
# io     - stream the help text is written to (default STDOUT)
#
# Never returns: always ends with exit(code).
def show_help(parser, code=0, io=STDOUT)
  script = File.basename($0, '.*')
  usage_line = "Usage: #{script} --archive=<archivename> --path=<path> --farm=<dir> [--date=<date>] [--ignore=<path> [--ignore=<path ...]] [--verbose=(1|2)] [--quick]"

  io.puts(usage_line)
  io.puts(parser.summarize)
  exit(code)
end
# Command-line state.  $farm, $ignore, $verbose and $quick are globals because
# NodeManager reads them directly; @archive, @path and @date live on the
# top-level object and are handed to NodeManager explicitly further down.
$verbose = 0
$ignore = []
ARGV.options do |opts|
  # Plain block parameters plus explicit assignment: the old `{ |@archive| }`
  # form only parses on Ruby 1.8.
  opts.on("-a", "--archive=<archivename>"  , String, "Name of the archive")      { |val| @archive = val }
  opts.on("-f", "--farm=<dir>"             , String, "Location of the hash farm"){ |val| $farm = val }
  opts.on("-p", "--path=<path>"            , String, "Path to the ftp root")     { |val| @path = val }
  opts.on("-d", "--date=<date>"            , String, "Date of this archive run") { |val| @date = val }
  opts.on("-i", "--ignore=<path>"          , String, "Ignore this file")         { |val| $ignore << val }
  opts.on("-v", "--verbose=<n>"            , Integer, "Be verbose")              { |val| $verbose = val }
  opts.on("-q", "--quick"                  , nil,    "quick (Ignore files older than last run)") { |val| $quick = val }

  opts.on_tail("-h", "--help", "Display this help screen")        { show_help(opts) }
  opts.parse!
end

# Anything still left in ARGV after parse! is an unexpected positional
# argument; --archive, --farm and --path are mandatory.
show_help(ARGV.options, 1, STDERR) if ARGV.length > 0
show_help(ARGV.options, 1, STDERR) unless @archive
show_help(ARGV.options, 1, STDERR) unless $farm
show_help(ARGV.options, 1, STDERR) unless @path
# Without --date the run is stamped with the current time.  Note that a
# user-supplied date stays a String here.
@date = Time.now unless @date

unless File.directory?($farm)
  # The constant is STDERR; `Stderr` does not exist and raised NameError
  # instead of printing this message.
  STDERR.puts "#{$farm} is not a directory"
  exit(1)
end


# Look up the primary key of the row in +table+ whose +field+ equals +value+,
# inserting such a row first when none exists.
#
# db    - handle responding to query_row(sql, *binds) and insert_row(table, hash)
# table - table name; its primary key column is taken to be "<table>_id"
# field - column to match on
# value - value bound to the query placeholder
#
# Returns the primary key value.  For a freshly inserted row the key is read
# back from the hash passed to insert_row, so insert_row is expected to store
# the generated key in that hash.
# +table+ and +field+ are interpolated into the SQL text; only +value+ is bound.
def get_pk(db, table, field, value)
  key_column = table + '_id'
  existing = db.query_row("SELECT #{key_column} FROM #{table} WHERE #{field}=?", value)
  return existing[key_column] if existing

  fresh = { field => value }
  db.insert_row(table, fresh)
  fresh[key_column]
end



# Records one mirror run of an archive tree in the database.
#
# Constructing a NodeManager registers a new mirrorrun row for the archive and
# then walks the tree below +path+, recording every directory, regular file and
# symlink.  An entry that matches a node whose +last+ run is the previous (or
# the current) run only gets its +last+ column moved to the current run; any
# other entry gets a new node row plus a file/symlink/directory row, and the
# content of new files is copied into the hash farm.
#
# Reads the globals $farm, $ignore, $verbose and $quick.
# The database handle must respond to query_row, insert_row, do and update_one;
# insert_row is expected to store the generated primary key in the hash it is
# given, because the keys are read back from those hashes right afterwards.
class NodeManager
  # db      - database handle (see class comment)
  # archive - archive name, resolved to an archive_id through get_pk
  # date    - time of this run: a Time, or a String Time.parse understands
  # path    - root directory of the tree to import
  def initialize(db, archive, date, path)
    @db = db
    @dir_mapping = {}

    new_mirror_run(archive, date)
    insert_tree(path)
  end

  # Add a new mirror run entry into the mirrorrun table, verifying that this
  # run is strictly later than any previously imported run of the archive.
  # Sets @previous_run, @previous_run_time and @mirror_run; exits the process
  # with status 1 when the run is not newer than the latest imported one.
  def new_mirror_run(archive, date)
    archive_id = get_pk(@db, 'archive', 'name', archive)

    row = @db.query_row("SELECT run, mirrorrun_id FROM mirrorrun WHERE archive_id = ? ORDER BY run DESC LIMIT 1", archive_id)
    if row
      prev = Time.parse(row['run'].to_s)
      # A date given on the command line is a String, and comparing a Time
      # with a String raises ArgumentError.  Compare against a parsed Time but
      # keep storing the value exactly as it was passed in.
      this_run = date.kind_of?(Time) ? date : Time.parse(date.to_s)
      if prev >= this_run
        STDERR.puts "Error:  Mirrorruns need to be imported in proper order.  "+
                    "The latest import is from #{row['run']} whereas this one is from #{date}."
        exit(1)
      end
      @previous_run = row['mirrorrun_id']
      @previous_run_time = prev
    else
      # No earlier run: -1 serves as a run id that matches no node.
      @previous_run = -1
      @previous_run_time = nil
    end

    r = { 'archive_id' => archive_id, 'run' => date }
    @db.insert_row( 'mirrorrun', r )

    @mirror_run = r['mirrorrun_id']
    # The path => directory_id cache is only valid within a single run.
    @dir_mapping = {}
  end

  # find gives us paths starting with ./ (since we run it for .)
  # Remove the leading dot, but turn just the '.' into a '/'.
  # Returns a new string; +path+ itself is left untouched.
  def fixupPath(path)
    stripped = path.sub(/\A\./, '')
    stripped.empty? ? '/' : stripped
  end

  # Walk the tree rooted at +path+ and record every entry.  Paths listed in
  # $ignore are skipped; anything that is neither symlink, directory nor
  # regular file is reported on STDERR and skipped.
  def insert_tree(path)
    Dir.chdir(path) do
      Find.find('.') do |realpath|
        stat = File.lstat(realpath)
        nicepath = fixupPath realpath
        # NOTE(review): this skips only the entry itself; the contents of an
        # ignored directory are still visited.  Confirm that this is intended.
        next if $ignore.include?(nicepath)

        puts "Doing #{nicepath}" if $verbose >= 2
        if stat.symlink?
          insert_symlink(nicepath, File.readlink(realpath))
        elsif stat.directory?
          puts "Doing #{nicepath}" if $verbose == 1
          insert_directory(nicepath)
        elsif stat.file?
          insert_file(nicepath, stat, realpath)
        else
          STDERR.puts "Ignoring non-regular file #{nicepath}"
        end
      end
    end
  end

  # Insert a node row that is first and last seen in the current run.
  # Returns the row hash that was passed to insert_row.
  def insert_node(parent)
    node = { 'first' => @mirror_run,
             'last' => @mirror_run,
             'parent' => parent }
    @db.insert_row('node', node)
    node
  end

  # Copy +source+ into the hash farm as $farm/<hh>/<hh>/<hash>, where the two
  # directory levels are the first and second pair of characters of +hash+.
  # A file already present under that name is left alone.
  def store_file(source, hash)
    target_dir = File.join($farm, hash[0..1], hash[2..3])
    FileUtils.mkdir_p(target_dir)

    target = File.join(target_dir, hash)
    # FileUtils.cp replaces File.copy from ftools, which Ruby 1.9 removed.
    FileUtils.cp(source, target) unless File.exist?(target)
  end

  # Record the regular file +fullpath+ (tree-relative, leading '/').
  #
  # stat     - File::Stat of the file
  # realpath - path the file can be opened under from the current directory
  #
  # With $quick set, a file whose ctime and mtime both predate the previous
  # run is matched on parent, name and size only, and is hashed only if that
  # match fails.  Otherwise the SHA1 of the content is part of the match.
  # Raises RuntimeError when the UPDATE touches more than one row.
  def insert_file(fullpath, stat, realpath)
    parent = insert_directory(File.dirname(fullpath))
    basename = File.basename(fullpath)
    size = stat.size

    hash = nil
    if $quick &&
       @previous_run_time &&
       @previous_run_time > stat.ctime &&
       @previous_run_time > stat.mtime
      extra_where = ""
      extra_arg = []
    else
      hash = content_hash(realpath)
      extra_where = " AND hash=?"
      extra_arg = [ hash ]
    end

    updated = @db.do("UPDATE node SET last=?
                FROM file
                WHERE node.node_id = file.node_id AND
                parent=? AND (last=? OR last=?) AND
                name=? AND size=? #{extra_where}",
                @mirror_run,
                parent, @previous_run, @mirror_run,
                basename, size, *extra_arg)
    case updated
    when 0
      # New or changed file: it needs its own node, file row and farm copy.
      hash ||= content_hash(realpath)
      node = insert_node(parent)
      new = { 'name' => basename,
              'hash' => hash,
              'size' => size,
              'node_id' => node['node_id'] }
      @db.insert_row('file', new)
      store_file(realpath, hash)
    when 1
      # Known file; the UPDATE already extended it to this run.
    else
      raise "Did not update exactly zero or one element."
    end
  end

  # Record the symlink +fullpath+ pointing at +target+.  An existing node with
  # the same parent, name and target is extended to this run, otherwise a new
  # node and symlink row are inserted.
  # Raises RuntimeError when the UPDATE touches more than one row.
  def insert_symlink(fullpath, target)
    parent = insert_directory(File.dirname(fullpath))
    basename = File.basename(fullpath)
    updated = @db.do("UPDATE node SET last=?
                FROM symlink
                WHERE node.node_id = symlink.node_id AND
                parent=? AND (last=? OR last=?) AND
                name=? AND target=?",
                @mirror_run,
                parent, @previous_run, @mirror_run,
                basename, target)
    case updated
    when 0
      node = insert_node(parent)
      new = { 'name' => basename,
              'target' => target,
              'node_id' => node['node_id'] }
      @db.insert_row('symlink', new)
    when 1
      # Known symlink; the UPDATE already extended it to this run.
    else
      raise "Did not update exactly zero or one element."
    end
  end

  # Return the directory_id for +fullpath+, recording the directory (and,
  # recursively, its ancestors) if necessary.  Results are cached in
  # @dir_mapping so each path costs at most one round of queries per run.
  def insert_directory(fullpath)
    return @dir_mapping[fullpath] if @dir_mapping.has_key?(fullpath)

    r = @db.query_row("UPDATE node SET last=?
                       FROM directory
                       WHERE node.node_id = directory.node_id AND
                         path=? AND (last=? OR last=?)
                       RETURNING directory_id",
                       @mirror_run,
                       fullpath, @previous_run, @mirror_run)
    if r
      @dir_mapping[fullpath] = r['directory_id']
    else
      is_root = (fullpath == '/')
      # The root has no parent directory yet, so it starts out with -1.
      node = insert_node(is_root ? -1 : insert_directory(File.dirname(fullpath)))

      directory = { 'path' => fullpath,
                    'node_id' => node['node_id'] }
      @db.insert_row('directory', directory)

      if is_root
        # Now that the root has a directory_id, make it its own parent.
        @db.update_one('node',
                       { 'parent' => directory['directory_id'] },
                       { 'node_id' => node['node_id'] })
      end
      @dir_mapping[fullpath] = directory['directory_id']
    end
    @dir_mapping[fullpath]
  end

  private

  # Hex SHA1 of the file's content.  Digest::SHA1.file opens and closes the
  # file itself, so no handle is left open and the content is not held in
  # memory in one piece.
  def content_hash(realpath)
    Digest::SHA1.file(realpath).hexdigest
  end
end

# Connect with the settings under $CONFIG['database'] (presumably provided by
# db-config - confirm) and import the whole run between transaction_begin and
# transaction_commit.
db_settings = $CONFIG['database']
$db = Db.new(db_settings['dbhost'], db_settings['dbname'], db_settings['user'], db_settings['password'])

$db.transaction_begin
NodeManager.new($db, @archive, @date, @path)
$db.transaction_commit



# vim:set et:
# vim:set shiftwidth=2:
# vim:set ts=2:
