#!/bin/bash

# Build driver: compiles and installs ghc, and all the packages in stackage LTS.
# The first argument must be "local" or "afs" (checked further down), and the
# helper perl scripts tested below must be present in the current directory.
# consider removing config-file flag since home is specified.

# Trace every command, abort on the first failing command or pipeline stage,
# and treat expansion of an unset variable as an error.
set -x
set -e
set -o pipefail
set -u
#needed for ${file%-+([0-9.])}
shopt -s extglob

#approximate usage:
# create a bunch of screen windows first, because tokens get destroyed
# copy to /tmp && usr/bin/time bash /tmp... local|afs /mit/ghc/bin|newer > /scratch/id00.log 2>&1
# link rcs elsewhere, copy accessory scripts, then
# co do-with-stack && usr/bin/time bash do-with-stack (local|afs) newer |& tee >(bro -w 24 -o id00.log.bro) > id00.log
# co do-with-stack.sh && nice time bash do-with-stack.sh local |& tee >(/mit/ghc/src/bro -w 24 -o h32.log.bro) | perl /mit/ghc/src/log-filter.pl

#nice is not necessary, called internally
# Record the start time and the revision string of this script in the log.
date
echo "\$Id: do-with-stack.sh,v 1.127 2017/02/06 05:37:41 kenta Exp $"

# Fail early (through set -e) if any helper script is missing from the
# current directory; they are invoked later by relative name.
test -e process-stack.pl
test -e get-nettle.pl
test -e more-pack-experi.pl
test -e process-dryruns3.pl
test -e process-mult-final.pl

# GHC release to build from source, and the Stackage LTS snapshot whose
# cabal.config is downloaded and whose packages are installed.
version=8.0.1
stackage=lts-7.18
# Suffix used to name the install tree, the build directory, the strace
# directory and the snapshot tarballs.
fullsuffix=$version-$stackage-a

#unfortunately, each new stackage requires a new compiler build, because prefix is hardcoded
#7.8 .. 7.12 do Cabal-1.24.1.0 which is uninstallable
#7.14 has a conflict between cryptohash and cryptohash-sha1 when compiling executable-hash
#7.15 fixed the problem by updating executable-hash
#future: look up latest lts via curl -I https://www.stackage.org/lts
#using /tmp allows easier migration to other machines

# Root of all local-disk work: build directory, snapshots, strace output.
localhd=/var/tmp/kenta/hdd/ghc
#indexed by stackage because hardcoded prefix
builddir=$localhd/ghc-build-$fullsuffix
# ATHENA_SYS must be set in the environment; with set -u the script aborts
# here otherwise.
basedir=/afs/sipb.mit.edu/project/ghc/arch/$ATHENA_SYS/install
# Ubuntu mirror from which the nettle source package is downloaded.
repo=http://mirrors.mit.edu/ubuntu
#snapshot=/hdd-scratch/kenta/ghc
# Directory holding the checkpoint tarballs written after each stage.
snapshot=$localhd/snapshot
# 2 = simple -v switch
cabal_verbosity=2
# 1 is too little for understanding *.h errors.

# Note: gcc is often found in the output of ghc --info and not PATH
my_gcc_dir=/afs/sipb.mit.edu/project/gcc-6plus
llvm_path=$localhd/full-llvm-37-gcc6/install/bin
system_path=$my_gcc_dir/bin:$llvm_path:/usr/bin:/bin
export LD_RUN_PATH=$my_gcc_dir/lib
# Logs the library directory contents; also aborts (set -e) if it is missing.
ls -l $LD_RUN_PATH
with_gcc=--with-gcc=$my_gcc_dir/bin/gcc
# GHC does not see LD_RUN_PATH, if there is another Wl rpath on the command line (and there is)
ghc_option_rpath=--ghc-option=-optl-Wl,-rpath,$my_gcc_dir/lib

# yi (via charsetdetect-ae) is an example of something that depends on libstdc++
# hot (in hOpenPGP-tools) via nettle is an example of something that depends on nettle

# kenta has a local perl dir, which causes perl to fail if unreadable (due to unlog)
unset PERL5LIB

# Select the install location from the first argument:
#   local - install under $localhd, mirroring the AFS path beneath it
#   afs   - install directly under the AFS $basedir
# "${1:-}" is used instead of "$1" because set -u is active: with a bare
# "$1" a missing argument aborts with "unbound variable" and the usage
# message below is never shown.
if [ "${1:-}" = "local" ]
then basedir=$localhd$basedir
    # $localhd must already exist (original note: "create a symlink",
    # presumably meaning it is set up by hand beforehand); set -e aborts here
    # if it does not.
    test -e "$localhd"
    #avoid misbehaving packages from doing too much harm
    # (the build does not need AFS write access in local mode, so the
    # credentials are dropped before any package code runs)
    kdestroy
    unlog
elif [ "${1:-}" = "afs" ]
then :
else echo "specify local or afs" >&2
    exit 1
fi
# Install prefix for this build, and the directory that receives one
# sub-directory of executables per installed package.
target=$basedir/$fullsuffix
extrabin=$target/hackage-bin/versioned

# bin directory of a previous build; supplies the bootstrap ghc, cabal and
# other build tools through PATH.
oldinstall=/var/tmp/kenta/hdd/ghc/afs/sipb.mit.edu/project/ghc/arch/amd64_ubuntu1404/install/8.0.1-lts-7.16-a/bin
#oldinstall=/mit/ghc/bin

# Snapshot taken right after the bare GHC install.
installed_minimal=$snapshot/$fullsuffix-$1-10ghc.tar.bz2
workarea=$target/workarea
# separate from target to avoid storing it in snapshots
stracedir=$localhd/strace-$fullsuffix
# Private HOME used for every cabal invocation, so cabal's state lives
# inside the install tree.
cabalhome=$workarea/home
morepack=$workarea/more-packages
cabalrc=$cabalhome/.cabal
cabalconfig=$cabalrc/config

# Downloaded Stackage constraints and the fragment spliced into the cabal config.
stackage_config=$workarea/cabal.config
splicefile=$workarea/cabal.config.splice

# Checkpoint tarballs; the numeric infix reflects the order in which the
# stages are written.
ready1=$snapshot/$fullsuffix-$1-20ready1.tar.bz2
ready2=$snapshot/$fullsuffix-$1-30ready2.tar.bz2
stackagedone=$snapshot/$fullsuffix-$1-40stackagedone.tar.bz2
ready3=$snapshot/$fullsuffix-$1-50ready3.tar.bz2
singledone=$snapshot/$fullsuffix-$1-60singledone.tar.bz2
multdone=$snapshot/$fullsuffix-$1-70multdone.tar.bz2
mergedone=$snapshot/$fullsuffix-$1-80mergedone.tar.bz2

# Also embed an rpath to the locally built nettle library (built later under
# $target/nettle).  This variable holds two options separated by a space and
# is therefore expanded unquoted where it is used.
ghc_option_rpath="$ghc_option_rpath --ghc-option=-optl-Wl,-rpath,$target/nettle/nettle"

#oldinstall goes first because we need at least ghc 7.8, and ubuntu1404 gives 7.6
export PATH=$oldinstall:$system_path
hash -r
# Log which tools are picked up and their versions; because of set -e any
# missing tool aborts the run here.
which cabal
cabal -V
which gcc
gcc --version
which ghc
ghc --version
which llc
llc --version
which c2hs
c2hs --version

pbzip2 -V
pigz -V

# Read every regular file under $target/bin and $target/lib/ghc-$version
# (presumably to pull the freshly installed compiler into the file cache
# before it is used).
# Globals:   target, version (read)
# Arguments: none
# Outputs:   one cksum line per tree on stdout; only of interest for the logs
# Returns:   0 when it runs to completion
function prefetch_afs() {
    # Expansions are quoted so a path containing whitespace or glob
    # characters is passed to find as a single argument.
    find "$target/bin" -type f -exec cat '{}' \; | cksum
    find "$target/lib/ghc-$version" -type f -exec cat '{}' \; | cksum
    true
}

# Dump a full listing and a SHA-224 digest of every regular file under
# $target to stdout, so that two logs can be compared to detect changes in
# the install tree.
# Globals:   target (read)
# Arguments: none
# Outputs:   pushd/popd directory stacks, recursive ls listing, sha224sum lines
# Returns:   0 when it runs to completion; the working directory is restored
function integrity() {
    # Quoted so an install path containing whitespace is a single argument.
    pushd "$target"
    # for the logs, potentially to detect integrity loss
    ls -laR --time-style=+%s -b
    find . -type f -print0 | xargs -0 sha224sum
    popd
    # No-op whose arguments serve as an end marker in the set -x trace.
    true integrity check "done"
}

# Install exactly one package with cabal, offline, under strace.
# Globals:   cabalhome, stracedir, cabalconfig, cabal_verbosity, target,
#            extrabin, ghc_option_rpath (all read)
# Arguments: $1 - package identifier as listed by a cabal dry run
#                 (name-version)
# Outputs:   cabal/time output; strace log written to $stracedir/<package>
# Returns:   0 even when the cabal install fails; a failure is only marked in
#            the set -x trace by the "true ROOT FAILURE <package>" line
function single_install() {
    local file checksum
    file=$1
    # One hex digit (sum of the bytes of the name, modulo 16) used to spread
    # the libraries over 16 lib-separated/ directories.
    # see perldoc -f unpack
    # The name is handed over through @ARGV rather than spliced into the
    # perl source, so characters special to perl cannot change the program.
    checksum=$(perl -we 'printf("%01x",unpack("%8C*",$ARGV[0])%16)' "$file")
    # Executables go to a per-package bindir so that equally named programs
    # from different packages do not clobber each other.
    # $ghc_option_rpath is deliberately unquoted: it holds several options.
    if HOME="$cabalhome" nice -19 strace -f -o "$stracedir/$file" \
        time cabal --config-file="$cabalconfig" install \
        --allow-newer --offline --verbose="$cabal_verbosity" \
        -p --global --prefix="$target" \
        --bindir="$extrabin/$file" \
        --libdir="$target/lib-separated/$checksum" \
        --extra-include-dirs="$target/nettle" \
        --extra-lib-dirs="$target/nettle/nettle" \
        $ghc_option_rpath "$file"
    then
        # Strip the trailing -<version> (needs extglob) and hide the two
        # blacklisted packages from ghc-pkg after installing them.
        case ${file%-+([0-9.])} in
            prelude2010|rerebase) "$target/bin/ghc-pkg" hide "$file" ;;
        esac
    else true ROOT FAILURE "$file"
    fi
    # does this do significantly better than compress=lzo btrfs mount option?
    #nice -19 pigz --fast -f $stracedir/$file
    # new plan: selectively manually compress some while the build happens
}

# Start from a clean install tree, then resume from the most advanced
# snapshot tarball that exists.  The nested ifs try, in order: stackagedone,
# ready2, ready1, installed_minimal, and finally a full GHC build from
# source.  Each "else" branch does the work of its stage and ends by writing
# the corresponding snapshot.
# NOTE(review): only the installed_minimal branch runs mkdir -p $basedir
# before pushd; the other restore branches appear to assume it exists.
rm -fr $target
mkdir -p $stracedir
if [ -e "$stackagedone" ]
then pushd $basedir
    nice time pbzip2 -d -c $stackagedone | tar xf -
    popd
else
if [ -e "$ready2" ]
then pushd $basedir
    nice time pbzip2 -d -c $ready2 | tar xf -
    popd
else
if [ -e "$ready1" ]
then
    pushd $basedir
    nice time pbzip2 -d -c $ready1 | tar xf -
    popd
else

if [ -e "$installed_minimal" ]
then mkdir -p $basedir
    pushd $basedir
    #this hopefully prefetches all of ghc into the cache, avoiding the Bus error in rts/sm/Evac.c
    nice time pbzip2 -d -c $installed_minimal | tar xf -
    popd
else rm -fr $builddir
    # No snapshot at all: unpack the GHC source and build it with the
    # bootstrap compiler found on PATH.
    mkdir -p $builddir
    pushd $builddir
    sha224sum /mit/ghc/src/tarballs/ghc-$version-src.tar.xz
    tar xf /mit/ghc/src/tarballs/ghc-$version-src.tar.xz
    pushd ghc-$version

    # with-gcc is necessary because otherwise it still finds
    # /usr/bin/gcc, probably from ghc --info of the old compiler
    strace -f -o $stracedir/00-10ghcconfigure ./configure --prefix=$target $with_gcc
    # j4 is possible here, 46 min with strace turned off.
    # strace -f -o $stracedir/00-11ghcmake
    # 50 GB strace file
    nice -19 time make -j4

    #make has to be separate from make install
    nice -19 strace -f -o $stracedir/00-12ghcinstall time make install
    popd
    popd
    # maybe delete $builddir?
    pushd $basedir
    nice -19 time tar cf $installed_minimal -I pbzip2 $fullsuffix
    popd
fi

#oldinstall has to come after because we want newest ghc
export PATH=$target/bin:$PATH
hash -r
which ghc
ghc --version

# Create the private cabal HOME; refuse to continue if it already contains
# a .cabal directory.
mkdir -p $cabalhome
if [ -e $cabalrc ]
then exit 1
fi
# honeypot for installer scripts which try to modify these
touch $cabalhome/.bashrc
touch $cabalhome/.profile

# Fetch the Stackage constraints and turn them into the fragment that is
# spliced into the cabal config below.
pushd $workarea
wget https://www.stackage.org/$stackage/cabal.config
popd
perl process-stack.pl config < $stackage_config > $splicefile

which cabal
cabal -V

HOME=$cabalhome cabal update

# Edit the generated cabal config in place (backup suffix .orig): uncomment
# logs-dir and point it at the private home, and replace the commented-out
# "constraint:" line with the contents of $splicefile.
perl -plwi.orig -e 'if(/^\s*--\s*logs-dir:/){s/^\s*--\s*//;s,$, '$cabalhome'/.cabal/logs,}elsif(/^\s*--\s*constraint:/){open FI,"'$splicefile'" or die;do {local$/;$_=<FI>}}' $cabalconfig

integrity

prefetch_afs

# use_sse2 is for mersenne-random

#this is parallel so nice 19
# testing whether the fetch, then offline install, process works ahead of the main event
# BUG: fetch does not obey config-file constraints, so need to specify versions exactly.
# seen with 7.15 hashable 1.2.4.0 versus 1.2.5.0
# resolverParams is very different in cabal-install/Distribution/Client/Fetch.hs and Install.hs
#HOME=$cabalhome nice -19 time cabal --config-file=$cabalconfig fetch --verbose=$cabal_verbosity cabal-install
HOME=$cabalhome nice -19 strace -f -o $stracedir/00-20cabal-install time cabal --config-file=$cabalconfig install --verbose=$cabal_verbosity -p --global --prefix=$target -fuse_sse2 $ghc_option_rpath cabal-install

hash -r
which cabal
cabal -V

#create new cabal config file with new cabal-install
mv $cabalhome/.cabal $cabalhome/.cabal-old
HOME=$cabalhome cabal update
# Same in-place edit as above, applied to the config regenerated by the new
# cabal (backup suffix .orig1).
perl -plwi.orig1 -e 'if(/^\s*--\s*logs-dir:/){s/^\s*--\s*//;s,$, '$cabalrc'/logs,}elsif(/^\s*--\s*constraint:/){open FI,"'$splicefile'" or die;do {local$/;$_=<FI>}}' $cabalconfig

#just for fun
diff -u $cabalhome/.cabal-old/config.orig $cabalconfig || true

integrity
prefetch_afs

#language-c needs alex and happy

#HOME=$cabalhome nice -19 strace -f -o $stracedir/00-30cabal-alex-happy time cabal --config-file=$cabalconfig install --verbose=$cabal_verbosity -p --global --prefix=$target -fuse_sse2 $ghc_option_rpath alex happy
# Abort unless alex and happy are found somewhere on PATH.
test "$(which alex)"
test "$(which happy)"

#integrity

# we have made an editorial decision that cabal, alex, and all these
# "initial" utilities needed to build everything else go in
# $prefix/bin, whereas the executables for "everything else" (below)
# will go in $extrabin

# new plan: we pretty much always have a very recent build available,
# so completely rely on the old build for the build tools other than
# ghc and cabal.  This makes simpler the dependencies on obscure build
# tools later in the package build.


#if [ "$fast" ]
#then exit 0
#fi

#get a newer version of nettle than Ubuntu Trusty

# We make an editorial decision that nettle, and the packages that
# depend on it like hOpenPGP are interesting enough to justify this
# extra work.

# XXX check signature
mkdir -p $target/nettle
# get-nettle.pl turns the xenial Sources index into a checksum list for the
# nettle source files; the files are then downloaded and verified against it.
curl $repo/dists/xenial/main/source/Sources.xz | unxz | perl get-nettle.pl > $target/nettle/checksums
pushd $target/nettle
for file in $(perl -plwe 's/^\S+\s*//' checksums)
do wget $repo/pool/main/n/nettle/"$file"
done
sha256sum -c checksums
tar xf nettle*.orig.tar*
nettledir=$(find . -maxdepth 1 -type d -name 'nettle*' -exec basename '{}' \;)
pushd $nettledir
tar xf ../nettle*.debian.tar.*
# Apply the Debian patch series in order, then build with the Debian rules.
while read -r file
do patch -p1 < debian/patches/$file
done < debian/patches/series
debian/rules build
# hack to avoid make install because configure step not done
ln -s libnettle.so libnettle.so.6
popd
# include files are searched for in <nettle/version.h>
ln -s $nettledir nettle
unset nettledir
popd

#elsif(/^\s*jobs:/){$_="jobs: 1"}
#too many CPUs and we swap memory.
#note jobs=1 causes no separate log files.

perl -plwi.orig2 -e 'if(/^\s*jobs:/){$_="jobs: 1"}' $cabalconfig

pushd $basedir
nice -19 time tar cf $ready1 -I pbzip2 $fullsuffix
popd
# ready1 endif
fi

hash -r
which cabal
cabal -V
which gcc
gcc --version
which ghc
ghc --version
which c2hs
c2hs --version

integrity

prefetch_afs

export PATH=$target/bin:$oldinstall:$system_path

# sleep avoids long command line in the next line from weirdly clobbering the output of the previous command
sleep 1
the_packages="$(perl process-stack.pl rest < $stackage_config)"
#the_packages="bzlib hOpenPGP hopenpgp-tools"


# avoid installing all the packages all at once because we cannot
# separate out the executables, and if we do not separate out the
# executables, those with the same name will clobber each other.
# Currently, only "example" is the duplicated name.

#verbose=3 gives output during dependencies, count 3934
# solving dependencies requires more than 4GB memory, spotted 5.27g

# this might need extra swap space someday
HOME=$cabalhome nice time cabal --config-file=$cabalconfig install -p --global --prefix=$target --dry-run $the_packages > $workarea/dry-run.out
# Keep only the package lines that follow the "In order, ..." header, drop
# any " (latest: ...)" suffix, and die on a line that still has whitespace.
perl -lwe 'while(<>){last if /^In order, the following would be installed/}while(<>){chomp;s/ \(latest: .*\)//;die "($_)" if /\s/;print}' $workarea/dry-run.out > $workarea/package-order

# approx 43 minutes to this point

# less likely to get hit at some point with hackage downtime
HOME=$cabalhome nice time cabal --config-file=$cabalconfig fetch --no-dependencies $(cat $workarea/package-order)

pushd $basedir
nice -19 time tar cf $ready2 -I pbzip2 $fullsuffix
popd

# ready2 endif
fi

# intentionally duplicated, because too lazy to think about various
# possibilities of how to get to this point

export PATH=$target/bin:$oldinstall:$system_path

hash -r
which cabal
cabal -V
which gcc
gcc --version
which ghc
ghc --version
which c2hs
c2hs --version

#the main event

# this strategy unfortunately eliminates the possibility of parallel
# builds.

# future: the clever way to make it parallel again is to generate a
# makefile from the dependencies, then make -j2, etc.  Cannot do too
# high parallelism or else we run out of memory quickly: even 2 may be
# too much.  Largest maxresident to compile: fortran-src
# gogol-dfareporting amazonka-ec2 gogol-youtube gogol-compute idris
# Agda yesod-job-queue stache
# No-op whose arguments mark the start of the main event in the set -x trace.
true main event
while read -r file
do single_install $file
done < $workarea/package-order

#current known failures in Ubuntu Trusty:
#see notes

prefetch_afs

# ghc-heap-view must be compiled with profiling turned off

# this one omits version number suffix in extrabin, though irrelevant
# because no executables

HOME=$cabalhome nice -19 time strace -f -o $stracedir/ghc-heap-view cabal --config-file=$cabalconfig install --verbose=$cabal_verbosity --global --prefix=$target --bindir=$extrabin/ghc-heap-view $ghc_option_rpath ghc-heap-view


pushd $basedir
nice -19 time tar cf $stackagedone -I pbzip2 $fullsuffix
popd

#endif stackagedone
fi

# From here on the Stackage set is installed (or restored); log the tool
# versions again with the final PATH.
export PATH=$target/bin:$oldinstall:$system_path
hash -r
which cabal
cabal -V
which gcc
gcc --version
which ghc
ghc --version
which c2hs
c2hs --version

# Stage 00: more-pack-experi.pl generates a script that is run inside
# $morepack/00; process-dryruns3.pl then reads the resulting dry* files and
# is expected to leave singles.list and multiples.list there (both are read
# below).
mkdir -p $morepack/00
perl more-pack-experi.pl "$target" > $morepack/00/0run.sh
pushd $morepack/00
#maybe parallelize this with deal-lines.hs
bash 0run.sh
popd
find $morepack/00 -name 'dry*' | sort | perl process-dryruns3.pl $morepack/00

# consider creating a local package cache

HOME=$cabalhome nice time cabal --config-file=$cabalconfig fetch --no-dependencies $(cat $morepack/00/singles.list $morepack/00/multiples.list)

# question: does (latest: ...) happen if it is a beta version (beyond the pref ver)?
# do we want to enforce beta be installed?

pushd $basedir
nice -19 time tar cf $ready3 -I pbzip2 $fullsuffix
popd

#now singles install
while read -r i
do single_install $i
done < $morepack/00/singles.list

pushd $basedir
nice -19 time tar cf $singledone -I pbzip2 $fullsuffix
popd

# Blacklist find-conduit rerebase prelude2010
# ghc-pkg hide might be better
# process-dryruns3
# fetch both single and multiple
# save
# singles install
# save
# recalculate dry with the multiples.list (dry-generate.pl)

#todo list the pkg-config failures, even calling apt-cache search -x 'x.pc$' | grep /x  (see dry-pkgconfig-search.pl)

# Stage 01: redo the dry run for every entry of the stage-00 multiples list
# now that the singles are installed, classify the results again, and
# install the new singles.
mkdir -p $morepack/01
while read -r file
do HOME=$cabalhome cabal --config-file=$cabalconfig install -p --global --prefix=$target --dry-run --verbose=1 $file > $morepack/01/dry-$file.out 2>&1
done < $morepack/00/multiples.list

find $morepack/01 -name 'dry*' | sort | perl process-dryruns3.pl $morepack/01

while read -r i
do single_install $i
done < $morepack/01/singles.list


# another singles install
# save
# complicated cabal dependencies, excluding packages which depend on failed singles.  This might require access to the logs.
# process-mult-final

# Stage 02: dry-run every entry of the stage-01 multiples list.  The results
# are written into $morepack/02, which is the directory scanned by the find
# below (they used to be written into $morepack/01, leaving 02 empty).
mkdir -p $morepack/02
while read -r file
do HOME=$cabalhome cabal --config-file=$cabalconfig install -p --global --prefix=$target --dry-run --verbose=1 $file > $morepack/02/dry-$file.out 2>&1
done < $morepack/01/multiples.list

# NOTE(review): unlike the earlier stages this find has no -name 'dry*'
# filter, so the directory itself and the output file are listed as well;
# presumably process-mult-final.pl copes with that -- confirm.
find $morepack/02 | sort | perl process-mult-final.pl > $morepack/02/good

# Ask cabal for an install order of the surviving packages, then reduce the
# dry-run output to bare package identifiers exactly as is done for the main
# package-order list: skip everything up to the "In order, ..." header, drop
# any " (latest: ...)" suffix, and die on a line that still has whitespace.
# Without this filter cabal's header lines were passed to single_install as
# if they were package names.
HOME=$cabalhome nice time cabal --config-file=$cabalconfig install -p --global --prefix=$target --dry-run $(cat $morepack/02/good) > $morepack/02/dry-run.out
perl -lwe 'while(<>){last if /^In order, the following would be installed/}while(<>){chomp;s/ \(latest: .*\)//;die "($_)" if /\s/;print}' $morepack/02/dry-run.out > $morepack/02/ordering

while read -r i
do single_install $i
done < $morepack/02/ordering

pushd $basedir
nice -19 time tar cf $multdone -I pbzip2 $fullsuffix
popd

# Build two symlink farms next to the per-package "versioned" bin
# directories: "unversioned" (same directories with the -<version> suffix
# stripped from the name) and "merged" (one flat directory of executables).
# NOTE(review): if $extrabin is empty the glob stays a literal "*"; nullglob
# is not set.
pushd $extrabin
mkdir ../unversioned
for file in *
do ln -s ../versioned/"$file" ../unversioned/"${file%-+([0-9.])}"
done

mkdir ../merged
cd ../merged
# omit executables whose name occurs more than once.  this is a
# heuristic for programs users probably will not be interested in.
# currently the only such name is "example".
# The perl program counts executable names, reports skipped duplicates on
# stderr, and prints one "ln -s" command per unique name; those commands are
# executed by the trailing bash -e inside the merged directory.
find ../versioned -type f | perl -nlwe 'die unless m,^../versioned/(.*)/(.*),;$counts{$2}++;$o{$2}=$1;END{for$e(keys%counts){if($counts{$e}>1){print STDERR "skip $e,$counts{$e}";next}$_=$o{$e};s/-[0-9.]+$// or die;print qq(ln -s "../unversioned/$_/$e")}}' | bash -e
popd

# todo collect the build tools into one directory, for the next build

pushd $basedir
nice -19 time tar cf $mergedone -I pbzip2 $fullsuffix
popd

# Final records for the log: tree digest, installed Debian packages, disk
# usage, the directory stack (should be back to a single entry) and the end
# time.
integrity

dpkg -l

du -sk $target


dirs
date
echo all "done"

# try shellcheck to lint this script.

