Files
httrack/tests/crawl-test.sh
Xavier Roche 2e88036808 Fixed escaping
2013-07-05 18:36:20 +00:00

208 lines
4.2 KiB
Bash
Executable File

#!/bin/bash
#
# Print a warning line on stderr, prefixed with "** ".
# Arguments: the message words (joined with spaces).
# Returns:   always 0, so callers can write `cond || ! warning "msg" || return 1`.
function warning {
  printf '** %s\n' "$*" >&2
  return 0
}
# Report a fatal error through warning() and terminate the script.
# Arguments: the message words (joined into a single string).
# Exits:     with status 1.
function die {
  local reason="$*"
  warning "$reason"
  exit 1
}
# Print a diagnostic line on stderr, but only when $verbose is non-empty.
# Globals:   verbose (read)
# Arguments: the message words (joined with spaces).
function debug {
  # Silent (and successful) unless verbose mode was requested.
  [[ -n "$verbose" ]] || return 0
  echo "$*" >&2
}
# Print the label of the step being run on stderr as "[label] ..<TAB>",
# without a trailing newline, so that result() can complete the line.
# Arguments: the label words (joined with spaces).
function info {
  # The label is passed as printf *data*, never as the format string:
  # labels may embed URLs or paths containing '%' or backslashes.
  printf '[%s] ..\t' "$*" >&2
}
# Print the outcome of a step on stderr, followed by a newline
# (typically completing a line started by info()).
# Arguments: the outcome words (joined with spaces).
function result {
  local outcome="$*"
  echo "$outcome" 1>&2
}
# Trap handler: remove the working directory and kill a still-running crawler.
# Globals: tmpdir   (read)  - working directory to delete
#          nopurge  (read)  - when non-empty, the directory is kept
#          crawlpid (read, reset) - PID of the background crawler, if any
function cleanup {
  debug "cleaning function called"

  # Purge the working directory only if it is known, exists, and purging
  # has not been disabled.
  if [[ -n "$tmpdir" && -d "$tmpdir" && -z "$nopurge" ]]; then
    debug "cleaning up $tmpdir"
    rm -rf "$tmpdir"
  fi

  # Kill the crawler if one is recorded, then forget its PID so a second
  # invocation of this handler does not signal it again.
  if [[ -n "$crawlpid" ]]; then
    debug "killing $crawlpid"
    kill -9 "$crawlpid"
    crawlpid=
  fi
}
# Print the one-line usage message (the script name as invoked) on stdout.
function usage {
  printf 'usage: %s\n' "$0"
}
# Compare two strings and report the outcome through info()/result().
# Arguments: $1 - label describing the check
#            $2 - expected value
#            $3 - actual value
# Exits:     with status 1 (terminating the script) when the values differ.
function assert_equals {
  local label="$1" expected="$2" actual="$3"

  info "$label"
  if [[ "$expected" == "$actual" ]]; then
    result "OK ($expected)"
    return
  fi
  result "expected '$expected', got '$actual'"
  exit 1
}
# Run one httrack crawl in a scratch directory, then audit the mirror.
#
# Usage: start-crawl [audit options...] httrack <httrack arguments...>
#
# Audit options (everything before the literal word "httrack"):
#   --debug              enable debug() output
#   --no-purge           keep the crawl directory after the run
#   --summary            print the "mirror complete" line of hts-log.txt
#   --print-files        list every regular file of the mirror
#   --errors N           the log must contain exactly N "Error:" lines
#   --files N            the summary line must report N files written
#   --found PATH         PATH must be a regular file in the mirror
#   --not-found PATH     PATH must NOT be a regular file in the mirror
#   --directory PATH     PATH must be a directory in the mirror
#
# Globals: tmpdir (read; cleared when --no-purge is given), http_proxy (read),
#          verbose, nopurge, crawlpid (written)
# Returns: 0 on success, 1 on bad usage, setup failure or crawler failure.
# Exits:   with status 1 as soon as an audit check fails.
function start-crawl {
  # First pass over the options: validate them and locate the httrack
  # arguments. Positional parameters are left intact for the audit pass.
  local pos=1
  while test "$#" -ge "$pos"; do
    case "${!pos}" in
      --debug)
        verbose=1
        ;;
      --no-purge|--summary|--print-files)
        ;;
      --errors|--files|--found|--not-found|--directory)
        # These options consume one argument; skip over it.
        pos=$((pos + 1))
        # warning() returns 0, hence the "!" so that "return 1" is reached.
        test "$#" -ge "$pos" || ! warning "missing argument" || return 1
        ;;
      httrack)
        pos=$((pos + 1))
        break
        ;;
      *)
        warning "unrecognized option ${!pos}"
        return 1
        ;;
    esac
    pos=$((pos + 1))
  done

  # Keep the crawler arguments in an array so that URLs containing
  # whitespace or glob characters ('?', '*') reach httrack unmodified.
  local -a crawlargs=("${@:${pos}}")
  debug "remaining args: ${crawlargs[*]}"

  # ut/ won't exceed 2 minutes
  local -a moreargs=(--quiet --max-time=120 --timeout=30 --connection-per-second=5)
  # proxy environment ?
  if test -n "$http_proxy"; then
    moreargs+=(--proxy "$http_proxy")
  fi

  test -n "$tmpdir" || ! warning "no tmpdir" || return 1
  local tmp="${tmpdir}/crawl"
  rm -rf "$tmp"
  mkdir "$tmp" || ! warning "could not create $tmp" || return 1

  command -v httrack >/dev/null || ! warning "could not find httrack" || return 1
  local ver
  ver=$(httrack -O /dev/null --version | sed -e 's/HTTrack version //')
  test -n "$ver" || ! warning "could not run httrack" || return 1

  # start crawl (in the background, so that cleanup() can kill it by PID)
  local log="${tmp}/log"
  debug starting httrack -O "${tmp}" "${moreargs[@]}" "${crawlargs[@]}"
  info "running httrack ${crawlargs[*]}"
  httrack -O "${tmp}" --user-agent="httrack $ver ut ($(uname -omrs))" \
    "${moreargs[@]}" "${crawlargs[@]}" >"${log}" 2>&1 &
  crawlpid="$!"
  debug "started cralwer on pid $crawlpid"
  local rc=0
  wait "$crawlpid" || rc=$?
  crawlpid=
  test "$rc" -eq 0 || ! result "error code $rc" || return 1
  result "OK"
  # Echo the error lines of the crawl log, if any, on stderr.
  grep -iE "^[0-9\:]*[[:space:]]Error:" "${tmp}/hts-log.txt" >&2

  # now audit: second pass over the options, this time consuming them
  local nFiles
  while test "$#" -gt 0; do
    case "$1" in
      --no-purge)
        nopurge=1
        ;;
      --summary)
        grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${tmp}/hts-log.txt"
        ;;
      --print-files)
        find "${tmp}" -mindepth 1 -type f
        ;;
      --errors)
        shift
        assert_equals "checking errors" "$1" \
          "$(grep -iEc "^[0-9\:]*[[:space:]]Error:" "${tmp}/hts-log.txt")"
        ;;
      --found)
        shift
        info "checking for $1"
        if test -f "${tmp}/$1"; then
          result "OK"
        else
          result "not found"
          exit 1
        fi
        ;;
      --not-found)
        # Inverse of --found: the check passes when the file is absent.
        shift
        info "checking for $1"
        if test -f "${tmp}/$1"; then
          result "unexpectedly found"
          exit 1
        else
          result "OK"
        fi
        ;;
      --directory)
        shift
        info "checking for $1"
        if test -d "${tmp}/$1"; then
          result "OK"
        else
          result "not found"
          exit 1
        fi
        ;;
      --files)
        shift
        # Extract the word preceding "files written" from the summary line.
        nFiles=$(grep -E "^HTTrack Website Copier/[^ ]* mirror complete in " "${tmp}/hts-log.txt" \
          | sed -e 's/.*[[:space:]]\([^ ]*\)[[:space:]]files written.*/\1/g')
        assert_equals "checking files" "$1" "$nFiles"
        ;;
      httrack)
        # Everything after this word belongs to the crawler.
        break
        ;;
    esac
    shift
  done

  # cleanup
  if test -z "$nopurge"; then
    rm -rf "$tmp"
  else
    # Forget the working directory so that cleanup() cannot remove it.
    tmpdir=
  fi
}
# check args: without any argument, just print the usage and stop
if test "$#" -le 0; then
  usage
  exit 0
fi
# <http://pubs.opengroup.org/onlinepubs/000095399/basedefs/xbd_chap08.html>
tmptopdir=${TMPDIR:-/tmp}
test -d "$tmptopdir" || mkdir -p "${tmptopdir}" || die "can not find a temporary directory ; please set TMPDIR"
# final cleanup: reset the state read by cleanup() before installing the trap
tmpdir=
crawlpid=
nopurge=
verbose=
# run cleanup() on shell exit (0) and on the listed signal numbers
trap "cleanup" 0 1 2 3 4 5 6 7 8 9 11 13 14 15 16 19 24 25
# working directory: created atomically under an unpredictable name, and the
# script stops if it cannot be created rather than reusing an existing path
tmpdir=$(mktemp -d "${tmptopdir}/httrack_ut.XXXXXX") || die "could not create a working directory in ${tmptopdir}"
# rock'in
start-crawl "${@}"
# that's all, folks!