#!/bin/bash


# Require all three positional arguments (language, edition, checkpoint).
# An empty third argument is treated the same as a missing one.
# The usage text goes to stderr and the exit status is non-zero so that a
# calling script can tell a bad invocation from a successful run.
if [[ -z "${3:-}" ]]; then
	echo "Usage: compress-html <language> <edition> <checkpoint>" >&2
	exit 1
fi

# Load shared settings from config.sh, located next to this script.
# $base is used below but never assigned here, so it is presumably provided
# by config.sh (or inherited from the environment) -- verify against config.sh.
config_file="$(dirname -- "$0")/config.sh"
if [[ ! -r "$config_file" ]]; then
	echo "compress-html: cannot read $config_file" >&2
	exit 1
fi
. "$config_file"

# Refuse to continue with an empty base; otherwise every path below would be
# rooted at "/".
: "${base:?base must be set (expected from config.sh)}"

# Positional arguments.
lang=$1
edition=$2
checkpoint=$3
site=wikipedia

# Derived locations:
#   sitebase - directory the file lists are generated relative to
#   dest     - output directory for the list files and the archive
#   p7zip    - 7za binary used for compression
sitebase="$base/new/$site"
dest="$base/downloads/$edition/$lang"
p7zip="$base/scripts/7za-readdir-hack"

# This runs before `set -e` is enabled, so check the result explicitly.
mkdir -p -- "$dest" || exit 1

# From here on, abort as soon as any command fails.
set -e

echo Finding files...
# The lists are generated from inside $sitebase, so every recorded path is
# relative to it and starts with "$lang/".
cd "$sitebase"
find "$lang/" -name '*.html' > "$dest/html.lst"

# Supporting files: everything under skins/, raw/ and misc/, plus the
# dumpHTML.version marker, which is appended by name without an existence check.
find "$lang/skins" "$lang/raw" "$lang/misc" -type f > "$dest/skins.lst"
printf '%s\n' "$lang/dumpHTML.version" >> "$dest/skins.lst"

# The images list is optional: it is only written when $lang/images exists.
if [ -e "$lang/images" ]; then
	find "$lang/images" -not -type d > "$dest/images.lst"
fi

# The command substitution is intentionally left unquoted so that any padding
# emitted by wc is collapsed by word splitting.
# shellcheck disable=SC2046
echo Found $(wc -l < "$dest/html.lst") files

echo Creating HTML archive...
# Remove any archive left over from a previous run before writing a new one.
rm -f -- "$dest/wikipedia-$lang-html.tar.7z"


# Earlier direct-7z and split-archive approaches, currently disabled:
# Set chunk size to 8MB for faster random access
#$p7zip -l -ms8m a $dest/wikipedia-$lang-html.7z @$dest/html.lst @$dest/skins.lst

#fileCount=`wc -l $base/downloads/$edition/$lang/html.lst | awk '{print $1}'` 
#if [ $fileCount -gt 2000000 ]; then
#	echo "Creating split archives"
#	$base/scripts/compress-volumes "$lang" "$edition"
#fi


# Without pipefail the pipeline's status is that of 7za alone, so a tar
# failure (for example a listed file that no longer exists) would go
# unnoticed and the checkpoint below would be written for a broken archive.
set -o pipefail

# Stream a tar of the HTML and skin file lists into 7za via stdin (-si);
# -bd disables 7za's progress indicator.
tar -c -T "$dest/html.lst" -T "$dest/skins.lst" \
	| "$p7zip" a "$dest/wikipedia-$lang-html.tar.7z" -si -bd

# Only reached when both tar and 7za succeeded (given `set -e` is in effect).
echo "everything=done" > "$checkpoint"

