#!/bin/sh
# NOTE(review): this unconditional exit stops the script before any of the
# code below runs.  Presumably a deliberate disable while the merged version
# mentioned in the header comments is still to be reviewed & tested - confirm
# before removing.
exit 1
# http://www.berklix.net/~jhs/src/bsd/jhs/bin/public/tidy/tidy.sh
# Copyright Julian H. Stacey jhs @ berklix.net
# Purpose: To help someone who doesn't have the tidy command debug their web pages.
# Installed to /usr/local/www/cgi-bin/tidy
# Called by http://www.berklix.net/cgi-bin/tidy
# PS I have run this script, captured the output,
# & checked the output with tidy,
# This is a recovered merge of 2 versions, JJLATER: to be reviewed & tested.

# To ensure this script generates error free HTML. I even ran ispell too.

# Search only the base system directories for commands.
# Wider alternative, kept for reference:
# PATH=/sbin:/bin:/usr/sbin:/usr/bin:/usr/local/sbin
PATH=/sbin:/bin:/usr/sbin:/usr/bin
# Mark PATH, and HOME & TZ as well, for export to the commands run below.
export PATH
export HOME TZ

# Directory the report files are written to.
# Do not change TMP without also editing the embedded /tmp/ URL paths in the
# links this script prints further down.
TMP=/usr/local/www/data/tmp
#	/usr/local/www/apache22/data/tmp
#	/site/usr/local/www/Data/virtual/tmp
# Remember that $TMP should be mode 755, owner root, group staff.
# Also apache.conf needs to allow .txt
# BUG: For some reason the 6 files in the table can not be clicked on
# localhost - the files do not display, yet the files do get created.
# Maybe it is because of a sym link local -> ../usr1/local;
# when this script is run on the berklix servers it is OK.

# Remove leftovers, in case some previous run aborted.
# "$TMP" is quoted so each path stays a single word whatever the directory
# is called.
rm -f \
 "$TMP/tidy_tidied_errors.txt" \
 "$TMP/tidy_tidied_html.txt" \
 "$TMP/tidy_tidied_lines.txt" \
 "$TMP/tidy_original_errors.txt" \
 "$TMP/tidy_original_html.txt" \
 "$TMP/tidy_original_lines.txt" \
 "$TMP/tidy_txt.tmp"

# The single page this script analyses.
URL=http://www.berklix.com

# All my temporary files end in .txt as:
# Better chance apache .conf will know how to display them,
# than a random name like .error
# Stops firefox rendering them as HTML, leaves it raw, easier for user to see.

# Everything below uses relative file names, so give up if the work
# directory can not be entered, rather than scattering the report files
# over whatever directory the script was started in.
cd "$TMP" || {
	echo "tidy: can not cd to $TMP" >&2
	exit 1
}

# Fetch the page; keep one untouched copy & one copy for tidy to rewrite.
fetch -o tidy_original_html.txt "$URL"
cp tidy_original_html.txt tidy_tidied_html.txt
# -i indents, -m modifies the file in place.  Both output streams of this
# run are discarded.  (This used to read "2>1", which sent stderr to a stray
# file named "1" instead of duplicating file descriptor 1.)
/usr/local/bin/tidy -i -m tidy_tidied_html.txt > /dev/null 2>&1

# Banner written at the top of every report file: the address of this CGI
# followed by the output of date(1) at the time of the run.
HEADER="http://www.berklix.net/cgi-bin/tidy $(date)"
# Name reported by hostname(1); used as the host part of the report links.
HOSTNAME=$(hostname)

# Build the four derived report files.  Each one starts with the same
# three-line preamble - the banner, the analysed URL plus a tag naming the
# report, and an empty line - followed by the report body.

# Numbered listings: "cat -n" of the original and of the tidied HTML.
for variant in original tidied; do
	printf '%s\n%s\n\n' "$HEADER" "$URL ${variant}_lines" \
		> "tidy_${variant}_lines.txt"
	/bin/cat -n "tidy_${variant}_html.txt" >> "tidy_${variant}_lines.txt"
done

# Diagnostics: whatever "tidy -errors" writes to stderr is appended to the
# report; its stdout is left alone.
for variant in original tidied; do
	printf '%s\n%s\n\n' "$HEADER" "$URL ${variant}_errors" \
		> "tidy_${variant}_errors.txt"
	/usr/local/bin/tidy -errors "tidy_${variant}_html.txt" \
		2>> "tidy_${variant}_errors.txt"
done

# Now put headers on to make it a bit harder for someone
# to abuse this as a proxy.
# Each HTML copy is rebuilt through a scratch file: preamble first, then the
# old content; the old file is removed & the scratch file takes its name.
{
	echo "$HEADER"
	echo "$URL original_html"
	echo ""
	cat tidy_original_html.txt
} > tidy_txt.tmp
rm tidy_original_html.txt
mv tidy_txt.tmp tidy_original_html.txt

{
	echo "$HEADER"
	echo "$URL tidied_html"
	echo ""
	cat tidy_tidied_html.txt
} > tidy_txt.tmp
rm tidy_tidied_html.txt
mv tidy_txt.tmp tidy_tidied_html.txt

# Start of the CGI response: the content type line & the empty line that
# ends the header block, then the document type, the <HEAD> section and the
# <H1> heading that links back to this CGI.
# printf is used rather than "echo -n" so the joined lines do not depend on
# how the shell's echo treats -n.
printf '%s\n' \
	'Content-type: text/html' \
	''
printf '%s\n' \
	'<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">' \
	'<HTML>' \
	'<HEAD>'
# The generator names this script.  It used to name .../date/date.sh,
# apparently left over from the script this one was derived from.
printf '%s\n' \
	'<META NAME="generator" ' \
	'CONTENT="http://www.berklix.net/~jhs/src/bsd/jhs/bin/public/tidy/tidy.sh">'
# The LINK below over rides the firefox blue icon of an envelope.
# The TITLE text shows in the header bar in Firefox.
printf '%s\n' \
	'<LINK REL="shortcut icon" HREF="http://www.berklix.net/gifs/berklix.ico">' \
	'<TITLE>' \
	'HTML Tidy' \
	'</TITLE>' \
	'</HEAD>' \
	'<BODY>' \
	'<H1>' \
	'<A HREF="http://www.berklix.net/cgi-bin/tidy"' \
	'TITLE="http://www.berklix.net/cgi-bin/tidy">http://www.berklix.net/cgi-bin/tidy</A></H1>'
# Introductory sentence, linking to the credits & to the "Later" section.
# HREF="#later" is now quoted: HTML 4.01 does not allow "#" in an unquoted
# attribute value.
printf '%s\n' \
	'This is an <A HREF="#credit">HTML Tidy</A> diagnostic script,' \
	'analysing <A HREF="#later">initially</A> a single URL.'
#	echo '(Currently:'
#	echo -n '<A HREF="'	#{
#	echo -n $URL
#	echo -n '">'
#	echo -n $URL
#	echo ' )</A>'	#}

# "Why Use It" section, indented with a styled div.
printf '%s\n' \
	'<H3>Why Use It</H3>' \
	'<div style="margin-left: 2em">' \
	'For Microsoft users who do not have local Unix tools. To reformat,' \
	'check, &amp; insert line numbers to help debug their HTML.' \
	'Presume search engines will rate you higher' \
	'if you have clean HTML.' \
	'Some commercial HTML generator' \
	'programs fail to generate clean HTML, some use browser dependent' \
	'proprietary extensions, &amp; humans fail too with' \
	'hand written HTML' \
	'</div>'
# I hope This is no longer needed now I am importing $HOSTNAME
#	echo '<H3>If your browser is on any of these</H3>'
#	echo '<UL>'	#{
#	echo '<LI>'	#{
#	echo '<A HREF="http://www.berklix.com/cgi-bin/tidy">www.berklix.com/cgi-bin/tidy</A>'
#	echo 'or'
#	echo '<A HREF="http://www.berklix.com/cgi-bin/tidy">berklix.com/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '<LI>'	#{
#	echo '<A HREF="http://www.berklix.eu/cgi-bin/tidy">www.berklix.eu/cgi-bin/tidy</A>'
#	echo 'or'
#	echo '<A HREF="http://www.berklix.eu/cgi-bin/tidy">berklix.eu/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '<LI>'	#{
#	echo '<A HREF="http://www.berklix.net/cgi-bin/tidy">www.berklix.net/cgi-bin/tidy</A>'
#	echo 'or'
#	echo '<A HREF="http://www.berklix.net/cgi-bin/tidy">berklix.net/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '<LI>'	#{
#	echo '<A HREF="http://www.berklix.org/cgi-bin/tidy">www.berklix.org/cgi-bin/tidy</A>'
#	echo 'or'
#	echo '<A HREF="http://www.berklix.org/cgi-bin/tidy">berklix.org/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '<LI>'	#{
#	echo '<A HREF="http://www.bsdpie.net/cgi-bin/tidy">www.bsdpie.net/cgi-bin/tidy</A>'
#	echo 'or'
#	echo '<A HREF="http://bsdpie.net/cgi-bin/tidy">bsdpie.net/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '</UL>'	#}
#	echo '<H2>Click to one of these instead</H2>'
#	echo '<UL>'	#{
#	echo '<LI>'	#{
#	echo '<A HREF="http://www1.berklix.net/cgi-bin/tidy">www1.berklix.net/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '<LI>'	#{
#	echo '<A HREF="http://www2.berklix.net/cgi-bin/tidy">www2.berklix.net/cgi-bin/tidy</A>'
#	echo '</LI>'	#}
#	echo '</ul>'	#}
#	echo '<SMALL>'	#{
#	echo '(Because <A HREF="http://www.berklix.net">Berklix</A> is served by a cluster of web servers, '
#	echo '&amp; the server that has served you this text &amp; has analysed your URL,'
#	echo 'may not be the same server that next responds when you click'
#	echo 'one of the 6 files below (so you may get an obsolete old copy, or even an error of no existent file)).'
#	echo '</SMALL>'	#}
#	echo '<BR>'
# Heading that names (and links to) the analysed URL, then the start of the
# indented block and of the table, with its "File" / "Content" heading row.
# The div and the table are left open here.
printf '%s\n' \
	'<H2>Analysis of HTML compliance for:' \
	"<A HREF=\"$URL\">$URL</A>" \
	'</H2>' \
	'<div style="margin-left: 2em">' \
	'<TABLE SUMMARY="Analysis of your page" BORDER=1>' \
	'<TR>' \
	'<TD>' \
	'<B>File</B>' \
	'</TD>' \
	'<TD>' \
	'<B>Content</B>' \
	'</TD>' \
	'</TR>'
#	The update was imported from date/xearth,
#	but I dont think I need it here,
#	the user could also push refetch on browser button.
#	echo '<TR>'	#{
#	echo '<TD>'	#{
#	echo '<I>'	#{
#	echo '<A HREF="'
#	echo './tidy"'
#	echo ' title="Click to update">UPDATE</A>'
#	echo '</I>'	#}
#	echo '</TD>'	#}
#	echo '<TD>'	#{
#	echo '<I>Update the contents of the 6 files below.</I>'
#	echo '</TD>'	#}
#	echo '</TR>'	#}
#
#######################################
# report_row FILE LINKTEXT DESCRIPTION
# Print one row of the report table.
# Globals:   HOSTNAME (read) - host part of the link
# Arguments: $1 - file name below the /tmp/ URL path
#            $2 - text of the link shown in the first cell
#            $3 - text shown in the second cell
# Outputs:   the <TR>...</TR> markup on stdout
#######################################
report_row() {
	printf '%s\n' \
		'<TR>' \
		'<TD>' \
		"<A HREF=\"http://$HOSTNAME/tmp/$1\">$2</A>" \
		'</TD>' \
		'<TD>' \
		"$3" \
		'</TD>' \
		'</TR>'
}

# One row per report file.  All six rows now share one layout; the
# "Errors Of Original" row used to lack the line feed before its closing
# </TD>, which is only a white space difference in the HTML.
# From fetch
report_row tidy_original_html.txt 'HTML of Original' 'Original source HTML'
# From tidy -errors original_html
report_row tidy_original_errors.txt 'Errors Of Original' 'Errors'
# From cat -n original_html
report_row tidy_original_lines.txt 'Lines Numbers Of Original' \
	'With line numbers'
# From tidy -m -i tidied_html,
# with indents & line feeds to easier read & debug.
report_row tidy_tidied_html.txt 'HTML tidied' \
	'HTML Tidied (cleaned &amp; formated)'
# From tidy -errors tidied_html
report_row tidy_tidied_errors.txt 'Errors of Tidied' 'Errors'
# From cat -n tidied_html
report_row tidy_tidied_lines.txt 'Line numbers of Tidied' 'With line numbers'

# Close the table and the indented block around it.
printf '%s\n' \
	'</TABLE>' \
	'</DIV>'
# "If you think it slow" section: tells the reader what work is done for
# each request.  The quoted delimiter keeps the text literal.
cat <<'EOF'
<H3>If you think it slow</H3>
<div style="margin-left: 2em">
It is a script: It fetches your URL
from the internet (including whatever delays that URL might
impose with scripts or link redirections etc), then
analyses the data &amp; writes analysis in 6 files
on a <A HREF="http://www.berklix.net">Berklix</A> server,
(for you to later click on),
then builds this page, &amp; sends it to your browser.
</div>
EOF
# "Credits & Copyrights" section, in small print inside an indented block.
# Each link is written as one complete line; several lines end in a blank,
# which is why they are kept as quoted printf arguments where the blank
# stays visible.
printf '%s\n' \
	'<H3><A NAME="credit">Credits</A> &amp; Copyrights</H3>' \
	'<DIV STYLE="margin-left: 2em">' \
	'<SMALL>'
# Script & author.
printf '%s\n' \
	'<A HREF="/~jhs/src/bsd/jhs/bin/public/tidy/">CGI Script</A>, ' \
	'by' \
	'<A HREF="/~jhs/cv/">Julian H. Stacey</A>, '
# The tidy program & where it comes from.
printf '%s\n' \
	'calls ' \
	'<A HREF="http://en.wikipedia.org/wiki/HTML_Tidy">Tidy</A>,' \
	'Indexed by ' \
	'<A HREF="http://www.freebsd.org/cgi/ports.cgi?query=tidy&amp;stype=all">FreeBSD ports/www/tidy</A>, ' \
	'Port wrapper in ' \
	'<A HREF="http://svnweb.freebsd.org/ports/head/www/tidy/">SVN (subversion repository)</A>'
# Operating system, administration & servers.
printf '%s\n' \
	'on Operating System: ' \
	'<A HREF="http://www.berklix.org/freebsd.org">FreeBSD</A>, ' \
	'Admin by ' \
	'<A HREF="http://www.berklix.com">Berklix.com</A>, ' \
	'Servers: ' \
	'<A HREF="http://www.berklix.net">Berklix.Net</A>, ' \
	'&amp; ' \
	'<A HREF="/~jhs/berklix/sites/">Sites</A>.'
# Copyright & licence line, then close the small print & the block.
printf '%s\n' \
	'Copyright ' \
	'<A HREF="http://www.berklix.com/~jhs/cv/">Julian H. Stacey</A>, ' \
	'Munich 2015, ' \
	'You may use script under BSD licence, Please retain this credits line.' \
	'</SMALL>' \
	'</DIV>'
# "Later Development" section (a paragraph plus a list of open design
# points, all in small print), the closing pointer to other tools, and the
# end of the page.  The quoted delimiter keeps the text literal.
cat <<'EOF'
<H3><A NAME="later">Later</A> Development</H3>
<div style="margin-left: 2em">
<SMALL>
it could take a parameter to sample other URLs,
but there are issues to be considered first
(security design comments welcome to
<A HREF="http://www.berklix.com/~jhs/contact/">www.berklix.com/~jhs/contact/</A> )
</SMALL>
<UL>
<LI>
<SMALL>
It would need to use different names for temp files,
so different pages do not collide if multiple simultaneous users.
</SMALL>
</LI>
<LI>
<SMALL>
It would need a sleep &amp;/or a parallel crontab &amp;
find -older to time delete them.
</SMALL>
</LI>
<LI>
<SMALL>
It would need something so it can not be abused to copy content I
do not want to my site, eg porn, terrorism, or MS adverts etc
</SMALL>
</LI>
<LI>
<SMALL>
Maybe restructure it so output goes straight to a pipe thus 1 per page,
not 6 per page ?
</SMALL>
</LI>
<LI>
<SMALL>
But even then it would need more protection, as the <I>cat -n</I>
output could be abused as an anonymising proxy, (&amp; more so, if
someone elsewhere added a recipient line number stripper).
</SMALL>
</LI>
<LI>
<SMALL>
While I am in favour of some anonymising proxies (to allow
some citizens of some countries to evade repressive regimes), There is
also immoral/criminal usage of proxies possible, Complex possibilities
I do not have time for &amp; prefer to leave to specialised
<A HREF="http://www.torproject.org">Tor</A> operators; + my servers
do not need the load.
</SMALL>
</LI>
</UL>
</DIV>
Other tools at <A HREF="http://www.berklix.net">www.berklix.net</A>
</BODY>
</HTML>
EOF
