#!/usr/local/bin/perl # # A. de la Fuente, VILSPA, 12-Jan-1995 # Translation from dtsplit awk program # $Usage = 'Usage: dtsplit.pl style=vilspa resource records into separate files by category. # # D.Wells, NRAO-CV, 03Feb-22Mar94,31Jly,23Sep. # 1/5/95: added support for mailing_list category # 1/6/95: every category is now in a separate page. # To-Do: # -produce a page with a list of active CATEGORY codes # -set up main page as though subordinate page to eliminate special code # -------------------------------------------------------------------- $[ = 1; # set array base to 1 $, = ' '; # set output field separator $\ = "\n"; # set output record separator # bc() specifies 'big' categories, ch() specifies ordinary categories; # if ($style eq 'vilspa') { $link_address = 'Sergio Paoli spaoli@fcaglp.fcaglp.unlp.edu.ar'; $main_page = 'astronomy'; $astroweb_path = './'; # &bc('observing', 'Observing resources'); &ch('telescope', 'Telescopes'); &ch('survey', 'Astronomical survey projects'); &ch('schedule', 'Telescope observing schedules'); &ch('weather', 'Metereological information'); # &bc('data', 'Data Resources'); &ch('center', 'Data and Archive Centers'); &ch('infosys', 'Astronomy Information Systems'); # &bc('library', 'Publication-related Resources'); &ch('preprint', 'Astronomy & astrophysics preprints & abstracts'); &ch('abstracts', 'Abstracts of Astronomical Publications'); &ch('full_text', 'Full-texts of Astronomical Publications'); &ch('bibliography', 'Astronomical Bibliographical Services'); &ch('library', 'Astronomy-related Libraries'); &ch('other_library', 'Other library resources'); # &bc('people', 'People-related Resources'); &ch('people', 'People'); &ch('personal', 'Personal Web pages'); &ch('jobs', 'Jobs'); &ch('meetings', 'Conferences and Meetings'); &ch('newsgroup', 'Newsgroups'); &ch('mailing_list', 'Mailing Lists'); # &bc('organization', 'Organizations'); &ch('dept', 'Astronomy Departments'); &ch('society', 'Astronomical Societies'); &ch('agency', 
'Space Agencies'); # &bc('software', 'Software Resources'); &ch('software', 'Astronomy software servers'); &ch('document_tools', 'Document Preparation Tools (TeX,etc)'); # &bc('areas', 'Research areas of Astronomy'); &ch('radio', 'Radio Astronomy'); &ch('optical', 'Optical Astronomy'); &ch('space', 'Space Astronomy'); &ch('solar', 'Solar Astronomy'); &ch('planetary', 'Planetary Astronomy'); &ch('high_energy', 'High Energy Astronomy'); # &bc('astroweb', 'Various Lists of Astronomy Resources'); &ch('astroweb', 'Primary Lists of Astronomy Resources'); &ch('other_astronomy', 'Other lists of astronomy resources'); # &bc('pictures', 'Astronomical Imagery'); &ch('pictures', 'Pictures'); # &bc('education', 'Education'); &ch('education', 'Educational Resources in Astronomy'); # &bc('history', 'History'); &ch('history', 'History of Astronomy'); # &bc('misc', 'Miscellaneous Resources'); &ch('protocols', 'Overviews & technical notes for protocols'); &ch('misc', 'Miscellaneous Resources'); &ch('earth', 'Atmosphere,Ocean,Geophys.,SpaceSci.'); &ch('physics', 'Physics-related resources'); &ch('comp_sci', 'Computer Science-related resources'); # ; } else { printf("$Usage \n"); $ExitValue = (17); exit; # ----------------------------------------------------------------------- # Prefix for header files: ; } $header_prefix = 'head'; # Prefix for temporary files: $temp_prefix = 'temp'; # Prefix for output pages: $output_prefix = 'yp'; # Filename for the main page: $header_main = $header_prefix . '_' . $main_page . '.html'; $output_main = $output_prefix . '_' . $main_page . '.html'; # Filename for the tail file: $tailfile = $temp_prefix . '_tail.html'; open (tailfile,">$tailfile"); ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst) = gmtime(time); $mon = $mon + 1 ; $timeupdate = "$year/$mon/$mday $hour:$min GMT" ; print tailfile "
" ; print tailfile "Updated on $timeupdate by $link_address " ; print tailfile "
\n"; close (tailfile) ; # $category_table = $output_prefix . '_categories.html'; # ----------------------------------------------------------------------- # initialize the output files with header lines: foreach $i (keys %h) { if ($h{$i} ne '=') { # printf("\n", c[i]) > temp_file(i); &Pick('>>', &temp_file($i)) && (printf $fh "

\n%s\n

\n
\n\n", $h{$i}); } } # # -------------------------------------------------------------------- # Resource records begin with
. The lines of the record are # concatenated into one string, delimited by code '||'. The record # is then appended to the appropriate file based on CATEGORY code(s) # defined in the configuration information in the BEGIN section. line: while (<>) { chop; # strip record separator @Fld = split(' ', $_, 9999); if ($_ =~ /^[<][Dd][Tt][>]/) { $e = $_ . '||'; # read from stdin, appending lines to string, until encounter blank line: do { $_ = &Getline0(); $e = $e . $_ . '||'; } while length > 0 ; # locate CATEGORY line and parse it: if (($u1 = index($e, '') + $u1 - 1; $ec = substr($e, $u1, $u2 + 5 - $u1); $nf = (@cats = split(' ', $ec, 9999)); # the strings of the CATEGORY definition have been parsed into cats[nf]: foreach $i (keys %done_cat) { delete $done_cat{$i}; } for ($i = 3; $i < $nf; $i++) { #??? $nomatch = 1; foreach $j (keys %c) { if ($cats[$i] eq $c{$j}) { #??? $nomatch = 0; if (!(defined $done_cat{&temp_file($j)})) { &print_entry($e, $j); $ncat{$j}++; $done_cat{&temp_file($j)} = 1; last; } } } if ($nomatch) { $unmatch{$cats[$i]}++; } } $nrec++; } } # -------------------------------------------------------------------- # -------------------------------------------------------------------- # Function to append resource record 'e' to temp_file(i): # -------------------------------------------------------------------- # ------------------------------------------------------------------ # Category code configuration for this set of pages: # the c[] are the actual codes which appear in
files # the h[] are the

strings for listings of the categories # the t[] are the names of the pages to which categories are appended # the h1[] are /<H1> strings for multiple-category pages # # Function to compute "temp" filenames. A name is formed from category code # c[i] unless f[i] is set to an alternate name. If a[i] is set to the # subscript of a different code, then the filename will be formed from # the c[] or a[] entries of that subscript. This scheme enables codes # to be aliased. # ------------------------------------------------------------------ # Function to compute "header" filenames. # Function to compute "output" filenames. } printf "%4d resource records from input\n", $nrec; # initialize the category table: &Pick('>', $category_table) && (print $fh '<LISTING>'); &Pick('>>', $category_table) && (printf $fh ('The AstroWeb master database currently')); &Pick('>>', $category_table) && (printf $fh "contains %d resource records,\n", $nrec); &Pick('>>', $category_table) && (printf $fh ("classified into the following categories:\n\n")); &Pick('>>', $category_table) && (print $fh '#Records Category_codes'); &Pick('>>', $category_table) && (print $fh '-------- --------------'); # terminate the "temp" output files: for ($i = 1; $i <= $k; $i++) { #??? if ($h{$i} ne '=') { &Pick('>>', $category_table) && (printf $fh "%8d %-20s %s\n", $ncat{$i}, $c{$i}, $h{$i}); &Pick('>>', &temp_file($i)) && (print $fh "</DL>\n"); delete $opened{&temp_file($i)} && close(&temp_file($i)); } } $num_unmatch = 0; foreach $i (keys %unmatch) { $num_unmatch++; } printf "%4d unrecognized category codes:\n", $num_unmatch; foreach $i (keys %unmatch) { printf "\t%4d cases of '%s'\n", $unmatch{$i}, $i; # initialize output files and copy header files to them: ; } for ($i = 1; $i <= $k; $i++) { #??? 
if ($initialized{&output_file($i)} != 1) { &Pick('>', &output_file($i)) && (printf $fh ("<HTML>\n<HEAD>\n<TITLE>\n")); &Pick('>>', &output_file($i)) && (printf $fh "%s\n", $h{$i}); &Pick('>>', &output_file($i)) && (printf $fh "\n\n", $link_address); &Pick('>>', &output_file($i)) && (printf $fh ("\n\n")); while ((($_ = &Getline2(&header_file($i)),$getline_ok)) > 0) { &Pick('>>', &output_file($i)) && (print $fh $_); } &Pick('>>', &output_file($i)) && (printf $fh ("\n")); $initialized{&output_file($i)} = 1; } } if (!(-f $header_main)){ system('touch ' . $header_main ); } system('cp ' . $header_main . ' ' . $output_main); &Pick('>>', $output_main) && (printf $fh "The AstroWeb master database\ncurrently contains %d distinct resource records:
      \n", $nrec); # concatenate the "temp" files into the "output" files: for ($i = 1; $i <= $k; $i++) { #??? if ($h{$i} ne '=') { while (($_ = &Getline2(&temp_file($i)),$getline_ok) > 0) { &Pick('>>', &output_file($i)) && (print $fh $_); } delete $opened{&temp_file($i)} && close(&temp_file($i)); &Pick('>>', &output_file($i)) && (print $fh "
      \n"); system('rm -f ' . &temp_file($i)); # if ($h1t{$bch{$i}} ne 'done') { &Pick('>>', $output_main) && (print $fh ("\n
  • " . $h1{$bch{$i}} . "
      \n")); $h1t{$bch{$i}} = 'done'; } &Pick('>>', $output_main) && (print $fh ("
    • ")); &Pick('>>', $output_main) && (printf $fh "%s (%d records)\n", $h{$i}, $ncat{$i}); } } foreach $i (keys %c) { if ($done{&output_file($i)}!= 1) { while ((($_ = &Getline2($tailfile),$getline_ok)) > 0) { &Pick('>>', &output_file($i)) && (print $fh $_); } delete $opened{$tailfile} && close($tailfile); &Pick('>>', &output_file($i)) && (print $fh "\n"); delete $opened{&output_file($i)} && close(&output_file($i)); $done{&output_file($i)} = 1; } } &Pick('>>', $output_main) && (print $fh "\n
");

# Copy the shared tail file (the "Updated on ..." stamp) onto the main page.
# The original wrote these lines to &output_file($i), reusing a loop index
# left over from the per-category loops, so the tail went to a category
# page (or a stray file) instead of $output_main.
while ((($_ = &Getline2($tailfile),$getline_ok)) > 0) {
    &Pick('>>', $output_main) && (print $fh $_);
}
delete $opened{$tailfile} && close($tailfile);
&Pick('>>', $output_main) && (print $fh "\n\n\n");

exit $ExitValue;

# --------------------------------------------------------------------
# Function to append resource record 'e' to temp_file(i).
#   $e  record text: its lines joined (and terminated) by '||'
#   $i  subscript of the category whose temp file receives the record
# Each '||'-delimited line is written followed by a newline.  The split
# does not depend on the array base ($[), and a record lacking the final
# '||' is still written line by line.
# Side effect: sets the global $tfi to the temp file name.
# --------------------------------------------------------------------
sub print_entry {
    local($e, $i) = @_;
    local($line, @lines);
    $tfi = &temp_file($i);
    # A positive limit keeps trailing empty fields, so the blank line that
    # ends a record is preserved; only the empty field after the final
    # '||' terminator is discarded.
    @lines = split(/\|\|/, $e, 9999);
    pop(@lines) if @lines && $lines[$#lines] eq '';
    foreach $line (@lines) {
        &Pick('>>', $tfi) && (printf $fh "%s\n", $line);
    }
}

# --------------------------------------------------------------------
# Function to declare a 'big' category.
#   $big_category_code    code of the big category
#   $big_category_header  heading text stored in h1[code]
# Sets the global $mc, which ch() records for every ordinary category
# declared afterwards.
# --------------------------------------------------------------------
sub bc {
    local($big_category_code, $big_category_header) = @_;
    $mc = $big_category_code;
    $h1{$mc} = $big_category_header;
}

# --------------------------------------------------------------------
# Function to declare an ordinary category.
#   $category_code    code stored in c[k]
#   $category_header  heading stored in h[k]; '=' makes this code an
#                     alias of the preceding category
# Increments the global category count $k and fills c[], h[], bch[], t[]
# (and a[] for aliases).
# --------------------------------------------------------------------
sub ch {
    local($category_code, $category_header) = @_;
    $c{++$k} = $category_code;
    $h{$k} = $category_header;
    if ($h{$k} eq '=') {
        if ($k == 1) {
            print 'ABORT! cannot determine category alias!';
            $ExitValue = (13);
            # ch() is called outside the "line:" input loop, so the
            # original "last line" could not find its label; exit
            # directly with the abort status instead.
            exit $ExitValue;
        }
        # Follow an existing alias so chains resolve to the real category.
        $a{$k} = (defined $a{($k - 1)}) ? $a{$k - 1} : ($k - 1);
    }
    $bch{$k} = $mc;     # mc is set by bc() above
    $t{$k} = $c{$k};    # produce separate files for categories
}

# --------------------------------------------------------------------
# Function to compute "temp" filenames for category subscript $i.
# Uses a[i] (alias subscript) when defined, and f[] in preference to c[]
# for the name itself.
# NOTE(review): $j and $filename are globals and are overwritten here;
# the record loop calls this with its own $j, so for aliased codes the
# caller's $j appears to be redirected to the alias target -- confirm
# before localizing them.
# --------------------------------------------------------------------
sub temp_file {
    local($i) = @_;
    $j = (defined $a{$i}) ? $a{$i} : $i;
    $filename = (defined $f{$j}) ? $f{$j} : $c{$j};
    $temp_prefix . '_' . $filename . '.html';
}

# Function to compute "header" filenames from t[i].
sub header_file {
    local($i) = @_;
    $filename = $t{$i};
    $header_prefix . '_' . $filename . '.html';
}

# Function to compute "output" filenames from t[i].
sub output_file {
    local($i) = @_;
    $filename = $t{$i};
    $output_prefix . '_' . $filename . '.html';
}

# --------------------------------------------------------------------
# Read the next line from <> into $_ and split it into @Fld.
# Sets $getline_ok (true when a line was read) and returns the line.
# Only a trailing newline is removed, so a final line without one keeps
# its last character (chop would have removed it).
# --------------------------------------------------------------------
sub Getline0 {
    if ($getline_ok = (($_ = <>) ne '')) {
        s/\n$//;        # strip record separator
        @Fld = split(' ', $_, 9999);
    }
    $_;
}

# --------------------------------------------------------------------
# Read the next line from the named file (opened for reading on first
# use via Pick) into $_ and split it into @Fld.
# Sets $getline_ok (true when a line was read) and returns the line.
# --------------------------------------------------------------------
sub Getline2 {
    &Pick('',@_);
    if ($getline_ok = (($_ = <$fh>) ne '')) {
        s/\n$//;        # strip record separator
        @Fld = split(' ', $_, 9999);
    }
    $_;
}

# --------------------------------------------------------------------
# Select file $name as the current handle ($fh), opening it with $mode
# ('' to read, '>' to write, '>>' to append) and optional $pipe suffix
# the first time it is used.  Returns true when the handle is open.
# A file is recorded in %opened only after a successful open, so a failed
# open is retried (and reported for output files) instead of leaving
# later prints going to a dead handle.  Input files that cannot be opened
# stay silent, since readers simply see end-of-file.
# --------------------------------------------------------------------
sub Pick {
    local($mode,$name,$pipe) = @_;
    $fh = $name;
    return 1 if $opened{$name};
    unless (open($name,$mode.$name.$pipe)) {
        warn "dtsplit: cannot open '$mode$name$pipe': $!\n" if $mode ne '';
        return 0;
    }
    $opened{$name} = 1;
    1;
}