#!/usr/bin/perl use strict; my %SURES = ( "A" => "positive", "R" => "not sure", "S" => "sure of species", "T" => "sure of group", "G" => "sure of genus", "F" => "sure of family", "W" => "guess", "" => "unknown", ); # ---------------------------- # Get list of images to do. # ---------------------------- # Get list of images already uploaded. my @FILES; my %done; open(my $fh, ") { next if /^!\s*(#|$)/; s/\s*$//; $done{$_}++; } close($fh); # List of files explicitly wish to upload. # Oops, just clobbered it. # open(my $fh, ") { # next if /^!\s*(#|$)/; # s/\s*$//; # push @FILES, $_ if !$done{$_}; # } # close($fh); # Build list of new images to upload by searching for exclamation # marks in the caption files. foreach my $file (glob "*/captions.txt") { my ($date) = $file =~ /^(\d{8})/; open(my $fh, "<$file") or die "Couldn't read $file: $!\n"; while (<$fh>) { next if !/^(\d+)!/; my $img = "$date/$1.jpg"; if (-f $img) { push @FILES, $img if !$done{$img}; } else { print STDERR "Image doesn't seem to exist: $img\n"; } } close($fh); } # Extract sorted list of dates. my %dates; foreach my $file (@FILES) { $dates{$1}++ if $file =~ /(\d{8})/; } my @DATES = sort keys %dates; # ---------------------------- # Get location names. # ---------------------------- my %LOC_NAMES; my $obv_file = "/home/jason/public_html/born/notes/obv_codes.txt"; open(my $fh, "<$obv_file") or die "Couldn't read observation codes: $!\n"; $_ = <$fh>; my $code; while (<$fh>) { last if !/\S/; s/^ //; /^( *)(\S) +(\S(.*\S)?)/; my ($lvl, $let, $name) = ($1, $2, $3); substr($code, length($lvl)) = $let; $LOC_NAMES{$code} = $name; } close($fh); sub get_loc_name { my $code = shift; my $name = ""; for (my $i=1; $i<=length($code); $i++) { my $part_code = substr($code, 0, $i); my $this = $LOC_NAMES{$part_code}; print STDERR "Invalid location code: [$part_code]\n" if !defined $this; next if $this =~ /!$/; # Suppress names with "!" at end. $name =~ s/(^|;)[^;]*$// # For each "*" at end of name, while $this =~ s/\*$//; # remove previous name(s). $name =~ s/; [a-z][^;]*$//; # Remove lowercase names (unless at end). $name .= "; " if $name ne ""; $name = $name.$this; } return $name; } # --------------------------------------------------- # Read notes to get location, sureness, taxa, etc. # --------------------------------------------------- my %MAPS; my %LOCS; my %TAXA; my %SURS; foreach my $DATE (@DATES) { my $note_file = "/home/jason/public_html/born/notes/$DATE.not"; open(my $fh, "<$note_file") or print STDERR "$DATE: missing notes.\n"; my %map; my $loc; while (<$fh>) { my $num; my $tax; if (s/^=(\d+) *//) { $num = $1; $tax = $_; $tax =~ s/\[.*//; $tax =~ s/\?//; $tax =~ s/_/ /g; $tax =~ s/^\s+|\s+$//g; $TAXA{$DATE}{$num} = $tax; } next if !/\[(.*?)\]/; my $x = $1; my $this_loc = ($x =~ /L=(\w+)/) ? $1 : $loc; $loc = $this_loc if !$num; $LOCS{$DATE}{$num} = $this_loc if defined $num; $SURS{$DATE}{$num} = $1 if $x =~ /\bC=(\w)/; if ($x =~ /PH="(.*?)(")/ || $x =~ /PH=(.*)/) { my ($y, $z) = ($1, $2); $y =~ s/,[A-Z].*//; print STDERR "$DATE: missing quotes.\n" if $y =~ /,/ && $z eq ""; foreach my $range (split(",", $y)) { if ($range =~ /^(\d+)-(\d+)$/) { foreach ($1..$2) { $map{$_} ||= []; push @{$map{$_}}, $num; } } elsif ($range =~ /^(\d+)$/) { $map{$1} ||= []; push @{$map{$1}}, $num; } else { print STDERR "$DATE: weird notes ($y).\n"; } } } } close($fh); $MAPS{$DATE} = \%map; } # --------------------------------------------------- # Read taxa.txt to get aliases, common names, etc. # --------------------------------------------------- my %FULS; my %COMS; my %DADS; my %LVLS; my %AKAS; my $tax_file = "/home/jason/public_html/born/notes/taxa.txt"; open(my $fh, "<$tax_file") or die "Couldn't read taxa: $!\n"; my $tax; while (<$fh>) { $tax = $1 if /^A:(\S(.*\S)?)/; $FULS{$tax} = $1 if /^F:(\S(.*\S)?)/; $COMS{$tax} = $1 if /^C:(\S(.*\S)?)/; $DADS{$tax} = $1 if /^P:(\S(.*\S)?)/; $LVLS{$tax} = $1 if /^L:(\S(.*\S)?)/; $AKAS{unabbrev($1, $tax)} = $tax if /^[IK]:(\S(.*\S)?)/; } close($fh); sub unabbrev { my ($abbrev, $taxon) = @_; $abbrev =~ s((^| )(\w)(?= |$))( my $prefix = $1; my $letter = $2; if ($taxon =~ /(^| )($letter\S+)(?= |$)/) { $letter = $2; } elsif ($letter =~ /\D/) { print STDERR "Bad abbrev <$abbrev> in <$taxon>\n"; } $prefix.$letter; )ge; return $abbrev; } sub get_common { my $tax = shift; my $com = $COMS{$tax}; $com = depluralize($com) if $tax =~ /^[A-Z]\S+$/; if ($com eq "" && $tax =~ /^(.* .*) /) { my $tax2 = $1; $com = $COMS{$tax2}; $com = depluralize($com) if $tax2 =~ /^[A-Z]\S+$/; } $com =~ s/\/[^\s,]+//g; $com =~ s/ [\/\&] .*//; my $gen = $tax; $gen =~ s/ .*//; my $gcom = $COMS{$gen}; my @gcoms = ($gcom); for (my $i=0; $i<@gcoms;) { if ($gcoms[$i] =~ s/ [&\/] (.*)//) { push @gcoms, $1; } elsif ($gcoms[$i] =~ s/(\S+)\/(\S+)/\1/) { push @gcoms, "$`$2$'"; } else { $i++; } } if ($com =~ /"/) { $com =~ s("(\w*))( my $letters = $1; my $pat = "^". join(".*", split("", $letters)); my $i = 0; for (; $i<@gcoms; $i++) { last if $gcoms[$i] =~ /$pat/i; } print STDERR "$tax: Can't find \"$letters ($pat) in $gcom\n" if $i >= @gcoms; $i >= @gcoms ? "\"$letters" : depluralize($gcoms[$i]); )e; } elsif ($com eq "") { $com = depluralize($gcoms[0]); } $com =~ s/_/ /g; $com =~ s/(.*), (.*)/\2 \1/; $com =~ s/\b([a-z])/uc($1)/eg; $com =~ s/\b(and|in|o|of|or|s|the)\b/lc($1)/egi; return $com; } sub depluralize { local $_ = shift; s/s$// if !/\(/; s/\(\)//g; s/\(i\)/us/g; s/\(es\)//g; s/\(ies\)/y/g; s/\(\w+\)//g; s/\((\w+),\w+\)/\1/g; return $_; } # ---------------------------- # Read captions. # ---------------------------- my %CAPS; foreach my $DATE (@DATES) { my $cap_file = "$DATE/captions.txt"; open(my $fh, "<$cap_file") or print STDERR "$DATE: missing captions.\n"; while (<$fh>) { my ($pnum, $cap) = /^(\d+)!?\s+(\S(.*\S)?)/; $CAPS{$DATE}{$pnum} = $cap; } close($fh); } # ----------------------------------------------------------- # Collate info and print stuff I need to upload to flickr. # ----------------------------------------------------------- open(my $out, ">upload.txt") or die "Couldn't write upload.txt: $!\n"; # print $out "#! vim: ts=50 noet\n"; foreach my $file (reverse @FILES) { my $FILE = $file; my $NAME = $file; $NAME =~ s/\//_/; my $DATE = $file; $DATE =~ s/\/.*//; my $PNUM = $1 if $FILE =~ /\/(\d+)/; my $NNUMS = $MAPS{$DATE}{$PNUM}; my $LOCC = $LOCS{$DATE}{$$NNUMS[0]}; my $LOCN = get_loc_name($LOCC); my $CAP = $CAPS{$DATE}{$PNUM}; print $out "$DATE $PNUM $LOCN\n"; foreach my $NNUM (@$NNUMS) { my $SURE = $SURES{$SURS{$DATE}{$NNUM}}; my $TAX = $TAXA{$DATE}{$NNUM}; my $GRP = $1 if $TAX =~ s/(-group|-like)$// or $TAX =~ s/( ".*)//; my $AKA = $AKAS{$TAX}; my $FUL = $FULS{$TAX}; my $COM = get_common($TAX) || $AKA && get_common($AKA); my $LVL = $LVLS{$TAX} || $AKA && $LVLS{$AKA}; print STDERR "$DATE: missing photo #$PNUM\n" if !defined $NNUM; print STDERR "$DATE: missing location (photo #$PNUM, note #$NNUM)\n" if defined $NNUM && !defined $LOCC; print STDERR "$DATE: missing taxon (photo #$PNUM, note #$NNUM)\n" if defined $NNUM && $TAX eq ""; print STDERR "$DATE: missing caption for photo #$PNUM.\n" if !defined $CAP; if ($FUL ne "") { my @tax = split(" ", $TAX); $FUL =~ s/"/''.(shift @tax).'<\/i>'/eg; $TAX = $FUL; } else { my $ssp = $LVL eq "ssp" ? "ssp" : $LVL eq "v" ? "var" : $LVL eq "f" ? "f" : "xxx"; if ($TAX =~ / .* / && $TAX =~ /^[A-Z]/) { $TAX =~ s/(.*) (\S+)/$1<\/i> $ssp. $2<\/i>/; } elsif ($TAX =~ /^[A-Z]/) { $TAX = "$TAX"; } } $TAX =~ s/ /_/g; $COM =~ s/ /_/g; $COM = "-" if $COM eq ""; $SURE =~ s/ /_/g; print $out " $NNUM $TAX $COM ($SURE)\n"; } print $out " \"$CAP\"\n\n"; } close($out); exit 0;