# Build the three sorted inputs that the join(1) commands in these notes read.
#
# 2k-tag.txt: places2k-PR.txt with carriage returns (octal 015) stripped,
#   passed through the local ./2kmore filter, then sorted on everything from
#   the second tab-separated field to end of line.
#   NOTE(review): ./2kmore is not shown here; presumably it emits
#   "tag<TAB>place name" lines -- confirm.
#   "-k2" is the current spelling of the obsolete "+1" key (same key: field 2
#   onward), which POSIX-conforming sort now takes for a file name, and the
#   separator is passed as a real tab instead of the "TAB" placeholder.
tr -d '\015' < places2k-PR.txt | ./2kmore | sort -t "$(printf '\t')" -k2 > 2k-tag.txt

# pci.yes / pci.planned: the two cable-inet lists, upper-cased and sorted by
# whole line (presumably so the names compare equal to those in 2k-tag.txt).
tr '[a-z]' '[A-Z]' < places.cable-inet.yes | sort > pci.yes
tr '[a-z]' '[A-Z]' < places.cable-inet.planned | sort > pci.planned


# Exploratory join, printed to stdout: pair field 2 of 2k-tag.txt with
# field 1 of pci.yes, using tab as the field separator.
# The separator is passed as a real tab ("-t" takes a single character, so the
# literal "TAB" placeholder cannot be run as written), and "-1 2 -2 1" is the
# current spelling of the obsolescent "-j1 2 -j2 1".
join -t "$(printf '\t')" -1 2 -2 1 2k-tag.txt pci.yes

# First attempt at splitting 2k-tag.txt by whether the place is in pci.planned.
# NOTE(review): the character after "-t\" is presumably an escaped literal tab
# (the earlier commands treat the data as tab-separated); confirm it survived
# copy/paste before re-running.
#
# out1: for every line of 2k-tag.txt whose field 2 matches field 1 of
# pci.planned, print only field 1 of 2k-tag.txt ("-o 1.1"), then let awk
# append " PLAN" (awk's output separator adds one more space before it).
join -t\     -j1 2 -j2 1 -o 1.1 2k-tag.txt pci.planned | awk '{print $0, " PLAN"}' > out1
# 2k-tag-noplan: "-v 1" prints only the 2k-tag.txt lines with no match in
# pci.planned, again reduced to field 1.
# NOTE(review): the wc transcript below shows out1 + 2k-tag-noplan exceeding
# 2k-tag.txt by three lines; presumably some keys occur more than once in
# pci.planned so join emits a line per pairing -- confirm.
join -t\     -j1 2 -j2 1 -o 1.1 -v 1 2k-tag.txt pci.planned > 2k-tag-noplan

[portnoy!jhawk] /afs/sipb/user/jhawk/for-sly/cable> wc -l out1 2k-tag-noplan 
     982 out1
   24171 2k-tag-noplan
   25153 total
[portnoy!jhawk] /afs/sipb/user/jhawk/for-sly/cable> wc -l 2k-tag.txt 
   25150 2k-tag.txt
[portnoy!jhawk] /afs/sipb/user/jhawk/for-sly/cable> 

# Second pass: classify each 2k-tag.txt entry as PLAN, YES, or neither.
# NOTE(review): the character after "-t\" is presumably an escaped literal tab;
# confirm before re-running.
#
# Regenerate 2k-tag-noplan (overwriting the earlier version) keeping two
# columns this time: field 1 of 2k-tag.txt and the join field ("-o 1.1,0"),
# so the result can itself be joined on field 2 in the next step.
join -t\     -j1 2 -j2 1 -o 1.1,0 -v 1 2k-tag.txt pci.planned > 2k-tag-noplan 
# out2: entries not in pci.planned whose field 2 matches field 1 of pci.yes;
# print field 1 and tag it " YES ". Relies on 2k-tag-noplan still being in
# the field-2 order inherited from 2k-tag.txt.
join -t\     -j1 2 -j2 1 -o 1.1 2k-tag-noplan pci.yes | awk '{print $0, " YES "}' > out2
# 2k-tag-nono: the join field ("-o 0") of 2k-tag-noplan lines that matched
# nothing in pci.yes either, i.e. entries found in neither cable-inet list.
join -t\     -j1 2 -j2 1 -o 0 -v 1 2k-tag-noplan pci.yes > 2k-tag-nono
# merge: the PLAN-tagged lines followed by the YES-tagged lines.
cat out1 out2 > merge

----------------------------------------
|     From: Devil in the Shape of a Woman <sly> on FIGHTING-FALCON.MIT.EDU
|                           To: jhawk@ATHENA.MIT.EDU
| 
| 2k-tag-nono appears to be places in places2k-PR.txt that didn't have matches
| in cable-inet* -- not places in cable-inet* which didn't have matches in
| places2k-PR.txt -- which is what I asked for
|        Auth. Personal message at 20:46:47 on Wed Dec 15 2004
| From: Devil in the Shape of a Woman <sly> on FIGHTING-FALCON.MIT.EDU
|                       To: jhawk@ATHENA.MIT.EDU
| 
| you did this:
| > I'd also like an outputted list of places in
| > places.cable-inet.planned and places.cable-inet.yes that failed to
| > match anything in places2k-PR.txt. 
| 
| This is 2k-tag-nono.
| 
| backwards

So:

# The reverse direction requested in the messages above: "-v 2" prints only
# the lines of the SECOND file (pci.planned / pci.yes) whose field 1 matches
# no field 2 in 2k-tag.txt. No "-o" is given, so join's default output format
# is used.
# NOTE(review): the character after "-t\" is presumably an escaped literal tab;
# confirm before re-running.
join -t\     -j1 2 -j2 1 -v 2 2k-tag.txt pci.planned > 2k-tag-noplan2
join -t\     -j1 2 -j2 1 -v 2 2k-tag.txt pci.yes > 2k-tag-noyes2

# Drop duplicate lines from each unmatched list: 2k-tag-<list>2 is sorted
# with duplicates removed and written to 2k-tag-<list>3.
for list in noplan noyes; do
  sort -u "2k-tag-${list}2" > "2k-tag-${list}3"
done

wc *3
     707    2444   20640 2k-tag-noplan3
   10688   36294  284570 2k-tag-noyes3
   11395   38738  305210 total

cat 2k*3 | sort -u | wc 
   11395   38738  305210
