User:The Anome/NRIS kml extractor

#Copyright (c) 2011 The Anome
#
#Permission is hereby granted, free of charge, to any person obtaining a copy
#of this software and associated documentation files (the "Software"), to deal
#in the Software without restriction, including without limitation the rights
#to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
#copies of the Software, and to permit persons to whom the Software is
#furnished to do so, subject to the following conditions:
#
#The above copyright notice and this permission notice shall be included in
#all copies or substantial portions of the Software.
#
#THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
#IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
#FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
#AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
#LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
#OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
#THE SOFTWARE.

import string, re

# Read the whole KML document and collect the raw text of every
# <Placemark>...</Placemark> element (non-greedy, so each element is
# captured separately). The file is read inside a ``with`` block so the
# handle is closed as soon as the contents are in memory.
with open("doc.kml") as kml_file:
    placemarks = re.findall(r"(?ms)<Placemark>.*?</Placemark>", kml_file.read())

# Pattern for the CDATA payload of a placemark description, after the
# description's whitespace has been collapsed to single spaces.
# It yields twelve groups, in order: historic place name, address, city,
# county, state, latitude, longitude, NPS reference number, date listed,
# notes, type and geocode match.
# The adjacent raw-string pieces are concatenated by the parser into one
# pattern string; each piece covers one labelled field of the description.
cdata_re = (
    r'(?ms)<!\[CDATA\['
    r'<b>Historic Place Name: </b>(.*?)<br />'
    r'<b>Address: </b>(.*?)<br />'
    r'<b>City: </b>(.*?)<br />'
    r'<b>County: </b>(.*?)<br />'
    r'<b>State: </b>(.*?)<br /><br />'
    r'<u>Geographic Coordinates:</u><br />'
    r'<b>Latitude: </b>(.*?)<br />'
    r'<b>Longitude: </b>(.*?)<br /><br />'
    r'<b>NPS Reference Number: </b>(.*?)<br />'
    r'<b>Date Listed: </b>(.*?)<br />'
    r'<b>Notes: </b>(.*?)<br />'
    r'<b>Type: </b>(.*?)<br />'
    r'<b>Geocode Match: </b>(.*?)<br /><br />'
    r'<p align="center">A Service of:<br />'
    r'<a href="http://www\.cr\.nps\.gov/nr/">National Register of Historic Places</a> <br />'
    r'<a href="http://www\.nps\.gov/">National Park Service</a></p>'
    r'\]\]>'
)

# Extracts <name>, <description> and the three comma-separated parts of
# <coordinates> from a single placemark. The loop below unpacks the
# coordinate parts as longitude, latitude and a third component that is
# ignored.
placemark_re = re.compile(
    r"(?ms)^.*?<name>(.*?)</name>.*?<description>(.*?)</description>"
    r".*?<coordinates>(.*?),(.*?),(.*?)</coordinates>.*?$"
)

# Emit one comma-separated line of repr()'d, stripped fields per placemark:
#   'OK', name, lat, lon, <the groups captured by cdata_re>   on success
#   'ERROR', name, lat, lon                                   on failure
# Failures are detected with explicit checks instead of a bare ``except:``
# so that unrelated errors (and Ctrl-C) are no longer swallowed.
for placemark in placemarks:
    match = placemark_re.search(placemark)
    if match is None:
        # Nothing could be parsed out of this placemark. Report it with
        # empty placeholders; previously this path hit a NameError on the
        # first placemark or reused the previous placemark's values.
        row = ["ERROR", "", "", ""]
    else:
        name, description, lon, lat, _third = match.groups()
        # Collapse all runs of whitespace (including newlines) to single
        # spaces so the description lines up with cdata_re's literal text.
        description = " ".join(description.split())
        cdata = re.findall(cdata_re, description)
        if cdata:
            row = ["OK", name, lat, lon] + list(cdata[0])
        else:
            # The description did not have the expected CDATA layout.
            row = ["ERROR", name, lat, lon]
    # Single-argument print(...) behaves the same as the Python 2 print
    # statement and also runs under Python 3.
    print(", ".join([repr(field.strip()) for field in row]))