Convert many GPX files and merge them into one CSV file


Put all the GPX files in one subdirectory (named gpx) and run this parser 😉

 #!/usr/bin/python
 # -*- coding: utf-8 -*-
from xml.dom import minidom
import string
import datetime, dateutil.parser
import unicodedata
import re,os,sys
import time
from xml.dom.minidom import parse, parseString
import gc
# Wall-clock time at start-up; the script reports the total run time from it.
start = time.time()

# Process id; appended to every output file name.
proc = os.getpid()

# Field separator placed between (and after) the CSV columns.
deli = ";"

# Prefix put in front of every CSV row (currently empty).
delie = ""

# Polish rating words -> digit codes.
# NOTE(review): not referenced in the visible code; presumably meant for the
# "Ocena skrzynki" text extracted from the long description - confirm.
zmiana4 = {
	"słaba": "0",
	"poniżej przeciętnej": "1",
	"normalna": "2",
	"dobra": "3",
	"znakomita": "4",
	" ": "",
}

# Textual booleans (the "available"/"archived" attributes) -> "1"/"0".
zamiana3 = {
	"True": "1",
	"False": "0",
}

# Container size name -> numeric code.
zamiana1 = {
	"Micro": "1",
	"Small": "2",
	"Regular": "3",
	"Large": "4",
	"Other": "5",
	"Virtual": "0",
}

# Cache type name -> numeric code. "Multi Cache" and "Multi-cache" are two
# distinct spellings and map to different codes.
zamiana2 = {
	"Traditional Cache": "1",
	"Multi Cache": "2",
	"Unknown Cache": "3",
	"Multi-cache": "4",
	"Virtual Cache": "5",
	"Quiz": "0",
	"Event Cache": "6",
	"Webcam Cache": "7",
}

def plchr(x):
	"""Return x with every character other than A-Z, a-z and 0-9 removed."""
	niedozwolone = re.compile('[^A-Za-z0-9]+')
	return niedozwolone.sub('', x)
	
def zapisz(strigerek, nazwapliku1):
	"""Append the text form of strigerek to the file nazwapliku1.

	The file is opened in append mode (and created when missing) on every
	call; the value is converted with str() before it is written.
	"""
	with open(nazwapliku1, "a") as wyjscie:
		tekst = str(strigerek)
		wyjscie.write(tekst)


def ile_znalezien(i):
	"""Count the log entries below node i, grouped by log type.

	i -- DOM node (a <wpt> element) searched for <groundspeak:logs> /
	     <groundspeak:log> / <groundspeak:type> descendants.

	Returns a list of five ints with the number of logs whose type text is,
	in order: "Found it", "Didn't find it", "Write note", "Enable Listing"
	and "Temporarily Disable Listing". Logs of any other type are ignored.
	"""
	# Position in the result list for every recognised log type.
	pozycje = {
		"Found it": 0,
		"Didn't find it": 1,
		"Write note": 2,
		"Enable Listing": 3,
		"Temporarily Disable Listing": 4,
	}
	a_wyniki = [0, 0, 0, 0, 0]
	for logi in i.getElementsByTagName("groundspeak:logs"):
		for wpis in logi.getElementsByTagName("groundspeak:log"):
			typy = wpis.getElementsByTagName("groundspeak:type")
			# A log without a type element, or with an empty one, cannot be
			# classified; skip it instead of failing with IndexError.
			if not typy or not typy[0].childNodes:
				continue
			# The text is looked up as-is (no str() conversion), so a
			# non-ASCII log type is simply not counted rather than raising
			# UnicodeEncodeError on Python 2.
			pozycja = pozycje.get(typy[0].childNodes[0].toxml())
			if pozycja is not None:
				a_wyniki[pozycja] += 1
	return a_wyniki
	
	
def zapisz_Naglowek(nazwapliku2):
	"""Append the CSV header row to the file nazwapliku2.

	The row consists of delie, the column names joined with deli, one
	trailing deli and a CR LF line ending; it is written through zapisz.
	"""
	kolumny = (
		"cache_name",
		"cache_id",
		"cache_lat",
		"cache_lon",
		"cache_owner",
		"cache_owner_id",
		"cache_type",
		"cache_container",
		"cache_terrain",
		"cache_difficu",
		"cache_year",
		"cache_month",
		"cache_day",
		"cache_finds",
		"cache_not_finds",
		"cache_note",
		"cache_starts",
		"cache_ocena",
		"cache_avaible",
		"cache_archived",
	)
	naglowek = delie + deli.join(kolumny) + deli + "\r\n"
	zapisz(naglowek, nazwapliku2)
#print cNodes[0].getElementsByTagName("wpt")[0].toxml()	

def _first_text(node, tag):
	"""Return the serialized first child of the first <tag> element below node.

	Raises IndexError when no such element exists or when it has no children.
	"""
	return node.getElementsByTagName(tag)[0].childNodes[0].toxml()


def Deska_prasujaca(cNodes, fname3):
	"""Append one CSV row to fname3 for every cache in a parsed GPX document.

	cNodes -- child nodes of the parsed document; only cNodes[0] is searched
	          for <wpt> elements.
	fname3 -- name of the CSV file the rows are appended to (through zapisz,
	          one call per row).

	A <wpt> whose <type> text starts with "Waypoint" is skipped. Each row
	follows the column order written by zapisz_Naglowek: fields separated by
	deli, prefixed with delie, ending with a trailing deli and CR LF.

	Raises IndexError when a required element is missing or empty, and
	KeyError when the cache type, the container or the available/archived
	attribute has no entry in zamiana2, zamiana1 or zamiana3.
	"""
	for i in cNodes[0].getElementsByTagName("wpt"):
		# Only real caches are exported, not additional waypoints.
		if _first_text(i, "type")[0:8] == "Waypoint":
			continue

		# A listing whose urlname ends with "Quiz" is typed as a quiz no
		# matter what its groundspeak:type says.
		if _first_text(i, "urlname")[-4:] == "Quiz":
			cache_type = "Quiz"
		else:
			cache_type = _first_text(i, "groundspeak:type")

		cache = i.getElementsByTagName("groundspeak:cache")[0]
		owner = i.getElementsByTagName("groundspeak:owner")[0]
		rok = dateutil.parser.parse(_first_text(i, "time"))

		# Log counters; only the first three are exported.
		wyniki = ile_znalezien(i)

		# Recommendation count and rating are scraped from the long
		# description; several matches are joined with commas.
		cache_long_description = _first_text(i, "groundspeak:long_description")
		cache_starts = ','.join(re.findall(r"Rekomendacje: (\d+)", cache_long_description))
		cache_ocena = ','.join(re.findall("Ocena skrzynki: (.*)", cache_long_description))

		pola = [
			_first_text(i, "name"),
			cache.getAttribute("id"),
			i.getAttribute('lat'),
			i.getAttribute('lon'),
			# The owner name is reduced to ASCII letters and digits.
			plchr(owner.childNodes[0].toxml()),
			owner.getAttribute("id"),
			zamiana2[str(cache_type)],
			zamiana1[str(_first_text(i, "groundspeak:container"))],
			_first_text(i, "groundspeak:terrain"),
			_first_text(i, "groundspeak:difficulty"),
			rok.strftime('%Y'),
			rok.strftime('%m'),
			rok.strftime('%d'),
			str(wyniki[0]),
			str(wyniki[1]),
			str(wyniki[2]),
			str(cache_starts),
			str(cache_ocena),
			zamiana3[str(cache.getAttribute("available"))],
			zamiana3[str(cache.getAttribute("archived"))],
		]
		zapisz(delie + deli.join(pola) + deli + "\r\n", fname3)

# Work inside the "gpx" sub-directory: input files are read from it and all
# output files are created in it.
os.chdir("./gpx")

# The merged output gets the header exactly once, before any data rows.
zapisz_Naglowek("out"+str(proc)+".csvx")

# Step 1: convert every .gpx file into its own CSV file (with header).
# The names are remembered so that step 2 merges only the files produced by
# this run and not stale .csv files left behind by earlier runs.
pliki_csv = []
for files in os.listdir("."):
	if not files.endswith(".gpx"):
		continue
	print("\t"+files)
	start1 = time.time()
	nazwa_csv = files+str(proc)+".csv"
	DOMTree = minidom.parse(files)
	cNodes = DOMTree.childNodes
	zapisz_Naglowek(nazwa_csv)
	Deska_prasujaca(cNodes, nazwa_csv)
	pliki_csv.append(nazwa_csv)
	stop1 = time.time()
	print("	Czas:"+files+" "+str(stop1-start1))

	# Release the DOM before the next (possibly large) file is parsed:
	# unlink() breaks the reference cycles inside the tree.
	del cNodes
	DOMTree.unlink()
	del DOMTree
	gc.collect()

# Step 2: append the data rows of every per-file CSV to the merged output.
# Binary mode copies the rows verbatim, so their CR LF endings are preserved.
with open("out"+str(proc)+".csvx", "ab") as fout:
	for files in pliki_csv:
		start3 = time.time()
		with open(files, "rb") as f:
			next(f, None)  # skip the header; tolerate an empty file
			for line in f:
				fout.write(line)
		stop3 = time.time()
		print("	Czas splitowania:  "+files+" "+str(stop3-start3))
stop = time.time()
print("	\tCzas łączny :"+str(stop-start))


Advertisements

Leave a Reply

Please log in using one of these methods to post your comment:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s