Commit 86bf43f3 authored by Brad Covey

Initial commit in release repo

*.pyc
*.pkl
*.ipynb
.ipynb_checkpoints/
test_data/
# 853toCSV.py
#!/usr/bin/python
from datetime import datetime, timedelta
import glob
import os
import os.path
import pickle
import sys
import csv
import time

from imagenex853DataParser import *
from db_utilities import get_database_connection  # used by process_mission()
import db_config

debug = True
def process_directory(raw_file_dir, data_filename, header_filename):
    # Load the record of files that have already been processed; start fresh if it doesn't exist
    try:
        processedListFile = open(os.path.join(os.path.split(os.path.realpath(__file__))[0], "853toCSV.pkl"), "r")
        processedFiles = pickle.load(processedListFile)
        processedListFile.close()
    except IOError:
        processedFiles = {}
    try:
        outputDataFile = open(data_filename, "a")
        outputHeaderFile = open(header_filename, "a")
    except IOError:
        outputDataFile = open(data_filename, "w")
        outputHeaderFile = open(header_filename, "w")
    # grab all the .853 filenames in the raw-file directory
    # rawFiles = glob.glob(rawFileDir + gliderName + "/from-glider/*.853")
    rawFiles = glob.glob(os.path.join(raw_file_dir, "*.853"))
    rawFiles.sort()
    if debug:
        print rawFiles
    # loop through all of the .853 files; files that have been processed already
    # are recorded in processedFiles
    for f in rawFiles:
        creationTime = datetime.fromtimestamp(os.path.getctime(f))
        if debug:
            print "Unprocessed filename: " + str(os.path.basename(f))
        currFile = open(f, "rb")
        header, data = parse853File(currFile, returnHeaderAsDict=False)
        # Build the CSV column order: timestamp first, pingNumber second, the rest as found
        headerHeadings = []
        if len(header) > 0:
            for key in header[0]:
                if key == 'timestamp':
                    headerHeadings.insert(0, key)
                elif key == 'pingNumber' and len(headerHeadings) >= 1:
                    headerHeadings.insert(1, key)
                else:
                    headerHeadings.append(key)
        # Initializing the csv writers
        headerWriter = csv.DictWriter(outputHeaderFile, headerHeadings)
        dataWriter = csv.writer(outputDataFile)
        print "Rows of data in file: %s\n" % len(data)
        # write csv headers if we're making a new headers file
        if os.path.getsize(outputHeaderFile.name) == 0:
            headerWriter.writeheader()
        # Write the data and headers to their respective files
        headerWriter.writerows(header)
        dataWriter.writerows(data)
        processedFiles[f] = 0
        outputHeaderFile.flush()
        outputDataFile.flush()
    # Save the updated record of processed files
    processedListFile = open(os.path.join(os.path.split(os.path.realpath(__file__))[0], "853toCSV.pkl"), "w")
    pickle.dump(processedFiles, processedListFile)
    processedListFile.close()
    return processedFiles
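
# Example usage (hypothetical paths), mirroring main() below:
#   process_directory('/data/from-glider', 'otn200_sci_data.csv', 'otn200_sci_header.csv')
# The returned dict maps each processed .853 path to 0 and is persisted in 853toCSV.pkl.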
def process_mission(mission_id):
    # NOTE: outputDir and rawFileDir are assumed to be module-level settings
    cursor, conn = get_database_connection()
    # Repeat the parsing process for each glider
    for gliderName in ['otn200', 'otn201']:
        asciiDir = outputDir + gliderName + "/"
        # get the latest mission for this glider
        q = "SELECT * FROM missions, gliders WHERE gliders.name='" + gliderName + "' AND missions.gliderID = gliders.id ORDER BY missions.startTime DESC LIMIT 0,1"
        try:
            cursor.execute(q)
            result = cursor.fetchall()
        except:
            # retry once before giving up
            try:
                cursor.execute(q)
                result = cursor.fetchall()
            except:
                print 'query "' + q + '" not possible on this connection.'
                exit(0)
        for row in result:
            print row
            # Run the parser if there is a live mission
            # if row['recovered'] == 'n':
            if row['id'] == mission_id:
                missionStartTime = row['startTime']
                data_filename = outputDir + gliderName + "_sci_echosounder_data.csv"
                header_filename = outputDir + gliderName + "_sci_echosounder_headers.csv"
                processed_files = process_directory(rawFileDir + gliderName + "/from-glider/", data_filename, header_filename)
def main():
    if len(sys.argv) < 3:
        print "Incorrect number of arguments"
        print "Usage:"
        print "\tpython 853toCSV.py [path_to_raw] [path_to_output]"
        print "\tnote:\tpath_to_output should be the path to the output\n\t\tfilename without an extension"
        sys.exit()
    raw_file_dir = sys.argv[1]
    output_filename = sys.argv[2]
    # strip the extension if the user supplied one
    if '.csv' in output_filename:
        output_filename = os.path.splitext(output_filename)[0]
    # data_filename = os.path.join(output_filename, '_data.csv')
    # header_filename = os.path.join(output_filename, '_header.csv')
    data_filename = output_filename + '_data.csv'
    header_filename = output_filename + '_header.csv'
    process_directory(raw_file_dir, data_filename, header_filename)
    return

if __name__ == '__main__':
    main()
# Requirements
* Python 2.7
# Installation & Command Line Usage
1. Click "Download zip" on the GitLab page
2. Extract the archive somewhere, make note of the path to the files
3. Call the script by entering the following into the command line on your operating system (called Terminal on Mac and Linux, cmd on Windows):
`python /path/to/files/imagenex853DataParser.py /path/to/853/file.853`
Note: if you have multiple versions of Python installed, Python 2.7 can be called like this:
`python2 /path/to/files/imagenex853DataParser.py /path/to/853/file.853`
# Sample API usage:
## Using the List form of the header
```python
from imagenex853DataParser import parse853File
# Files must be opened with the 'b' flag in order for your code to run properly on a Windows computer
file = open("test_data/test1_256byte.853","rb")
header, data = parse853File(file, returnHeaderAsDict=False)
```
Here `header` is a list of dictionaries containing the header data for each ping, and `data` is a list of lists containing the echo sounder data for each ping.
Here is an example printing the header, pingNumber, and data for a particular ping:
```python
# The following line prints the entire header for ping 0:
print header[0]
# The following line prints just the pingNumber for ping 0:
print header[0]['pingNumber']
# The following line prints the data for ping 0:
print data[0]
```
## Using the Dict form of the header
```python
from imagenex853DataParser import parse853File
# Files must be opened with the 'b' flag in order for your code to run properly on a Windows computer
file = open("test_data/test1_256byte.853","rb")
header, data = parse853File(file, returnHeaderAsDict=True)
# returnHeaderAsDict is True by default, so the call can also be written like this:
header, data = parse853File(file)
```
Here `header` is a dictionary of lists. Each key in the dictionary is a header variable name, and each list holds that variable's value for every ping.
Here are some examples of inspecting the data:
```python
# The following prints all of the ping numbers:
print header['pingNumber']
# The following line prints the diveType for ping 3 (the fourth ping):
print header['diveType'][3]
```
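
For quick numeric work, the parsed values convert cleanly to arrays. Here is a minimal sketch (it assumes `numpy` is installed; `numpy` is not required by the parser itself):
```python
import numpy as np
from imagenex853DataParser import parse853File

file = open("test_data/test1_256byte.853", "rb")
header, data = parse853File(file)

# 2-D float array of echo data: rows are pings, columns are range bins
echo = np.array(data, 'float64')
# Unix timestamps for every ping, as floats
timestamps = np.array(header['timestamp'], 'float64')
print echo.shape
```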
# db_config.py: MySQL connection settings; fill these in before running the database scripts
user = None
password = None
hostname = None
database = None
port = 3306
# db_utilities.py
from datetime import date
from datetime import datetime as dt
from datetime import timedelta
import sys
import MySQLdb
import MySQLdb.cursors
from MySQLdb.times import DateTime_or_None, Timestamp
import db_config


def date_or_orig(obj):
    # Return a date for a 'YYYY-MM-DD' string, or the original value if it doesn't parse
    try:
        return date(*map(int, obj.split('-', 2)))
    except ValueError:
        return obj
def datetime_or_orig(obj):
    # cribbed from MySQLdb 2.0 revisions, updated to handle fractional seconds
    if ' ' in obj:
        sep = ' '
    elif 'T' in obj:
        sep = 'T'
    else:
        return date_or_orig(obj)
    try:
        micro = 0
        ymd, hms = obj.split(sep, 1)
        if '.' in hms:
            hms, micro = hms.split('.', 1)
            micro = float('0.' + micro)
        # fractional seconds are preserved for all precisions
        return dt(*[int(x) for x in ymd.split('-') + hms.split(':')]) + timedelta(seconds=micro)
    except ValueError:
        print 'error parsing datetime, returned original value instead'
        return obj
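
# Example (hypothetical values):
#   datetime_or_orig('2014-08-20 12:34:56.25') -> datetime(2014, 8, 20, 12, 34, 56, 250000)
#   datetime_or_orig('2014-08-20')             -> falls through to date_or_orig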
def timestamp_or_orig(s):
    """Convert a MySQL TIMESTAMP string to a Timestamp object, or return None on failure."""
    # MySQL > 4.1 returns TIMESTAMP in the same format as DATETIME
    # Check for microseconds and handle them properly
    if s[19:20] == '.':
        try:
            return dt.strptime(s, "%Y-%m-%d %H:%M:%S.%f")
        except (SystemExit, KeyboardInterrupt):
            raise
        except:
            return None
    if s[4] == '-':
        return DateTime_or_None(s)
    # old-style 14-digit TIMESTAMP: pad, then split into date and time parts
    s = s + "0" * (14 - len(s))
    parts = map(int, filter(None, (s[:4], s[4:6], s[6:8],
                                   s[8:10], s[10:12], s[12:14])))
    try:
        return Timestamp(*parts)
    except (SystemExit, KeyboardInterrupt):
        raise
    except:
        return None
def get_database_connection(host=db_config.hostname, user=db_config.user,
                            db=db_config.database, passwd=db_config.password,
                            port=db_config.port, cursorclass=MySQLdb.cursors.DictCursor):
    try:
        # Override the default DATETIME/TIMESTAMP converters with the
        # fractional-second-aware versions above
        conv_dict = MySQLdb.converters.conversions
        conv_dict[12] = datetime_or_orig
        conv_dict[7] = timestamp_or_orig
        # try to get a connection
        conn = MySQLdb.connect(host=host, user=user, db=db, passwd=passwd, port=port, cursorclass=cursorclass, conv=conv_dict)
        print host, user, db  # don't echo the password
        cursor = conn.cursor()
        print "Connected to DB."
    except:
        exceptionType, exceptionValue, exceptionTraceback = sys.exc_info()
        sys.exit("DB connection failed!\n ->%s" % (exceptionValue))
    return cursor, conn
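
# Typical usage (see echosounder_ascii_to_db.py):
#   cursor, conn = get_database_connection()
#   cursor.execute(query)
#   rows = cursor.fetchall()  # DictCursor returns each row as a dict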
# echosounder_ascii_to_db.py
#!/usr/bin/python
import os
import sys
import datetime
import numpy as np
from time import strptime, strftime
from calendar import timegm

import db_utilities

# Timestamps in the input files are UTC
os.environ['TZ'] = 'UTC'
def upload_segment(data_segment, glider_id):
    # Sort the segment by time
    for key in data_segment:
        data_segment[key] = np.array(data_segment[key])
    order = np.argsort(data_segment['timestamp'])
    for key in data_segment:
        data_segment[key] = data_segment[key][order]
    # find the start and end of the segment, padded by 10 s on each side
    segment_start = datetime.datetime.fromtimestamp(int(data_segment['timestamp'][0]) - 10)
    segment_end = datetime.datetime.fromtimestamp(int(data_segment['timestamp'][-1]) + 10)
    print "Start:\t" + str(segment_start)
    print "End:\t" + str(segment_end)
    cursor, conn = db_utilities.get_database_connection()
    # Pull the glider position/depth ticks covering the segment
    query = "SELECT unix_timestamp(ticks.timestamp) as timestamp, i_lat.i_lat, i_lon.i_lon, i_depth.i_depth FROM ticks JOIN i_lat ON i_lat.tick_id = ticks.id JOIN i_lon ON i_lon.tick_id = ticks.id JOIN i_depth ON i_depth.tick_id = ticks.id"
    query += " WHERE ticks.gliderid = " + glider_id + " AND timestamp BETWEEN \'" + str(segment_start) + "\' AND \'" + str(segment_end) + "\' ORDER BY ticks.timestamp ASC;"
    cursor.execute(query)
    d = {'timestamp': [], 'i_lat': [], 'i_lon': [], 'i_depth': []}
    for row in cursor:
        d['timestamp'].append(row['timestamp'])
        d['i_lat'].append(row['i_lat'])
        d['i_lon'].append(row['i_lon'])
        d['i_depth'].append(row['i_depth'])
    try:
        # Interpolate position and depth onto each ping's timestamp
        data_segment['i_depth'] = np.interp(data_segment['timestamp'], d['timestamp'], d['i_depth'])
        data_segment['i_lat'] = np.interp(data_segment['timestamp'], d['timestamp'], d['i_lat'])
        data_segment['i_lon'] = np.interp(data_segment['timestamp'], d['timestamp'], d['i_lon'])
        for n, header in enumerate(data_segment['header']):
            # Insert the header row
            query = "INSERT INTO sci_echosounder_header (i_depth,i_lat,i_lon,timestamp,ping_number,dive_type,signal_frequency,message_type,filename,mode,ping_rate,signal_gain,signal_range,switches_accepted,character_overrun)"
            query += " VALUES (%s,%s,%s," % (data_segment['i_depth'][n], data_segment['i_lat'][n], data_segment['i_lon'][n])
            query += "\'%s\',%s,%s,%s,\'%s\',\'%s\'," % (header['timestamp'], header['pingNumber'], header['diveType'], header['signalFrequency'], header['messageType'], header['filename'])
            query += "%s,%s,%s,%s,%s,%s" % (header['mode'], header['pingRate'], header['signalGain'], header['signalRange'], header['switchesAccepted'], header['characterOverrun'])
            query += ");"
            cursor.execute(query)
            conn.commit()
            # Insert the data rows as one multi-row INSERT keyed to the new header row
            query = "INSERT INTO sci_echosounder_bin (header_id,bin_id,data) VALUES "
            header_id = cursor.lastrowid
            for bin_id, col in enumerate(data_segment['data'][n]):
                query += "(%s,%s,%s)," % (header_id, bin_id, col)
            query = query[0:-1] + ";"
            cursor.execute(query)
        # commit the bin inserts as well
        conn.commit()
    except ValueError:
        # np.interp raises ValueError when d is empty (no ticks for this window)
        print "No ticks/i_depth available for segment: " + str(segment_start) + "-" + str(segment_end) + "\nDiscarding data for the segment"
def main():
    max_frame_size = 500
    if len(sys.argv) < 4:
        print "invalid number of arguments"
        print "Usage:"
        print "\techosounder_ascii_to_db.py gliderID header_file_name data_file_name"
        print "\twhere header_file and data_file are CSV files output by 853toCSV.py"
        sys.exit(0)
    glider_id = sys.argv[1]
    header_file_name = sys.argv[2]
    data_file_name = sys.argv[3]
    cursor, conn = db_utilities.get_database_connection()
    header_file = open(header_file_name, 'r')
    data_file = open(data_file_name, 'r')
    header_keys = [key.strip() for key in header_file.readline().split(',')]
    data_segment = {'time': [], 'timestamp': [], 'header': [], 'data': []}
    count = 0
    for header in header_file:
        data = data_file.readline()
        # When the frame is full, upload it and start a new one (the current
        # row is appended below, so it is not lost)
        if len(data_segment['header']) >= max_frame_size:
            print "Uploading up to line " + str(count)
            upload_segment(data_segment, glider_id)
            print "Segment uploaded successfully!\n"
            data_segment = {'time': [], 'timestamp': [], 'header': [], 'data': []}
        data_segment['timestamp'].append(timegm(strptime(header.split(',')[0].strip(), '%Y-%m-%d %H:%M:%S')))
        data_segment['time'].append(header.split(',')[0].strip())
        data_segment['header'].append({})
        for n, col in enumerate(header.split(',')):
            col = col.strip()
            data_segment['header'][-1][header_keys[n]] = col
        data_segment['data'].append(data.split(','))
        count += 1
    # Upload whatever is left in the final partial frame
    upload_segment(data_segment, glider_id)
    # Going into the header table: header + i_lat + i_lon + i_depth
    print header_keys
    # print data_segment['header'][0]
    # for key, val in zip(header_keys, data_segment['header'][0]):
    #     print str(key) + ":\t" + str(val)
    # print "i_depth:\t" + str(data_segment['i_depth'][0])
    # print "i_lat:\t" + str(data_segment['i_lat'][0])
    # print "i_lon:\t" + str(data_segment['i_lon'][0])
    conn.close()

if __name__ == '__main__':
    main()
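
# Example invocation (hypothetical file names):
#   python echosounder_ascii_to_db.py 1 otn200_sci_echosounder_headers.csv otn200_sci_echosounder_data.csv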
# imagenex_data_viewer.py
import csv
import datetime
import glob
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import pylab

import dinkum
from imagenex853DataParser import parse853File


def pk2pk2db(data):
    # Convert peak-to-peak voltage to decibels.
    # This equation needs a calibration factor.
    # The input must be a numpy array.
    db = 20 * np.log10(data)
    return db
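
# Example: pk2pk2db(np.array([1.0, 10.0, 100.0])) -> array([ 0., 20., 40.])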
def returnHeaderAsDict(header_list):
    # Convert a list of per-ping header dicts into one dict of lists
    header_dict = {}
    # initialize a list for each key, assuming all headers have the same keys (they should)
    for key in header_list[0]:
        header_dict[key] = []
    # append the values for each key onto its list
    for header in header_list:
        for key in header:
            header_dict[key].append(header[key])
    return header_dict
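
# Example: returnHeaderAsDict([{'pingNumber': 1}, {'pingNumber': 2}])
#   -> {'pingNumber': [1, 2]}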
def data2variable(header, data, ping_count=None):
    # No need to call this function directly; use parse2variable or csv2variable.
    data = np.array(data, 'float64')
    if header['messageType'][0] == '256 byte':
        # translate the range code (values read from CSV arrive as strings, so cast first)
        if int(header['signalRange'][0]) == 0:
            rng = np.array(25, 'float64')
        elif int(header['signalRange'][0]) == 1:
            rng = np.array(50, 'float64')
        elif int(header['signalRange'][0]) == 2:
            rng = np.array(100, 'float64')
        # translate the gain code
        if int(header['signalGain'][0]) == 1:
            gain = np.array(20, 'float64')
        elif int(header['signalGain'][0]) == 2:
            gain = np.array(40, 'float64')
        # Set the spacing of the data (depends on low-res or high-res data)
        if data.shape[1] == 200:
            spacing = rng / data.shape[1]
            y = np.arange(spacing, rng + spacing, spacing)
        else:
            spacing = rng / data.shape[1]
            y = np.arange(0, rng, spacing)
    elif header['messageType'][0] == '18 byte':
        y = np.arange(12.5, 112.5, 12.5)
    # Grab the ping counts
    x = np.array(header['pingNumber'], 'float32')
    # Grab the timestamps
    unixTimeStamp = np.array(header['timestamp'], 'float64')
    # Convert the peak-to-peak voltage into decibels
    db = pk2pk2db(data)
    # transpose the data so it graphs better
    db = db.transpose()
    return x, y, db, unixTimeStamp
# parse an 853 file and return variables ready to be plotted
def parse2variable(filename):
    # Load the data and put it into numpy arrays that are easy to plot
    file = open(filename, "rb")
    header, data = parse853File(file)
    return data2variable(header, data)
def csv2variable(header_file, data_file, ping_file=None):
    with open(header_file) as header_file:
        header_reader = csv.DictReader(header_file)
        header = []
        for i in header_reader:
            header.append(i)
        header_dict = returnHeaderAsDict(header)
    with open(data_file) as data_file:
        data_reader = csv.reader(data_file)
        data = []
        for i in data_reader:
            data.append(i)
    # If a ping_file is included with the function call, read it in and return it as well
    if ping_file:
        with open(ping_file) as ping_file:
            ping_reader = csv.DictReader(ping_file)
            pings = []
            for i in ping_reader:
                pings.append(i)
            # return data2variable(header_dict, data, returnHeaderAsDict(pings))
            return header_dict, data, returnHeaderAsDict(pings)
    # Call data2variable separately when plotting:
    # return data2variable(header_dict, data)
    return header_dict, data
# Accepts the header and ping data returned by csv2variable
# Returns an array of depths and an array of adjusted timestamps, one per ping
def interpolate_headers(header, ping):
    header_timestamp = np.array(header['timestamp'], 'float64')
    header_modified_timestamp = np.array(header['modifiedTimestamp'], 'float64')
    header_pingcount = np.array(header['pingNumber'], 'float64')
    header_datetime = []
    for i in header_timestamp:
        header_datetime.append(datetime.datetime.utcfromtimestamp(i))
    header_modified_datetime = []
    for i in header_modified_timestamp:
        header_modified_datetime.append(datetime.datetime.utcfromtimestamp(i))
    header_modified_datetime = np.array(header_modified_datetime)
    ping_timestamp = np.array(ping['unixtime'], 'float64')
    ping_count = np.array(ping['sci_echosndr853_ping_count'], 'float64')
    ping_datetime = []
    for i in ping_timestamp:
        ping_datetime.append(datetime.datetime.utcfromtimestamp(i))
    ping_datetime = np.array(ping_datetime)
    # Group the pings that share the same modification timestamp
    unique_header_timestamps = np.unique(header_modified_timestamp)
    header_groups = []
    for timestamp in unique_header_timestamps:
        header_groups.append(np.where(header_modified_timestamp == timestamp)[0])
    header_groups = np.array(header_groups)
    # Spread each group backwards from its shared timestamp, 4 s per ping
    adjusted_timestamp = np.array(list(header_modified_timestamp))
    for group in header_groups:
        group = np.array(group)
        max_timestamp = np.amax(header_modified_timestamp[group])
        current_increment = 0
        for i in reversed(group):
            adjusted_timestamp[i] = adjusted_timestamp[i] - current_increment
            current_increment += 4
        if max_timestamp != np.amax(adjusted_timestamp[group]):
            print "warning: group maximum changed while adjusting timestamps"
    interpolated_depths = np.interp(adjusted_timestamp, ping_timestamp, np.array(ping['depth'], 'float64'))
    return interpolated_depths, adjusted_timestamp
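
# Example (hypothetical file names; assumes the header CSV includes a
# 'modifiedTimestamp' column and the ping CSV has 'unixtime',
# 'sci_echosndr853_ping_count' and 'depth' columns):
#   header_dict, data, pings = csv2variable('headers.csv', 'data.csv', 'pings.csv')
#   depths, times = interpolate_headers(header_dict, pings)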
if __name__ == "__main__":
    ### figure 1
    # Files must be opened with the 'b' flag in order for your code to run properly on a Windows computer
    dir_data = '/home/brad/Documents/imaginex-853-binary-data-parser/full_res/oct_7'
    flist_live853 = glob.glob(os.path.join(dir_data, '*.csv'))
    flist_sbd = glob.glob(os.path.join(dir_data, '*.sbd'))
    flist_tbd = glob.glob(os.path.join(dir_data, '*.tbd'))
    header_file = flist_live853[1]
    data_file = flist_live853[0]
    header_dict, data = csv2variable(header_file, data_file)
    # Compare two example .853 files side by side (the same files are used for figure 3)
    file_name = 'nh211614.853'
    file_name2 = 'nh051614_100mRange.853'
    x, y, db, unixTimeStamp = parse2variable(os.path.join(dir_data, file_name))
    x2, y2, db2, unixTimeStamp2 = parse2variable(os.path.join(dir_data, file_name2))
    plt.figure(num=None, figsize=(14, 6), dpi=80, facecolor='w', edgecolor='k')
    plt.subplot(1, 2, 1)
    plt.contourf(x, y, db)
    plt.gca().invert_yaxis()
    plt.colorbar()
    plt.ylabel('depth (m)')
    plt.xlabel('ping count')
    plt.title(file_name)
    plt.subplot(1, 2, 2)
    plt.contourf(x2, y2, db2)
    plt.gca().invert_yaxis()
    plt.colorbar()
    plt.ylabel('depth (m)')
    plt.xlabel('ping count')
    plt.title(file_name2)
    plt.draw()
    pylab.savefig(os.path.join(dir_data, 'echosndr_pic.png'))
    plt.show()
    ### figure 2
    # Make a figure of 9 echosounder subplots to check out the profiles
    dir_data = os.path.normpath('/Users/adam/Documents/temp/liveecho')
    fileext = '853'
    # os.chdir(dir_data)
    flist = glob.glob(os.path.join(dir_data, '*.{0}'.format(fileext)))
    file_idx = np.arange(0, 27, 3)  # every third file, nine files in total
    cnt = 1
    for ii in file_idx:
        print ii
        plt.subplot(3, 3, cnt)
        x, y, db, unixTimeStamp = parse2variable(flist[int(ii)])
        plt.contourf(x, y, db)
        plt.gca().invert_yaxis()
        if cnt == 9:
            plt.colorbar()
        plt.ylabel('depth from echosounder (m)')
        plt.xlabel('ping count')
        # label each panel with the file name and the time of its first ping
        plt.title('filename: {0}, time: {1} UTC'.format(
            os.path.basename(flist[int(ii)]),
            datetime.datetime.utcfromtimestamp(unixTimeStamp[0]).strftime("%Y-%m-%d %H:%M")))
        cnt = cnt + 1
    plt.show()
    ### figure 3
    # echosounder data
    file_name = 'nh211614.853'
    file_name2 = 'nh051614_100mRange.853'
    x, y, db, unixTimeStamp = parse2variable(os.path.join(dir_data, file_name))
    x2, y2, db2, unixTimeStamp2 = parse2variable(os.path.join(dir_data, file_name2))
    # Science data
    sci_fi_name = 'otn201-2014-232-3-0.ebd'
    ebd, ebdheader = dinkum.dinkum2dict(os.path.join(dir_data, sci_fi_name))
    # make the science figure
    plt.plot(ebd['sci_m_present_time'], np.array(ebd['sci_water_pressure'], 'float64') * 10, '.')
    plt.gca().invert_yaxis()
    plt.show()
    # Set up variables
    pcnt = np.array(ebd['sci_echosndr853_ping_count'], 'float64')  # ping count
    t = np.array(ebd['sci_m_present_time'], 'float64')  # time
    depth = np.array(ebd['sci_water_pressure'], 'float64') * 10
    # de-NaN pings and set up time
    t_pcnt = t[~np.isnan(pcnt)]  # times of the ping counts
    pcnt = pcnt[~np.isnan(pcnt)]  # ping counts with no NaNs
    # de-NaN depth and set up time
    t_depth = t[~np.isnan(depth)]
    depth = depth[~np.isnan(depth)]
    # interpolate depth so that every ping count has a depth
    depth_pcnt = np.interp(t_pcnt, t_depth, depth)
    print np.shape(t_depth)
    print np.shape(depth)
    print np.shape(t_pcnt)
    print np.shape(depth_pcnt)
    # For each science ping count, find the matching column in the echosounder
    # ping axis x (NaN where there is no match)
    idx = np.empty(pcnt.shape, 'float64')
    idx.fill(np.nan)
    for i, p in enumerate(pcnt):
        matches = np.where(x == p)[0]
        if matches.size >= 1:
            idx[i] = matches[0]
    plt.plot(t_pcnt, depth_pcnt, '.')
    plt.show()