Commit 2c3a4b14 authored by Brad Covey's avatar Brad Covey
Browse files

Adding the rest of the lessons to the repo

parent e1543281
%% Cell type:code id: tags:
``` python
# Depth Interpolation
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd

# Read the raw instrument data (CSV exports; both have an 'i_depth' column).
instrument1 = pd.read_csv('data/_SATMPR0078.csv')
instrument2 = pd.read_csv('data/_SATBF20129.csv')

# Reference variables: the data that was actually gathered.
depth_actual = instrument1['i_depth']
data_actual = instrument1['COND']
# Conductivity is plotted on x and depth on y, so label the axes accordingly
# (the original labels were swapped).
plt.scatter(data_actual, depth_actual, color='red')
plt.xlabel('conductivity')
plt.ylabel('i_depth')
plt.show()

# Desired x axis: the actual data is interpolated so that it lies on the same
# depth grid (and therefore the same sampling frequency) as instrument2.
depth_desired = instrument2['i_depth']
print(len(instrument2['i_depth']))
print(len(instrument2['THERM']))
data_desired = np.interp(depth_desired, depth_actual, data_actual)
plt.scatter(instrument2['THERM'], instrument2['i_depth'], color='red')
plt.scatter(data_desired, depth_desired, color='blue')
plt.show()
```
%%%% Output: stream
1021
1021
%% Cell type:code id: tags:
``` python
# Sanity-check that the interpolated series and the thermistor record have the
# same length, then print their element-wise average.
print(len(data_desired))
print(len(instrument2['THERM']))
print((data_desired + instrument2['THERM']) / 2)
```
%%%% Output: stream
1021
1021
0 290.388124
1 290.396500
2 290.396500
3 290.396500
4 290.396500
5 290.396500
6 290.396500
7 290.396500
8 290.396500
9 290.396500
10 290.396500
11 290.396500
12 290.396500
13 290.396500
14 290.389999
15 290.396000
16 290.409000
17 290.398001
18 290.398001
19 290.383499
20 290.364500
21 290.365499
22 290.362000
23 290.360500
24 290.362499
25 290.364500
26 290.367500
27 290.361396
28 290.367500
29 290.452999
...
991 287.289000
992 287.290000
993 287.289500
994 287.290500
995 287.289000
996 287.290000
997 286.790000
998 287.290500
999 286.789500
1000 287.289000
1001 286.789500
1002 287.289500
1003 287.288500
1004 287.289000
1005 287.287500
1006 287.287500
1007 286.788500
1008 287.289500
1009 287.289500
1010 286.790500
1011 286.790000
1012 287.290000
1013 287.290500
1014 287.290000
1015 286.789500
1016 287.288500
1017 287.290000
1018 287.289500
1019 287.290000
1020 286.788500
Name: THERM, dtype: float64
%% Cell type:code id: tags:
``` python
# Time Interpolation
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from datetime import datetime

# Read the raw instrument data.
instrument1 = pd.read_csv('data/_SATMPR0078.csv')
instrument2 = pd.read_csv('data/_SATBF20129.csv')

# Reference variables: measured conductivity and its timestamps.
# .astype('int64') converts datetimes to nanosecond-since-epoch integers;
# Series.view('i8') did the same but was deprecated and removed in pandas 2.0.
time_actual = pd.to_datetime(instrument1['timestamp']).astype('int64')
data_actual = instrument1['COND']
print(time_actual)
plt.scatter(time_actual, data_actual, color='red')
plt.xlabel('time')
plt.ylabel('conductivity')
plt.show()

# Desired x axis: interpolate the conductivity onto instrument2's timestamps
# so both instruments share the same sampling instants.
time_desired = pd.to_datetime(instrument2['timestamp']).astype('int64')
data_desired = np.interp(time_desired, time_actual, data_actual)
plt.scatter(time_desired, instrument2['THERM'], color='red')
plt.scatter(time_desired, data_desired, color='blue')
plt.show()
```
%%%% Output: stream
0 1444219928575000000
1 1444219928649000000
2 1444219928749000000
3 1444219928859000000
4 1444219928959000000
5 1444219929049000000
6 1444219929149000000
7 1444219929259000000
8 1444219929339000000
9 1444219929449000000
10 1444219929549000000
11 1444219929665000000
12 1444219929758000000
13 1444219929961000000
14 1444219930024000000
15 1444219930117000000
16 1444219930227000000
17 1444219930305000000
18 1444219930414000000
19 1444219930679000000
20 1444219930679000000
21 1444219930835000000
22 1444219930897000000
23 1444219930991000000
24 1444219931116000000
25 1444219931209000000
26 1444219931365000000
27 1444219931459000000
28 1444219931553000000
29 1444219931646000000
...
1145 1444220047180000000
1146 1444220047242000000
1147 1444220047336000000
1148 1444220047445000000
1149 1444220047554000000
1150 1444220047632000000
1151 1444220047742000000
1152 1444220047851000000
1153 1444220047944000000
1154 1444220048038000000
1155 1444220048163000000
1156 1444220048256000000
1157 1444220048334000000
1158 1444220048444000000
1159 1444220048553000000
1160 1444220048646000000
1161 1444220048756000000
1162 1444220048865000000
1163 1444220049005000000
1164 1444220049099000000
1165 1444220049208000000
1166 1444220049270000000
1167 1444220049411000000
1168 1444220049551000000
1169 1444220049645000000
1170 1444220049738000000
1171 1444220049832000000
1172 1444220049926000000
1173 1444220050050000000
1174 1444220050144000000
Name: timestamp, dtype: int64
%% Cell type:code id: tags:
``` python
# Convert the timestamp strings to nanosecond-since-epoch integers.
# .astype(np.int64) is the supported conversion in current pandas
# (equivalent to the .view('i8') used earlier in this notebook).
pd.to_datetime(instrument1['timestamp']).astype(np.int64)
```
%%%% Output: execute_result
0 1444219928575000000
1 1444219928649000000
2 1444219928749000000
3 1444219928859000000
4 1444219928959000000
5 1444219929049000000
6 1444219929149000000
7 1444219929259000000
8 1444219929339000000
9 1444219929449000000
10 1444219929549000000
11 1444219929665000000
12 1444219929758000000
13 1444219929961000000
14 1444219930024000000
15 1444219930117000000
16 1444219930227000000
17 1444219930305000000
18 1444219930414000000
19 1444219930679000000
20 1444219930679000000
21 1444219930835000000
22 1444219930897000000
23 1444219930991000000
24 1444219931116000000
25 1444219931209000000
26 1444219931365000000
27 1444219931459000000
28 1444219931553000000
29 1444219931646000000
...
1145 1444220047180000000
1146 1444220047242000000
1147 1444220047336000000
1148 1444220047445000000
1149 1444220047554000000
1150 1444220047632000000
1151 1444220047742000000
1152 1444220047851000000
1153 1444220047944000000
1154 1444220048038000000
1155 1444220048163000000
1156 1444220048256000000
1157 1444220048334000000
1158 1444220048444000000
1159 1444220048553000000
1160 1444220048646000000
1161 1444220048756000000
1162 1444220048865000000
1163 1444220049005000000
1164 1444220049099000000
1165 1444220049208000000
1166 1444220049270000000
1167 1444220049411000000
1168 1444220049551000000
1169 1444220049645000000
1170 1444220049738000000
1171 1444220049832000000
1172 1444220049926000000
1173 1444220050050000000
1174 1444220050144000000
Name: timestamp, dtype: int64
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
This diff is collapsed.
%% Cell type:code id: tags:
``` python
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# pandas removed TimeGrouper (and the pandas.tseries.resample module path);
# pd.Grouper(freq=...) is the modern replacement.  Keep the old import when it
# exists, otherwise provide a shim so TimeGrouper('10s') call sites still work.
try:
    from pandas.tseries.resample import TimeGrouper  # pandas < 0.23
except ImportError:
    def TimeGrouper(freq):
        """Compatibility shim: old TimeGrouper took freq positionally."""
        return pd.Grouper(freq=freq)
```
%% Cell type:code id: tags:
``` python
# Load CTD data
# Load CTD data for one week from the ERDDAP server (skiprows=[1] drops the
# units row that ERDDAP inserts below the header).
ctd = pd.read_csv('http://belafonte.ocean.dal.ca:8080/erddap/tabledap/bedford_basin_level_2_bop_unbinned_view_ctd.csv?&time>=2015-12-03T00:00:00Z&time<2015-12-10T00:00:00Z&orderBy(%22time%22)', skiprows=[1])
ctd['time'] = pd.to_datetime(ctd['time'])
print(ctd.columns.tolist())
```
%%%% Output: stream
['time', 'depth', 'latitude', 'longitude', 'sea_water_temperature', 'sea_water_temperature_qc', 'sea_water_conservative_temperature', 'sea_water_conservative_temperature_qc', 'sea_water_density', 'sea_water_density_qc', 'sea_water_practical_salinity', 'sea_water_practical_salinity_qc', 'sea_water_absolute_salinity', 'sea_water_absolute_salinity_qc', 'sea_water_salinity', 'sea_water_salinity_qc', 'sea_water_electrical_conductivity', 'sea_water_electrical_conductivity_qc', 'sea_water_ph_reported_on_total_scale', 'sea_water_ph_reported_on_total_scale_qc', 'mole_concentration_of_dissolved_molecular_oxygen_in_sea_water', 'mole_concentration_of_dissolved_molecular_oxygen_in_sea_water_q']
%% Cell type:code id: tags:
``` python
# Temperature profile; flip the y axis so depth increases downward.
ax = plt.gca()
ax.plot(ctd['sea_water_temperature'], ctd['depth'])
ax.invert_yaxis()
plt.show()
```
%% Cell type:code id: tags:
``` python
# Load fluorometer data
# Load fluorometer data for the same week (skiprows=[1] drops ERDDAP's units row).
fl = pd.read_csv('http://belafonte.ocean.dal.ca:8080/erddap/tabledap/bedford_basin_level_2_bop_unbinned_view_fluorometer.csv?&time>=2015-12-03T00:00:00Z&time<2015-12-10T00:00:00Z&orderBy(%22time%22)', skiprows=[1])
fl['time'] = pd.to_datetime(fl['time'])
print(fl.columns.tolist())
```
%%%% Output: stream
['time', 'depth', 'latitude', 'longitude', 'fluorescence_chlorophyll_red_to_far_red', 'fluorescence_chlorophyll_red_to_far_red_qc', 'fluorescence_chlorophyll_blue_to_red', 'fluorescence_chlorophyll_blue_to_red_qc', 'fluorescence_cdom_uv_to_blue', 'fluorescence_cdom_uv_to_blue_qc']
%% Cell type:code id: tags:
``` python
# CDOM fluorescence profile; invert the y axis so depth increases downward.
ax = plt.gca()
ax.plot(fl['fluorescence_cdom_uv_to_blue'], fl['depth'])
ax.invert_yaxis()
plt.show()
```
%% Cell type:code id: tags:
``` python
# Time binning the easy way with pandas.
# The time-based grouper needs the DataFrame's index to be the time column;
# by default the index is just a generated sequence of increasing integers.
ctd = ctd.set_index('time')
fl = fl.set_index('time')
print(ctd.index)
```
%%%% Output: stream
DatetimeIndex(['2015-12-09 15:04:31', '2015-12-09 15:04:31',
'2015-12-09 15:04:31', '2015-12-09 15:04:31',
'2015-12-09 15:04:31', '2015-12-09 15:04:32',
'2015-12-09 15:04:32', '2015-12-09 15:04:32',
'2015-12-09 15:04:32', '2015-12-09 15:04:33',
...
'2015-12-09 15:07:10', '2015-12-09 15:07:10',
'2015-12-09 15:07:11', '2015-12-09 15:07:11',
'2015-12-09 15:07:11', '2015-12-09 15:07:11',
'2015-12-09 15:07:11', '2015-12-09 15:07:12',
'2015-12-09 15:07:12', '2015-12-09 15:07:12'],
dtype='datetime64[ns]', name=u'time', length=811, freq=None)
%% Cell type:code id: tags:
``` python
# Group into 10-second bins.  pd.Grouper(freq=...) is the supported replacement
# for the removed TimeGrouper class; pd is already imported above.
grouped_ctd = ctd.groupby(pd.Grouper(freq='10s'))
grouped_fl = fl.groupby(pd.Grouper(freq='10s'))
print(grouped_ctd.groups)
```
%%%% Output: stream
{Timestamp('2015-12-09 15:04:40', offset='10S'): 95, Timestamp('2015-12-09 15:05:40', offset='10S'): 396, Timestamp('2015-12-09 15:06:40', offset='10S'): 697, Timestamp('2015-12-09 15:05:50', offset='10S'): 446, Timestamp('2015-12-09 15:06:20', offset='10S'): 597, Timestamp('2015-12-09 15:04:50', offset='10S'): 145, Timestamp('2015-12-09 15:05:00', offset='10S'): 196, Timestamp('2015-12-09 15:05:10', offset='10S'): 246, Timestamp('2015-12-09 15:07:00', offset='10S'): 798, Timestamp('2015-12-09 15:06:30', offset='10S'): 647, Timestamp('2015-12-09 15:07:10', offset='10S'): 811, Timestamp('2015-12-09 15:04:30', offset='10S'): 45, Timestamp('2015-12-09 15:06:10', offset='10S'): 547, Timestamp('2015-12-09 15:05:20', offset='10S'): 296, Timestamp('2015-12-09 15:05:30', offset='10S'): 346, Timestamp('2015-12-09 15:06:00', offset='10S'): 497, Timestamp('2015-12-09 15:06:50', offset='10S'): 748}
%% Cell type:code id: tags:
``` python
# Compute the median of each bin
# Collapse every 10-second bin to its median value, then plot the
# binned temperature profile with depth increasing downward.
binned_ctd = grouped_ctd.median()
binned_fl = grouped_fl.median()
ax = plt.gca()
ax.plot(binned_ctd['sea_water_temperature'], binned_ctd['depth'])
ax.invert_yaxis()
plt.show()
```
%% Cell type:code id: tags:
``` python
# Binned CDOM fluorescence profile, depth increasing downward.
ax = plt.gca()
ax.plot(binned_fl['fluorescence_cdom_uv_to_blue'], binned_fl['depth'])
ax.invert_yaxis()
plt.show()
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
import pandas as pd
from datetime import datetime


def group_into_interval(path_to_file, time_interval=30, sheetname='Filter'):
    """Read an Excel log and aggregate HR/QI readings into fixed time bins.

    Parameters
    ----------
    path_to_file : str
        Path to the .xlsx file.  The sheet must have 'Date', 'Time', 'HR'
        and 'QI' columns.
    time_interval : int
        Bin width in minutes.
    sheetname : str
        Worksheet name to read.

    Returns
    -------
    pandas.DataFrame
        One row per interval with HR/QI mean and min; 'datetime' holds the
        right edge of each bin.
    """
    # sheet_name is the current read_excel keyword (the old 'sheetname'
    # keyword was removed from pandas).
    data = pd.read_excel(path_to_file, sheet_name=sheetname)

    # Merge the Date and Time columns into a single datetime column.
    # Vectorized string concatenation replaces the original row-by-row
    # iterrows() loop, which rebuilt the whole frame one row at a time.
    data['datetime'] = pd.to_datetime(
        data['Date'].astype(str) + ' ' + data['Time'].astype(str),
        format="%d.%m.%y %H:%M:%S",
    )
    data = data.drop(['Date', 'Time'], axis=1)

    # Make the datetime column the index so it can be used for the range tests.
    data = data.set_index('datetime')

    # Bin edges every `time_interval` minutes across the span of the data.
    time_range = pd.date_range(data.index[0], data.index[-1],
                               freq='%sMin' % time_interval)
    print(time_range)

    groups = []
    # Pair consecutive edges instead of indexing time_range[i + 1] inside a
    # try/except IndexError.  Each window is open on the left and closed on
    # the right, matching the original (index > start) & (index <= end) test.
    for start, end in zip(time_range[:-1], time_range[1:]):
        window = data[(data.index > start) & (data.index <= end)]
        averaged_row = window.mean()
        minned_row = window.min()
        groups.append({
            'HR_mean': averaged_row['HR'],
            'QI_mean': averaged_row['QI'],
            'HR_min': minned_row['HR'],
            'QI_min': minned_row['QI'],
            # Label each bin with its right edge, as the original did.
            'datetime': end,
        })
    return pd.DataFrame(groups)
```
%% Cell type:code id: tags:
``` python
# An example calling the function and writing the results to a CSV file:
filepath = '17_2066_0131.xlsx'
# filepath = '28_2021_0127.xlsx'
# NOTE(review): replace(',', '_') is a no-op for these filenames (no commas),
# so the outputs are e.g. '17_2066_0131.xlsx_60.csv'.  It may have been meant
# to strip the '.xlsx' extension — confirm the intended output name.
df = group_into_interval(filepath, time_interval=60)
df.to_csv(filepath.replace(',','_') + '_60.csv', index=False)
df = group_into_interval(filepath, time_interval=30)
df.to_csv(filepath.replace(',','_') + '_30.csv', index=False)
```
%%%% Output: stream
DatetimeIndex(['2015-08-20 11:33:00', '2015-08-20 12:33:00',
'2015-08-20 13:33:00', '2015-08-20 14:33:00',
'2015-08-20 15:33:00', '2015-08-20 16:33:00',
'2015-08-20 17:33:00', '2015-08-20 18:33:00',
'2015-08-20 19:33:00', '2015-08-20 20:33:00',
'2015-08-20 21:33:00', '2015-08-20 22:33:00',
'2015-08-20 23:33:00', '2015-08-21 00:33:00',
'2015-08-21 01:33:00', '2015-08-21 02:33:00',
'2015-08-21 03:33:00', '2015-08-21 04:33:00',
'2015-08-21 05:33:00', '2015-08-21 06:33:00',
'2015-08-21 07:33:00', '2015-08-21 08:33:00',
'2015-08-21 09:33:00', '2015-08-21 10:33:00',
'2015-08-21 11:33:00', '2015-08-21 12:33:00',
'2015-08-21 13:33:00', '2015-08-21 14:33:00',
'2015-08-21 15:33:00', '2015-08-21 16:33:00',
'2015-08-21 17:33:00', '2015-08-21 18:33:00',
'2015-08-21 19:33:00', '2015-08-21 20:33:00',
'2015-08-21 21:33:00', '2015-08-21 22:33:00',
'2015-08-21 23:33:00', '2015-08-22 00:33:00',
'2015-08-22 01:33:00', '2015-08-22 02:33:00',
'2015-08-22 03:33:00', '2015-08-22 04:33:00',
'2015-08-22 05:33:00', '2015-08-22 06:33:00',
'2015-08-22 07:33:00', '2015-08-22 08:33:00',
'2015-08-22 09:33:00', '2015-08-22 10:33:00',
'2015-08-22 11:33:00', '2015-08-22 12:33:00',
'2015-08-22 13:33:00'],
dtype='datetime64[ns]', freq='60T')
DatetimeIndex(['2015-08-20 11:33:00', '2015-08-20 12:03:00',
'2015-08-20 12:33:00', '2015-08-20 13:03:00',
'2015-08-20 13:33:00', '2015-08-20 14:03:00',
'2015-08-20 14:33:00', '2015-08-20 15:03:00',
'2015-08-20 15:33:00', '2015-08-20 16:03:00',
...
'2015-08-22 09:33:00', '2015-08-22 10:03:00',
'2015-08-22 10:33:00', '2015-08-22 11:03:00',
'2015-08-22 11:33:00', '2015-08-22 12:03:00',
'2015-08-22 12:33:00', '2015-08-22 13:03:00',
'2015-08-22 13:33:00', '2015-08-22 14:03:00'],
dtype='datetime64[ns]', length=102, freq='30T')
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
%% Cell type:code id: tags:
``` python
```
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment