When we think of coral reefs, we are reminded of some of our planet's most beautiful yet delicate creatures: the corals. Capable of creating immense three-dimensional structures in the water column, coral reefs support huge ecosystems of marine life with their calcium carbonate skeletons. However, coral reefs are highly sensitive to our changing climate, and according to the historical data presented here, they are in trouble.
In this tutorial I will run publicly available coral reef data through a data science pipeline, applying techniques such as linear regression, polynomial regression, multivariate regression, correlation analysis, and machine learning. In the end I hope to draw conclusions about the importance of water parameters to coral health and make predictions about where our oceans are headed.
A coral is actually not a plant but an animal. Corals are marine invertebrates that are usually colonial but can also be solitary. They have fleshy bodies and (aside from soft corals) build a hard skeleton that supports them as they filter food from the water column. Colonial corals are composed of many individual units called polyps, each with tentacles and a mouth for capturing food. There is a wide variety of coral, broadly divided into reef-building hard (stony) corals and soft corals.
pH is a chemical measurement of how acidic a solution is, so when we talk about declining ocean pH, we are referring to ocean acidification. The pH scale ranges from 0 to 14 and is logarithmic: a change of 1 pH unit corresponds to a tenfold change in hydrogen-ion concentration. For this reason, pH is measured with precision to the hundredths place, for example 8.03.
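To make the logarithmic scale concrete, here is a quick back-of-the-envelope calculation (plain Python, independent of the datasets used later) showing how even a 0.1-unit pH drop maps to a sizable change in hydrogen-ion concentration:

# pH is the negative base-10 log of the hydrogen-ion concentration,
# so a drop from pH 8.1 to 8.0 means [H+] increases by a factor of 10**0.1
ph_before, ph_after = 8.10, 8.00
ratio = 10 ** (ph_before - ph_after)
print("Relative increase in [H+]: {:.0%}".format(ratio - 1))  # ~26%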
In general, reef aquariums and ocean pH should be kept between 8.0 and 8.3. When pH dips below 8.0, the coral's ability to calcify and build its carbonate skeleton becomes compromised. For a more detailed look at pH, check out the video below by Bulk Reef Supply discussing pH when growing a reef in a captive environment.
Reef-building corals rely on a symbiotic relationship with algae, called zooxanthellae, that live inside their flesh. Zooxanthellae give healthy coral its brown coloration and provide nutrients to the coral through photosynthesis; in return, the algae get a safe haven and protection inside the coral's tissue.
When water parameters such as temperature and pH go out of whack, this symbiotic bond between the coral and its zooxanthellae breaks down. The zooxanthellae are expelled from the coral, leaving behind a bleached-white coral that can no longer derive energy from sunlight. Bleached corals are still alive and can recover, but they can just as easily die, starved by the loss of their symbiotic algae.
Python 3 is used along with the following imports and libraries, grouped by the task for which they were intended.
# Web scraping imports
import sys
!{sys.executable} -m pip install html5lib
import requests
from bs4 import BeautifulSoup
# Dataframe and analysis imports
import numpy as np
import pandas
import re
import matplotlib.pyplot as plt
from scipy.stats import spearmanr
import seaborn as sb
# Mapping coordinate data
!pip install folium
import folium
from folium.plugins import MarkerCluster
from folium.plugins import FastMarkerCluster
from folium.plugins import HeatMap
from folium.map import FeatureGroup, Marker
# Linear and Polynomial Regression
from sklearn import linear_model
from sklearn.preprocessing import PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.linear_model import LinearRegression
The first dataset, by Wei et al., 2009 (1), reconstructs ocean pH from 1807 to 2004 using isotopic analysis of Porites coral skeletons collected from the Great Barrier Reef of Australia. Although these reconstructed water parameters are proxies rather than direct measurements, the coral skeletons serve as an indispensable tool for establishing what our oceans were like before human activity impacted our planet.
The highlight of this study is the d11B and d13C isotopic data. Using established geochemical formulas, the researchers were able to estimate with high precision what the oceanic pH was during a specific year, reaching back roughly 200 years. The d13C value, while not described in much detail at the source, is likely a measure of the carbon density within the coral. Since corals build their skeletons from calcium carbonate (drawing on dissolved bicarbonate, HCO3-), the assumption I made here is that d13C measures the carbonate density of the coral skeleton for the predicted year. The researchers assert that d13C tracks predicted oceanic CO2, but whether the 13C within the coral skeleton accurately predicts atmospheric CO2 levels lies outside the scope of this tutorial. The other predicted values are for magnesium, strontium, and barium, which are common components of coral skeletons and also provide a window into what our ocean chemistry was like through the years.
The second dataset, by Donner et al., 2017 (2), collects over 7,000 coral reef bleaching events from around the globe, making it one of the largest reef bleaching databases ever assembled. This exhaustive record tracks the location, coordinates, month, year, depth, and severity of reports made by research institutions or submitted voluntarily. Each report's severity is rated on a scale from -1 (severity unknown) to 3 (severe bleaching, over 50% of coral bleached).
Data from both sources (1) and (2) are publicly available for download; citations are given at the bottom of this page. On each source page you can find download links for the data as an Excel file.
For the first study, I demonstrate Python's ability to extract the data from a web page using web scraping and parsing.
# Send an html request and parse using beautifulSoup
URL ="https://www.ncei.noaa.gov/pub/data/paleo/coral/west_pacific/great_barrier/wei2009_noaa.txt"
headers = {"user-agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X x.y; rv:42.0) Gecko/20100101 Firefox/42.0"}
r = requests.get(URL, headers = headers)
html = r.content
# Run beautiful soup on html
root = BeautifulSoup(html, 'html.parser')
# Extract the data
# Since there is no table in this html, use a regexp to find character range where data begins
rootstr = str(root)
match = re.search(r"AREO", rootstr, re.M)
start = match.start()
# Keep everything from the start of the data onward, then split into rows
datastr = rootstr[start:]
datastr = datastr.split('\n')
# Put string rows into dataframe df
df = pandas.DataFrame(datastr)
# Split each row using '\t' delimiter and expand into columns
df = df[0].str.split('\t',expand=True)
# Note: these labels are offset from the underlying data because the sample name
# and age label share the first field of the source text; in this dataframe the
# 'd18O' column actually holds the year and 'pH_2s' holds the reconstructed pH,
# and the code below uses them that way
df.columns = ['sampage', 'd18O', 'Sr/Ca', 'd13C', 'Mg/Ca', 'Ba/Ca', 'd11B', 'd11B_2s', 'pH', 'pH_2s', 'unlabeled']
# Split first column further by space delimiter
tempdf = df['sampage'].str.split(' ',expand=True)
tempdf.columns = ['samp','age']
# Organize the dataframe df a bit
df = df.drop(['sampage', 'unlabeled'], axis=1)
df = pandas.merge(tempdf, df, left_index=True, right_index=True)
# Cast the data of interest to numeric types so we can work with it
df = df.astype({"samp": str, "age": str, "d18O": int, "Sr/Ca": float,
                "d13C": float, "Mg/Ca": float, "Ba/Ca": float,
                "d11B": float, "d11B_2s": float, "pH": float, "pH_2s": float})
df.head(5)
| | samp | age | d18O | Sr/Ca | d13C | Mg/Ca | Ba/Ca | d11B | d11B_2s | pH | pH_2s |
|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | AREO | 4-1805-09 | 1807 | -4.40 | 7.762 | -1.59 | 4.310 | 3.53 | 23.61 | 0.16 | 8.02 |
| 1 | AREO | 4-1810-14 | 1812 | -4.49 | 7.910 | -2.00 | 4.139 | 4.03 | 24.53 | 0.15 | 8.13 |
| 2 | AREO | 4-1815-19 | 1817 | -4.56 | 7.773 | -1.47 | 4.302 | 3.36 | 22.74 | 0.15 | 7.90 |
| 3 | AREO | 4-1820-24 | 1822 | -4.49 | 7.792 | -1.25 | 4.101 | 3.71 | 23.86 | 0.16 | 8.05 |
| 4 | AREO | 4-1825-29 | 1827 | -4.37 | 7.745 | -1.36 | 4.249 | 3.31 | 24.77 | 0.12 | 8.15 |
The dataframe above displays the first 5 rows for ease of viewing. The columns used in this analysis are the year (stored in 'd18O', per the naming note above), the reconstructed pH ('pH_2s'), the elemental ratios ('Mg/Ca', 'Sr/Ca', 'Ba/Ca'), and the carbon values ('d13C').
For the second study, the data was loaded directly from an Excel file into a pandas dataframe.
# Read the excel file and cast the GPS coordinates to type float for later use
datafile = pandas.read_excel('BleachingdatabaseV1.0.xlsx')
datafile["LATITUDE"] = datafile["LATITUDE"].astype(float)
datafile["LONGITUDE"] = datafile["LONGITUDE"].astype(float)
datafile["YEAR"] = datafile["YEAR"].astype(int)
datafile.head(3)
| | COUNTRY | LOCATION | SITE_NAME | LATITUDE | LONGITUDE | MONTH | YEAR | DEPTH | SEVERITY_CODE | PERCENT_BLEACHED | MORTALITY_CODE | PERCENT_MORTALITY | SURVEY_TYPE | SOURCE | NAME | CITATION | COMMENTS | ENTRY_CODE | DATABASE_CODE |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Jamaica | Jamaica | NaN | 18.200000 | -77.250000 | NaN | 1963 | NaN | -1 | NaN | NaN | NaN | NaN | ReefBase | NaN | NaN | Some of the surviving corals bleached (Goreau,... | NaN | 1 |
| 1 | Puerto Rico | Puerto Rico | NaN | 17.866667 | -66.433333 | NaN | 1969 | NaN | -1 | NaN | NaN | NaN | NaN | ReefBase | NaN | NaN | An intensive and extensive bleaching event occ... | NaN | 1 |
| 2 | Colombia | Providencia Island ( Isla de Providencia ) | NaN | 13.358333 | -81.383333 | NaN | 1969 | 1-20 m | -1 | NaN | NaN | NaN | NaN | ReefBase | NaN | NaN | NaN | NaN | 1 |
# This function creates a list of (start, end) tuples so we can aggregate reports over different bucket widths
# e.g. create_ranges(1963, 2012, 12) creates four 12-year ranges between 1963 and 2011
# e.g. passing 1 as the step creates yearly ranges so all the data can be viewed
def create_ranges(start, end, iterator):
# Create an index for the range the data spans
ranges = list(range(start,end,iterator))
# Create a tuple for ranges (allows us to do aggregates by year)
range_arr = []
for i in range(0, len(ranges) - 1):
npele = (ranges[i], ranges[i+1])
range_arr.append(npele)
return range_arr
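For example, calling the function with a step of 12 produces the four 12-year buckets used later in this tutorial:

# Four 12-year buckets covering 1963-2011
print(create_ranges(1963, 2012, 12))
# [(1963, 1975), (1975, 1987), (1987, 1999), (1999, 2011)]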
def sum_reports(range_arr, print_output):
    # Count how many bleaching reports fall within each (start, end) year range
    numevents = []
    year = []
    for i in range(0, len(range_arr)):
        count = 0
        start = range_arr[i][0]
        end = range_arr[i][1]
        # Check every report in the dataframe (row 0 included)
        for j in range(len(datafile)):
            if datafile.YEAR[j] in range(start, end):
                count = count + 1
        year.append(start)
        numevents.append(count)
        if print_output:
            print("{} - {}: {}".format(start, end, count))
    return year, numevents
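As an aside, the same per-bucket counts can be computed in a vectorized way. This is a minimal sketch using pandas.cut, assuming the datafile dataframe loaded above:

# Vectorized alternative: bin YEAR into the same 12-year buckets and count reports
bins = [1963, 1975, 1987, 1999, 2011]
bucket_counts = pandas.cut(datafile['YEAR'], bins=bins, right=False).value_counts().sort_index()
print(bucket_counts)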
# Generate plot for yearly data, do not show raw output
range_yearly = create_ranges(1963, 2012, 1)
year_yearly, numevents_yearly = sum_reports(range_yearly, False)
# Generate data for the 48-year span grouped into four 12-year buckets
ranges = create_ranges(1963,2012,12)
year_range, numevents_range = sum_reports(ranges, False)
fig, (ax1,ax2) = plt.subplots(1,2)
fig.set_figheight(8)
fig.set_figwidth(18)
fig.subplots_adjust(wspace=0.30, hspace=0.4)
# Plot the yearly data as a line plot
a1 = ax1.plot(year_yearly, numevents_yearly)
ax1.set(ylabel ='Number of Reports', xlabel = 'Year')
ax1.set_title('Coral Bleaching Reports by Year')
# Plot the 12-year range data as a bar plot
nums = ['1963-1975', '1975-1987','1987-1999','1999-2011']
a2 = ax2.bar(nums, numevents_range)
ax2.set(ylabel ='Number of Reports', xlabel = 'Range of Years')
ax2.set_title('Coral Bleaching Reports Grouped by 12 Years')
Notice the considerable increase in the number of coral bleaching reports in 1998 and 2005. The data is displayed both as a line graph showing the yearly number of reports and as a bar graph aggregating (summing) reports across 12-year buckets.
# Plot every bleaching report location as a heatmap on a world map
map_osm3 = folium.Map(location=[10.490000, 1.75000], zoom_start=1)
HeatMap(datafile[['LATITUDE', 'LONGITUDE']].values.tolist()).add_to(map_osm3)
map_osm3
# init map
map_osm1 = folium.Map(location=[10.490000, 1.75000], zoom_start=2)
# Create marker clusters for easier viewing
marker_cluster1 = MarkerCluster().add_to(map_osm1)
# Iterate through the data points and plot reports from 1987 and earlier
for i in range(len(datafile)):
    if datafile.YEAR[i] <= 1987:
        # The popup displays the year, location, severity, source, and comments for each report
folium.Marker([datafile.LATITUDE[i], datafile.LONGITUDE[i]],
popup = 'Year:{}\nLocation:{}\nSeverity:{}\nSource:{}\nComments:{}'.format(
datafile.YEAR[i], datafile.LOCATION[i],
datafile.SEVERITY_CODE[i],
datafile.SOURCE[i], datafile.COMMENTS[i]),
icon = folium.Icon(color = 'darkblue', icon_color='white',
icon='skull-crossbones', angle=0, prefix ='fa')).add_to(marker_cluster1)
map_osm1
Coral bleaching reports prior to 1987
The map below displays coral bleaching events with a severity code of 3 (over 50% of coral reported bleached). There are a total of 1407 such reports between 1963 and 2011. The reports are grouped into twelve-year buckets and stored in map layers that can be toggled with the layer control in the top-right corner of the map.
range_arr = create_ranges(1963, 2012, 12)
mapa = folium.Map()
for i in range(0, len(range_arr)):
count = 0
start = range_arr[i][0]
end = range_arr[i][1]
period ='{} to {}'.format(start,end)
feature_group = FeatureGroup(name = period, show = True)
    for j in range(len(datafile)):
if datafile.YEAR[j] in range(start,end) and datafile.SEVERITY_CODE[j] == 3:
count = count + 1
feature_group.add_child(Marker(location = (datafile.LATITUDE[j], datafile.LONGITUDE[j]),
popup = 'Year:{}\nLoc:{}\nSeverity:{}\nComments:{}'.format(
datafile.YEAR[j], datafile.LOCATION[j],
datafile.SEVERITY_CODE[j], datafile.COMMENTS[j])))
mapa.add_child(feature_group)
mapa.add_child(folium.map.LayerControl())
mapa
Coral bleaching events with severity code 3 (over 50% coral bleached). There are approximately 1400 reports plotted in total.
Performance issues may arise with this map due to the large amount of data present in each map marker.
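If the rich popups are not needed, folium's FastMarkerCluster (imported at the top but not used above) renders large numbers of points much faster. A minimal sketch, assuming the same datafile:

# Faster client-side clustering, at the cost of the per-marker popups
map_fast = folium.Map(location=[10.49, 1.75], zoom_start=2)
coords = datafile[['LATITUDE', 'LONGITUDE']].dropna().values.tolist()
FastMarkerCluster(coords).add_to(map_fast)
map_fast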
Below are figures showing the raw data for reconstructed pH and the magnesium, strontium, and barium ratios, along with carbon composition.
# Helper that draws one parameter vs. year onto the shared subplot grid
# (relies on the global 'axes' array created by plt.subplots below)
def makeSubplot(xtitle, xvar, yvar, c, ytitle, title, xlim1, xlim2, ax0, ax1):
    p = df.plot(y=yvar, x=xvar, color=c, xlim=(xlim1, xlim2), ax=axes[ax0, ax1])
p.set_xlabel(xtitle, fontsize=20)
p.set_ylabel(ytitle, fontsize=18)
p.set_title(title, fontsize=20)
# Set axes and title strings for figure as well as color
xtit = 'Year'
xvar = 'd18O'
yvar1 = 'pH_2s'
c1 = 'b'
ytit1 = 'Predicted pH'
t1 = 'Predicted Ocean pH by Year'
# Make the actual plot from the dataframe df and set titles
p1 = df.plot(y=yvar1, x = xvar, color = c1, xlim = (1807, 2004), figsize =(7,5))
p1.set_xlabel(xtit)
p1.set_ylabel(ytit1)
p1.set_title(t1)
# Create subplot for the other parameter figures
fig,axes = plt.subplots(2,2)
fig.set_figheight(12)
fig.set_figwidth(18)
fig.subplots_adjust(wspace=0.30, hspace=0.4)
# Repeat for the other water parameters provided
c2 = 'c'
t2 = 'Predicted Magnesium to Calcium Ratio by Year'
ytit2 ='Predicted Mg:Ca Ratio'
yvar2 = 'Mg/Ca'
makeSubplot(xtit, xvar, yvar2, c2, ytit2, t2, 1807, 2004, 0, 0)
c3 = 'g'
t3 = 'Predicted Strontium to Calcium Ratio by Year'
ytit3 = 'Predicted Sr:Ca Ratio'
yvar3 = 'Sr/Ca'
makeSubplot(xtit, xvar, yvar3, c3, ytit3, t3, 1807, 2004, 0, 1)
c4 = 'y'
t4 = 'Predicted Barium to Calcium Ratio by Year'
ytit4 = 'Predicted Ba:Ca Ratio'
yvar4 = 'Ba/Ca'
makeSubplot(xtit, xvar, yvar4, c4, ytit4, t4, 1807, 2004, 1, 0)
c5 = 'm'
t5 = 'Predicted Carbon Composition in Seawater by Year'
ytit5 = 'd13C'
yvar5 = 'd13C'
makeSubplot(xtit, xvar, yvar5, c5, ytit5, t5, 1807, 2004, 1, 1)
There appears to be an overall declining trend across all measurements, overlaid with an oscillatory pattern (a sharp increase in some years followed by a sharp decrease). This may reflect natural year-to-year cycles, and possibly decadal oscillations, in the Pacific Ocean that affect these parameters.
The figures above become noticeably noisier after 1940. This is because prior to 1940 samples were taken every 5 years, while after 1940 samples were taken every year, providing higher temporal resolution.
To get a better idea of the precision of the reconstructed pH values, let's see how the pH data aligns with world events by plotting it against the number of mass coral bleaching reports across years.
# Transfer coral bleaching report data to a dataframe
altyears = list(range(1963,2011))
altdatadf = pandas.DataFrame(altyears)
altdatadf['numEvents'] = pandas.DataFrame(numevents_yearly)
altdatadf.columns = ['Year','NumEvents']
# Crop the data so that the years both start at 1963 which is the start of (2)'s data collection
cropdf = df[['d18O','pH_2s']].copy()
cropdf = cropdf[cropdf['d18O'] >= 1963]
cropdf = cropdf.reset_index()
cropdf.columns = ['idx','Year','pH_2s']
# Do a join to merge both datasets on year
merged_datasets = altdatadf.join(cropdf.set_index('Year'), on='Year')
# Plot the data with two y-axes for pH and Bleaching Reports
fig, ax = plt.subplots()
merged_datasets.plot(x = 'Year', y = 'NumEvents', ax=ax, style = 'r-')
merged_datasets.plot(x = 'Year', y = 'pH_2s', ax=ax, style='b-', secondary_y= True)
ax.set_ylabel('Number of Reports')
ax.right_ax.set_ylabel('pH')
ax.set_xlabel('Year')
ax.set_title('pH and Mass Bleaching Reports by Year')
The above figure demonstrates the precision of dataset (1) in predicting ocean pH: the two large dips in reconstructed pH around 1998 and 2004 (dataset (1), blue) align with the large spikes in coral bleaching reports in 1998 and 2005 (dataset (2), red).
The following sections take the analysis a step further by applying a series of regression models and correlation analyses to the pH, strontium, magnesium, barium, and carbon data. We will perform linear regression, polynomial regression, correlation, and machine learning for multivariate regression.
# Scatter-plot a parameter against year, fit and overlay a least-squares line,
# and return the coefficients [intercept, slope]
def makeLinReg(ef, xvar, yvar, ytit, title1, c, subplot, ax0, ax1):
if subplot == True:
e1 = ef.plot.scatter(x=xvar, y=yvar, title = title1, color = c, ax=axes[ax0,ax1])
e1.set_xlabel('Year', fontsize=15)
e1.set_ylabel(ytit, fontsize=15)
e1.set_title(title1, fontsize= 20)
else:
e1 = ef.plot.scatter(x=xvar, y=yvar, title = title1, color = c)
e1.set_xlabel('Year')
e1.set_ylabel(ytit)
e1.set_title(title1)
# Extract data for the regression model below
years = ef[xvar]
yvariable = ef[yvar]
# Convert year array into 2D array
years = np.array([[h] for h in years])
# Fit regression line to model
model = linear_model.LinearRegression().fit(years, yvariable)
e1.plot(years, model.predict(years), color = 'k')
#print('Model Coefficient of Determination: ', model.score(years, yvariable))
printedmodel = 'Year' + '\n\nModel: ' + str(round(model.intercept_, 4)) + ' + ' + str(round(model.coef_[0],4)) + '*x'
e1.set_xlabel(printedmodel)
# Save the coefficients and return them
coef = []
coef.append(model.intercept_)
coef.append(model.coef_[0])
return coef
coef1 = makeLinReg(df, 'd18O', 'pH_2s', ytit1, t1, c1, False, 0, 0)
# Create subplot for the other parameter figures
fig,axes = plt.subplots(2,2)
fig.set_figheight(12)
fig.set_figwidth(18)
fig.subplots_adjust(wspace=0.30, hspace=0.4)
coef2 = makeLinReg(df, 'd18O', 'Mg/Ca', ytit2, t2, c2, True, 0,0)
coef3 = makeLinReg(df, 'd18O', 'Sr/Ca', ytit3, t3, c3, True, 0,1)
coef4 = makeLinReg(df, 'd18O', 'Ba/Ca', ytit4, t4, c4, True, 1,0)
coef5 = makeLinReg(df, 'd18O', 'd13C', ytit5, t5, c5, True, 1,1)
Here we have fitted a linear model to each of the water parameters. The x-axis is year and the y-axis is pH, Mg/Ca, Sr/Ca, Ba/Ca, or carbon composition. The linear model is printed below each figure and can be used to predict each water parameter for a given year.
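For instance, the coefficients returned by makeLinReg can be applied directly to extrapolate a parameter; a small sketch using coef1 from the pH fit above:

# Linear extrapolation of pH to a future year: intercept + slope * year
year = 2020
predicted_ph_linear = coef1[0] + coef1[1] * year
print("Linear-model pH estimate for {}: {:.2f}".format(year, predicted_ph_linear))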
Unfortunately for some parameters, the linear model does not appear to be a great fit, particularly for pH, Ba/Ca and Mg/Ca. Below we will try to get a better fit on the data with polynomial regression.
# Scatter-plot a parameter against year, fit a degree-d polynomial with np.polyfit,
# overlay the fitted curve, and return the polynomial weights
def makePolyReg(ef, xvar, yvar, ytit, title1, c, d, subplot, ax0, ax1):
if subplot == True:
e1 = ef.plot.scatter(x=xvar, y=yvar, title = title1, color = c, ax=axes[ax0,ax1])
e1.set_xlabel('Year', fontsize=15)
e1.set_ylabel(ytit, fontsize=15)
e1.set_title(title1, fontsize=20)
else:
e1 = ef.plot.scatter(x=xvar, y=yvar, title = title1, color = c)
e1.set_xlabel('Year')
e1.set_ylabel(ytit)
e1.set_title(title1)
# Extract data for the regression model below
x = ef[xvar]
y = ef[yvar]
weights = np.polyfit(x,y, d)
    # weights holds the fitted coefficients, ordered from the highest degree down to the constant term
counter = 0
output = ""
for i in range(d, -1, -1):
if counter == 0:
output = output + str(round(weights[i],2)) + " + "
elif counter == 1:
output = output + str(round(weights[i],2)) + "x + "
elif counter in range(2, d):
output = output + str(round(weights[i],2)) + "x^" + str(counter) + " + "
elif counter == d:
output = output + str(round(weights[i],2)) + "x^" + str(counter)
counter = counter + 1
printedmodel = "Model: " + output
e1.set_xlabel("Year\n\n" + printedmodel)
model = np.poly1d(weights)
ypred = model(x)
e1.scatter(x, y, color = c)
e1.plot(x, ypred, color = 'k')
return weights
pH_weights = makePolyReg(df, 'd18O', 'pH_2s', ytit1, t1, c1, 4, False, 0,0)
# Create subplot for the other parameter figures
fig,axes = plt.subplots(2,2)
fig.set_figheight(12)
fig.set_figwidth(18)
fig.subplots_adjust(wspace=0.30, hspace=0.4)
makePolyReg(df, 'd18O', 'Mg/Ca', ytit2, t2, c2, 2, True, 0,0)
makePolyReg(df, 'd18O', 'Sr/Ca', ytit3, t3, c3, 2, True, 0,1)
makePolyReg(df, 'd18O', 'Ba/Ca', ytit4, t4, c4, 2, True, 1,0)
makePolyReg(df, 'd18O', 'd13C', ytit5, t5, c5, 3, True, 1,1)
# Evaluate the fitted degree-4 pH polynomial at the year 2020
# (coefficients below are copied from pH_weights; np.poly1d(pH_weights)(x) gives the same value)
x = 2020
result = (-3.01857837e+04 + 6.42725063e+01*x - 5.12760507e-02*x**2
          + 1.81707624e-05*x**3 - 2.41332927e-09*x**4)
print("Predicted pH in the year 2020 - Polynomial Regression: {}".format(result))
Predicted pH in the year 2020 - Polynomial Regression: 7.597268581252138
The x-axis is year and the y-axis is pH, Mg/Ca, Sr/Ca, Ba/Ca, or carbon composition. The Mg/Ca, Sr/Ca, and Ba/Ca data were modeled with quadratic (degree-2) polynomials, while pH used a degree-4 and carbon composition a degree-3 polynomial to best fit the data.
Using the degree-4 polynomial regression formula for pH, we predict that the pH in 2020 would be approximately 7.59. Whether this matches the pH actually observed this year would be a great question to investigate!
The benefit of polynomial regression is that, by increasing the degree of the polynomial, the model can fit increasingly complex curves (at the risk of overfitting). The model for each figure is shown below its x-axis, with coefficients rounded to the hundredths place.
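As a cross-check on the degree choice, scikit-learn's PolynomialFeatures (imported at the top but not used so far) can fit the same kind of model and report the coefficient of determination per degree. This is a minimal sketch; it only measures fit on the training data and says nothing about overfitting, and the degree range is an arbitrary choice:

# Compare training-set R^2 across polynomial degrees for the pH data
X = df[['d18O']].values   # year column (see the note on column naming above)
y = df['pH_2s'].values    # reconstructed pH
for degree in range(1, 6):
    X_poly = PolynomialFeatures(degree=degree).fit_transform(X)
    model = LinearRegression().fit(X_poly, y)
    print("degree {}: R^2 = {:.3f}".format(degree, r2_score(y, model.predict(X_poly))))

Because the raw year values are large (1807 to 2004), centering them first (for example, subtracting the mean year) improves numerical stability at higher degrees.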
def print_corr(x, y):
corr1, _ = spearmanr(x,y)
#print('Spearmans correlation: %.3f' % corr1)
return round(corr1,3)
# Create subplot for the other parameter figures
fig_g,axes = plt.subplots(2,2)
fig_g.set_figheight(12)
fig_g.set_figwidth(18)
fig_g.subplots_adjust(wspace=0.30, hspace=0.4)
g1 = df.plot.scatter(x = 'pH_2s', y = 'd13C', color = c5, ax=axes[0,0])
corr = print_corr(df.pH_2s, df.d13C)
xlab = 'pH\nCorrelation Coeff: ' + str(corr)
g1.set_title('Average Skeletal Carbon by pH', fontsize=20)
g1.set_xlabel(xlab, fontsize=15)
g1.set_ylabel('Skeletal Carbon', fontsize=15)
g2 = df.plot.scatter(x = 'pH_2s', y = 'Mg/Ca', color = c2, ax=axes[0,1])
corr = print_corr(df.pH_2s, df['Mg/Ca'])
xlab = 'pH\nCorrelation Coeff: ' + str(corr)
g2.set_title('Average Mg:Ca Ratio by pH', fontsize=20)
g2.set_xlabel(xlab, fontsize=15)
g2.set_ylabel('Mg/Ca Ratio', fontsize=15)
g3 = df.plot.scatter(x = 'pH_2s', y = 'Ba/Ca', color = c3, ax=axes[1,0])
corr = print_corr(df.pH_2s, df['Ba/Ca'])
xlab = 'pH\nCorrelation Coeff: ' + str(corr)
g3.set_title('Average Ba:Ca Ratio by pH', fontsize=20)
g3.set_xlabel(xlab, fontsize=15)
g3.set_ylabel('Ba/Ca Ratio', fontsize=15)
g4 = df.plot.scatter(x = 'pH_2s', y = 'Sr/Ca', color = c4, ax=axes[1,1])
corr = print_corr(df.pH_2s, df['Sr/Ca'])
xlab = 'pH\nCorrelation Coeff: ' + str(corr)
g4.set_title('Average Sr:Ca Ratio by pH', fontsize=20)
g4.set_xlabel(xlab, fontsize=15)
g4.set_ylabel('Sr/Ca Ratio', fontsize=15)
For machine learning we will use gradient descent to develop a multivariate formula for predicting pH. The algorithm runs for a fixed number of iterations (in this case either 200 or 1000): at each step it computes the cost f = 0.5*||X*theta - y||^2 and its gradient g = X^T(X*theta - y), then takes the update theta = theta - alpha*g, so the coefficients gradually converge toward the values that predict pH with the lowest cost.
# Gradient descent function implementation with appropriate alpha value for reaching a minimum cost
def grad_descent(x, y, T, alpha, count, shortoutput):
m,n = x.shape
theta = np.zeros(n)
f = np.zeros(T)
    # Run T iterations of gradient descent
    print('Sample gradient descent values printed from the first and last iterations')
for i in range(T):
        # Use a counter so we only display the first and last couple of iterations
        count = count + 1
# Print the cost of each movement during descent
f[i] = 0.5*np.linalg.norm(x.dot(theta) - y)**2
g = x.T.dot(x.dot(theta)-y)
theta = theta - alpha*g
if shortoutput == 'yes':
if count < 3 or count > T - 2:
# Print out important values
print("Iteration: {}\nF[i]: {}\nG: {}\nTHETA: {}\n________________________________________________________".format(count, f[i], g, theta))
else:
# Print out important values
print("F[i] {}, \nG\n{}\nTHETA\n{}\n".format(f[i], g, theta))
return theta, f[i]
# Feature matrix: the four water-chemistry columns; target vector: reconstructed pH ('pH_2s')
coral_dat = df.drop(['samp','age','d18O','d11B','d11B_2s','pH','pH_2s'], axis=1)
pH = df.drop(['samp','age','d18O','d11B','d11B_2s','pH','Sr/Ca','d13C','Mg/Ca','Ba/Ca'], axis=1)
coral_dat = coral_dat.abs()
coral_dat = np.array(coral_dat)
pH = np.array(pH).squeeze()
pH = pH.tolist()
grad_descent(coral_dat, pH, 200, 0.0001, 0, 'yes')
# Coefficients for the multivariate pH formula, taken from the converged gradient descent run
a1 = 0.32208252
a2 = 0.67042778
a3 = 0.01299445
a4 = 0.30406741
df['Sr/Ca'] = abs(df['Sr/Ca'])
df['Mg/Ca'] = abs(df['Mg/Ca'])
df['PredictedpH'] = a1*(df['Sr/Ca']) + a2*(df['d13C']) + a3*(df['Mg/Ca']) + a4*(df['Ba/Ca'])
df['Residual'] = df['PredictedpH'] - df['pH_2s']
df['pH_Score'] = np.where(df['pH_2s'] > 8.0, 'High', 'Low')
tempdf = [x for _, x in df.groupby(by=["pH_Score"])]
highdf = tempdf[0]
plt.figure(figsize=(15, 10))
l1 = sb.violinplot(x='pH_2s', y='Residual', data=highdf)
print('pH = {}*Sr + {}*d13C + {}*Mg + {}*Ba'.format(a1, a2, a3, a4))
Sample gradient descent values printed from the first and last iterations
Iteration: 1
F[i]: 2923.93015
G: [-3568.6461 -5664.90161 -1379.8825 -2886.41534]
THETA: [0.35686461 0.56649016 0.13798825 0.28864153]
________________________________________________________
Iteration: 2
F[i]: 12.959255691038395
G: [-201.17101646 -329.12741092 -70.40112328 -165.62951679]
THETA: [0.37698171 0.5994029 0.14502836 0.30520449]
________________________________________________________
Iteration: 199
F[i]: 1.9249635671526308
G: [ 2.36024591 -2.89824602 5.08176248 0.3166919 ]
THETA: [0.31993992 0.67305884 0.00836385 0.30378818]
________________________________________________________
Iteration: 200
F[i]: 1.9209795072680613
G: [ 2.35518761 -2.89201634 5.06607868 0.31828398]
THETA: [0.3197044 0.67334804 0.00785725 0.30375635]
________________________________________________________
pH = 0.32208252*Sr + 0.67042778*d13C + 0.01299445*Mg + 0.30406741*Ba
grad_descent(coral_dat, pH, 1000, 0.0001, 0, 'yes')
# Coefficients after 1000 iterations, taken from the gradient descent output
a1 = 0.22080397
a2 = 0.7962654
a3 = -0.13454225
a4 = 0.25364617
df['Sr/Ca'] = abs(df['Sr/Ca'])
df['Mg/Ca'] = abs(df['Mg/Ca'])
df['PredictedpH'] = a1*(df['Sr/Ca']) + a2*(df['d13C']) + a3*(df['Mg/Ca']) + a4*(df['Ba/Ca'])
df['Residual'] = df['PredictedpH'] - df['pH_2s']
df['pH_Score'] = np.where(df['pH_2s'] > 8.0, 'High', 'Low')
tempdf = [x for _, x in df.groupby(by=["pH_Score"])]
highdf = tempdf[0]
plt.figure(figsize=(15, 10))
l1 = sb.violinplot(x='pH_2s', y='Residual', data=highdf)
print('pH = {}*Sr + {}*d13C + {}*Mg + {}*Ba'.format(a1, a2, a3, a4))
Sample gradient descent values printed from the first and last iterations
Iteration: 1
F[i]: 2923.93015
G: [-3568.6461 -5664.90161 -1379.8825 -2886.41534]
THETA: [0.35686461 0.56649016 0.13798825 0.28864153]
________________________________________________________
Iteration: 2
F[i]: 12.959255691038395
G: [-201.17101646 -329.12741092 -70.40112328 -165.62951679]
THETA: [0.37698171 0.5994029 0.14502836 0.30520449]
________________________________________________________
Iteration: 999
F[i]: 1.135647167663162
G: [ 0.65564306 -0.85236277 0.24619817 0.74160267]
THETA: [ 0.22086946 0.79618025 -0.13451778 0.25372033]
________________________________________________________
Iteration: 1000
F[i]: 1.1354705434177734
G: [ 0.65493972 -0.85157528 0.24474742 0.74162708]
THETA: [ 0.22080397 0.7962654 -0.13454225 0.25364617]
________________________________________________________
pH = 0.22080397*Sr + 0.7962654*d13C + -0.13454225*Mg + 0.25364617*Ba
In this tutorial we explored various methods of regression modeling, correlation analysis, and plotting of coordinate data on a world map. By working with dataset (1) on reconstructed ocean pH and dataset (2) on coral bleaching events, we were able to visualize both the extent of the damage reefs are experiencing and the temporal precision of reconstructed pH in predicting these bleaching events.
Through various regression models we were able to predict pH from the year (linear and polynomial regression) as well as from other water parameters (multivariate regression on Mg, Sr, Ba, and d13C). The polynomial regression model fit the oscillations of the data quite well and predicts an ocean pH of about 7.59 in 2020, still well below 8.0 and unfavorable to coral reefs. Whether this is actually the case is cause for further investigation.
With multivariate regression modeling we obtained a formula that predicted pH values with residuals of less than ±0.5 after 1000 iterations of the algorithm. Given input values for magnesium, skeletal carbon, barium, and strontium, we can estimate the corresponding pH. This demonstrates a useful application: predicting a dependent variable from multiple independent variables. If a similar dataset becomes available in future years, we can test the validity of these models.
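As a concrete illustration, the fitted multivariate formula can be applied to a row of measurements. The sketch below uses the absolute values from the first row of the dataframe shown earlier (as absolute values were used during training) together with the 1000-iteration coefficients:

# Apply the multivariate formula to the first row of df
sr, c13, mg, ba = 4.40, 7.762, 1.59, 4.310
predicted_ph = 0.22080397*sr + 0.7962654*c13 - 0.13454225*mg + 0.25364617*ba
print(round(predicted_ph, 2))  # ~8.03, vs. a reconstructed pH of 8.02 for that row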
It is important to note the limitations of these studies. Isotopic data may lose precision the further back in time we look, and the reconstructed water parameters are only estimates of what our oceans and corals were like. With regard to the bleaching report data, it is difficult to collect data on the extent of coral bleaching objectively and with standardized methods. Furthermore, there are oceanic phenomena not accounted for in these models, such as interdecadal and yearly oscillations in ocean parameters, and the adaptability of coral reefs, which allows them to recover from mass bleaching events and adjust to declining pH.
(1) Wei, G., M.T. McCulloch, G. Mortimer, W. Deng, and L. Xie. 2009. Evidence for ocean acidification in the Great Barrier Reef of Australia. Geochimica et Cosmochimica Acta, vol. 73, pp. 2332-2346. doi:10.1016/j.gca.2009.02.009
(2) Donner SD, Rickbeil GJM, Heron SF (2017) A new, high-resolution global mass coral bleaching database. PLoS ONE 12(4): e0175490. https://doi.org/10.1371/journal.pone.0175490