# Data Download and Cleaning

We will download a SNOTEL data set using the [metloom](https://metloom.readthedocs.io/en/latest/installation.html) package.

## Download Data

In [None]:
# Install metloom into the running kernel's environment (-q keeps pip's output quiet).
# NOTE(review): no version is pinned here — consider pinning one for reproducible runs.
%pip install -q metloom 

In [None]:
from datetime import datetime
from metloom.pointdata import SnotelPointData

# Variables requested for every daily record of the station.
snotel_variables = SnotelPointData.ALLOWED_VARIABLES
ALLOWED_VARIABLES = [
    snotel_variables.SWE,
    snotel_variables.TEMPAVG,
    snotel_variables.SNOWDEPTH,
    snotel_variables.PRECIPITATION,
]

# Station triplets can be looked up at:
# https://wcc.sc.egov.usda.gov/nwcc/yearcount?network=sntl&state=&counttype=statelist
snotel_point = SnotelPointData(station_id="502:WA:SNTL", name="Green Lake")

# Date bounds passed to the daily-data request.
download_start = datetime(2010, 1, 1)
download_end = datetime(2023, 1, 1)

data = snotel_point.get_daily_data(
    start_date=download_start,
    end_date=download_end,
    variables=ALLOWED_VARIABLES,
)

# Print the column/dtype summary, then display the first rows as the cell output.
data.info()
data.head()

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Flatten the index into ordinary columns so "datetime" can be used as the x-axis.
for_plotting = data.reset_index()

# Display unit for each plotted column, used in the y-axis labels.
units = {
    "SWE": "in",
    "SNOWDEPTH": "in",
    "AVG AIR TEMP": "degF",
    "PRECIPITATION": "in",
}

# Panels are drawn in this order, filling the 2x2 grid row by row.
variables_to_plot = ["SWE", "SNOWDEPTH", "AVG AIR TEMP", "PRECIPITATION"]

fig, axes = plt.subplots(2, 2, figsize=(12, 8))

# One time series per panel; axes.flat walks the grid in row-major order.
for ax, variable in zip(axes.flat, variables_to_plot):
    ax.plot(for_plotting["datetime"], for_plotting[variable], label=variable)
    ax.set_ylabel(f"{variable} ({units[variable]})", fontsize=14)
    ax.set_xlabel("Date", fontsize=14)

fig.tight_layout()
plt.show()

In [None]:
# Count the missing values in every column.
for_plotting.isna().sum()

## Some Background

At a given point, snow depth ($h_s$) is related to Snow Water Equivalent (SWE) by the local bulk density ($\rho_b$):

$$
\text{SWE} = h_s \frac{\rho_b}{\rho_w}
$$

where depth is measured in centimeters, density in grams per cubic centimeter, $\rho_w$ is the density of water (1 g cm$^{-3}$), and SWE is measured in centimeters of water. As such,

$$
\text{SWE}  = h_s \times \frac{\rho_b}{1}
$$

$$
\rho_b  = \frac{\text{SWE}}{h_s}
$$

In [None]:
# Conversion factor and quality thresholds used by the cleaning pipeline below.
CM_PER_INCH = 2.54
MIN_SNOWDEPTH_CM = 5  # rows with snow depth at or below this are discarded
MIN_SWE_CM = 3        # rows with SWE at or below this are discarded


def _inches_to_cm(values):
    """Convert a Series from inches to centimeters; missing entries become NaN."""
    # astype(float) turns None into NaN, so the arithmetic can be vectorised
    # instead of mapping a Python lambda over every element.
    return values.astype(float) * CM_PER_INCH


def _fahrenheit_to_celsius(values):
    """Convert a Series from degrees Fahrenheit to degrees Celsius; missing entries become NaN."""
    return (values.astype(float) - 32) * 5 / 9


def _previous_7_days_mean(values):
    """Return the 7-day rolling mean of a datetime-indexed Series, lagged by one row.

    ``shift()`` moves every value down one row, so the current row's own
    observation is excluded (assumes one row per day — TODO confirm).
    ``min_periods=7`` leaves the result NaN unless the window holds seven
    non-missing observations.
    """
    return values.shift().rolling(window="7D", min_periods=7).mean()


# Build the modelling table: metric units, lagged weekly averages, and bulk snow
# density, keeping only complete rows above the snow depth and SWE thresholds.
clean_df = (
    for_plotting
    .assign(
        swe=lambda x: _inches_to_cm(x["SWE"]),
        snowdepth=lambda x: _inches_to_cm(x["SNOWDEPTH"]),
        precipitation=lambda x: _inches_to_cm(x["PRECIPITATION"]),
        tempavg=lambda x: _fahrenheit_to_celsius(x["AVG AIR TEMP"]),
    )
    # The time-based rolling window below needs a datetime index.
    .set_index("datetime")
    .assign(
        precip_7_days_avg=lambda x: _previous_7_days_mean(x["precipitation"]),
        tempavg_7_days_avg=lambda x: _previous_7_days_mean(x["tempavg"]),
    )
    # "datetime" is the index at this point, so only data columns are listed.
    .filter(["swe", "snowdepth", "tempavg_7_days_avg", "precip_7_days_avg"])
    .dropna()
    # The positive thresholds also exclude zeros, so the density division is safe.
    .loc[lambda x: (x["snowdepth"] > MIN_SNOWDEPTH_CM) & (x["swe"] > MIN_SWE_CM)]
    # Bulk density = SWE / snow depth, both in centimeters.
    .assign(snowdensity=lambda x: x["swe"] / x["snowdepth"])
)

clean_df.head()

In [None]:
# Store the cleaned data on disk for later use.

import os

# Create the output directory; exist_ok=True makes re-running this cell harmless.
os.makedirs("data", exist_ok=True)
# NOTE(review): index=False leaves the datetime index out of the CSV — confirm that
# whatever reads this file later does not need the dates.
clean_df.to_csv("data/clean_data.csv", index=False)