Geospatial Visualization
Authors: Zhenlei Song (Graduate Research Assistant), Zhe Zhang (Assistant Professor)
Cyberinfrastructure and Spatial Decision Intelligence Research Group
https://www.cidigis.com
Department of Geography
Texas A&M University
Before You Start
For convenience in setting up the environment for each course section, please follow this environment setup instruction for all course sections through TAMU HPRC.
Learning Objectives
Upon completing this section of the Jupyter Notebook training course, learners will be able to:
Understand the Fundamentals of Geospatial Data
- Define what geospatial data is and its importance.
- Describe how geospatial data is captured, stored, and processed.
- Identify different types of geospatial data (e.g., raster, vector).
Master Geospatial Data Processing Skills
- Import and manage geospatial data using Python libraries such as Pandas and GeoPandas.
- Perform basic operations with geospatial data, including filtering, aggregation, and spatial joins.
Learn Basic Principles of Geospatial Data Visualization
- Understand the principles of map design and the elements that make an effective map.
- Distinguish between different map types and when to use them.
Acquire Skills in Visualizing Geospatial Data with Python
- Create static and interactive maps using libraries such as Matplotlib, Plotly, and Folium.
- Customize visualizations with various mapping techniques, including choropleth and heat maps.
Apply Geospatial Analysis to Solve Real-World Problems
- Use case studies and projects to apply geospatial analysis and visualization to address real-world challenges such as environmental monitoring, urban planning, and disaster response.
Evaluate and Critically Think About Geospatial Visualization Projects
- Assess the effectiveness of geospatial visualizations.
- Critically analyze the choice of visualization, data representation, and user interaction.
Explore Future Trends in Geospatial Data Visualization
- Investigate emerging trends and technologies in geospatial visualization, including 3D mapping, virtual reality (VR), and augmented reality (AR) applications.
By achieving these objectives, learners will not only gain foundational skills in processing and visualizing geospatial data but also understand how to apply these techniques to solve practical problems and stay abreast of future developments in the field.
Module 1. Setting Up
Step 1: Import Libraries
# Importing necessary libraries for data processing and visualization
import pandas as pd # Pandas for tabular data processing and analysis
import geopandas as gpd # GeoPandas for vector geospatial data processing and analysis
import xarray as xr # Xarray for raster geospatial data processing and analysis
import numpy as np # Numpy for numerical computing with arrays
import matplotlib.pyplot as plt # Matplotlib's pyplot for creating static, animated, and interactive visualizations
import os # os lib for file path processing
import json # json lib for structured textual printing
import plotly.express as px # Plotly for Interactive Visualization
import plotly.graph_objects as go
import folium # Folium for Interactive Visualization
Step 2: Environment Variables Setup
BASE_DIR = os.getcwd() # Get current working dir as `BASE_DIR`
# print(BASE_DIR)
DATASET_DIR = f"{BASE_DIR}/DataSets" # Get the dir of datasets
Module 2. Vector Data Format
2.1 Introduction to Vector Data Format
Vector data format is a fundamental type of geospatial data representation, essential for mapping and spatial analysis. Unlike raster data, which represents geographic information through a grid of cells, vector data uses geometric shapes such as points, lines, and polygons to represent features on the earth's surface. This format is particularly well-suited for describing features with discrete boundaries and precise locations, making it indispensable for a wide range of applications.
Key Features¶
- Precision and Clarity: Vector data provides a clear and accurate representation of geographic features. Points can mark specific locations, lines can define paths or connections, and polygons can outline areas with precise boundaries.
- Scalability: Vector graphics maintain their clarity and detail at any scale, making them ideal for zoomable maps and detailed spatial analysis.
- Efficiency in Storage and Performance: Due to their geometric nature, vector data can be more efficient to store and process, especially for features that occupy a small portion of a map.
Common Vector Data Formats¶
Vector data can be stored and distributed in various formats, each with its own advantages and use cases:
- GeoJSON: A lightweight, text-based format, easily readable by humans and machines, ideal for web applications.
- Shapefile: A popular but older format developed by ESRI, commonly used in desktop GIS applications.
- GML (Geography Markup Language): An XML-based format for expressing geographical features, used for data exchange and storage.
- KML (Keyhole Markup Language): Used primarily for Google Earth and similar mapping applications, KML is an XML notation for expressing geographic annotation and visualization.
Why GeoJSON¶
GeoJSON has emerged as a highly popular format for vector data due to several compelling reasons:
- Interoperability: GeoJSON is based on JSON, a widely used standard in web development, making it easily integrable with web applications and services.
- Simplicity: The structure of GeoJSON is straightforward, making it easy to write and understand without requiring specialized software.
- Flexibility: GeoJSON supports various geometric types and can include additional properties, allowing for rich, descriptive datasets.
- Open Standard: Being an open standard, GeoJSON is supported by a wide range of software platforms, enhancing its utility and adoption.
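To see how simple the format is in practice, here is a minimal, hand-written GeoJSON FeatureCollection; the coordinates and properties below are invented for illustration, not taken from the Walmart dataset used later:

```python
import json

# A minimal GeoJSON FeatureCollection with a single point feature.
# The store name, year, and coordinates are made up for this example.
store = {
    "type": "FeatureCollection",
    "features": [
        {
            "type": "Feature",
            "geometry": {
                "type": "Point",
                "coordinates": [-94.07, 36.34],  # GeoJSON order: [longitude, latitude]
            },
            "properties": {"name": "Store #1", "opened": 1962},
        }
    ],
}

# Because GeoJSON is plain JSON, the standard library can serialize it directly
print(json.dumps(store, indent=2))
```

Note that GeoJSON always lists longitude before latitude, which is the opposite of the common "lat/lon" spoken order.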
2.2 Data Overview
Source: University of Minnesota
This GeoJSON file records the OpenDate, StreetAddress, City, Type, and GeoLocation of Walmart stores opened from 1962 to 2006.
# load datasets
walmart_loc_geojson_path = f"{DATASET_DIR}/Walmart_Topo/WalmartStoreLoc.geojson" # Geographic dataset
walmart_loc_csv_path = f"{DATASET_DIR}/Walmart_Topo/WalmartStoreLoc.csv" # Tabular dataset
# read geojson file
gdf_walm_loc = gpd.read_file(walmart_loc_geojson_path).drop(columns=['date_super', 'conversion', 'st', 'county']) # load dataset
# preview
print(gdf_walm_loc.columns)
gdf_walm_loc.head()
2.3 Basic Plotting with Projections
print(gdf_walm_loc.crs)
ax = gdf_walm_loc.plot() # plot with default projection system
ax.set_title("WGS84 (lat/lon)");
gdf_walm_loc_ESRI = gdf_walm_loc.to_crs("ESRI:102003") # convert to ESRI projection system
ax = gdf_walm_loc_ESRI.plot()
ax.set_title("ESRI:102003");
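The reprojection step above can also be tried on a small, self-contained example without the Walmart file; the two points below are invented for illustration:

```python
import geopandas as gpd
from shapely.geometry import Point

# Two illustrative points in WGS84 (EPSG:4326), i.e. plain lon/lat degrees
gdf = gpd.GeoDataFrame(
    {"name": ["A", "B"]},
    geometry=[Point(-96.3, 30.6), Point(-97.7, 30.3)],
    crs="EPSG:4326",
)

# Reproject to the same Albers equal-area system used above;
# coordinates become meters relative to the projection's origin
gdf_albers = gdf.to_crs("ESRI:102003")
print(gdf_albers.crs)
print(gdf_albers.geometry.iloc[0])
```

ESRI:102003 is the USA Contiguous Albers Equal Area Conic projection, which is why the second plot above looks "curved" compared with the plain lat/lon plot.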
2.4 Advanced Interactive Plotting of Vector Datasets Through Plotly
Source code: Plotly Map Subplots in Python
data = []
layout = dict(
title = 'New Walmart Stores per year 1962-2006<br>\
Source: <a href="http://www.econ.umn.edu/~holmes/data/WalMart/index.html">\
University of Minnesota</a>',
# showlegend = False,
autosize = False,
width = 1200,
height = 800,
hovermode = False,
legend = dict(
x=0.7,
y=-0.1,
bgcolor="rgba(255, 255, 255, 0)",
font = dict( size=11 ),
)
)
years = gdf_walm_loc['YEAR'].unique()
for i in range(len(years)):
geo_key = 'geo'+str(i+1) if i != 0 else 'geo'
lons = list(gdf_walm_loc[ gdf_walm_loc['YEAR'] == years[i] ]['LON'])
lats = list(gdf_walm_loc[ gdf_walm_loc['YEAR'] == years[i] ]['LAT'])
# Walmart store data
data.append(
dict(
type = 'scattergeo',
showlegend=False,
lon = lons,
lat = lats,
geo = geo_key,
name = int(years[i]),
marker = dict(
color = "rgb(0, 0, 255)",
opacity = 0.5
)
)
)
# Year markers
data.append(
dict(
type = 'scattergeo',
showlegend = False,
lon = [-78],
lat = [47],
geo = geo_key,
text = [years[i]],
mode = 'text',
)
)
layout[geo_key] = dict(
scope = 'usa',
showland = True,
landcolor = 'rgb(229, 229, 229)',
showcountries = False,
domain = dict( x = [], y = [] ),
subunitcolor = "rgb(255, 255, 255)",
)
def draw_sparkline( domain, lataxis, lonaxis ):
''' Returns a sparkline layout object for geo coordinates '''
return dict(
showland = False,
showframe = False,
showcountries = False,
showcoastlines = False,
domain = domain,
lataxis = lataxis,
lonaxis = lonaxis,
bgcolor = 'rgba(255,200,200,0.0)'
)
# Stores per year sparkline
layout['geo44'] = draw_sparkline(
domain={
'x':[0.6,0.8],
'y':[0,0.15]
},
lataxis={
'range':[-5.0, 30.0]
},
lonaxis={
'range':[0.0, 40.0]
}
)
data.append(
dict(
type = 'scattergeo',
mode = 'lines',
lat = list(gdf_walm_loc.groupby(by=['YEAR']).count()['storenum']/1e1),
lon = list(range(len(gdf_walm_loc.groupby(by=['YEAR']).count()['storenum']/1e1))),
line = dict( color = "rgb(0, 0, 255)" ),
name = "New stores per year<br>Peak of 178 stores per year in 1990",
geo = 'geo44',
)
)
# Cumulative sum sparkline
layout['geo45'] = draw_sparkline({'x':[0.8,1], 'y':[0,0.15]}, \
{'range':[-5.0, 50.0]}, {'range':[0.0, 50.0]} )
data.append(
dict(
type = 'scattergeo',
mode = 'lines',
lat = list(gdf_walm_loc.groupby(by=['YEAR']).count().cumsum()['storenum']/1e2),
lon = list(range(len(gdf_walm_loc.groupby(by=['YEAR']).count()['storenum']/1e1))),
line = dict( color = "rgb(214, 39, 40)" ),
name ="Cumulative sum<br>3176 stores total in 2006",
geo = 'geo45',
)
)
z = 0
COLS = 5
ROWS = 9
for y in reversed(range(ROWS)):
for x in range(COLS):
geo_key = 'geo'+str(z+1) if z != 0 else 'geo'
layout[geo_key]['domain']['x'] = [float(x)/float(COLS), float(x+1)/float(COLS)]
layout[geo_key]['domain']['y'] = [float(y)/float(ROWS), float(y+1)/float(ROWS)]
z=z+1
if z > 42:
break
fig = go.Figure(data=data, layout=layout)
fig.update_layout(width=800)
fig.show()
2.5 Animation of Geo-Scatter Plots Through Plotly
fig = px.scatter_geo(gdf_walm_loc,
width=1200,
height=800,
lat='LAT',
lon='LON',
hover_name='STRCITY', # Show the STRCITY column on hover
animation_frame='YEAR', # Animate by year
scope='usa',
title='Walmart Store Openings Over the Years')
# Update layout for a better visualization
fig.update_layout(
geo=dict(landcolor='rgb(217, 217, 217)'),
margin={
"r":0,
"t":50,
"l":0,
"b":0
}
)
fig.show()
2.6 Interactive Plotting of GeoJSON Through Folium
# prepare datasets
# geometry datasets describing shapes of the tracts
#census_tract_path = "DataSets/OKC_Census/census_tracts/tl_2020_40_tract.shp"
census_tract_path = "DataSets/OKC_Census/census_tracts/tl_2020_40_tract.geojson"
# census datasets containing feature columns
census_data_path = "DataSets/OKC_Census/census_data/data.xlsx"
# load datasets
census_gdf = gpd.read_file(census_tract_path)
census_df = pd.read_excel(census_data_path, sheet_name="2020AGE")
# drop the first row, which contains no data
census_df = census_df.drop(census_df.index[0])
# unify GEOID column for datasets concatenation
census_gdf['GEOID'] = census_gdf['GEOID'].astype(int)
census_df['GEOID'] = census_df['GEOID'].astype(int)
# clear string data and make them float
def clean_data(gdf):
for i in range(gdf.shape[0]):
for j in range(gdf.shape[1]):
item = gdf.iloc[i,j]
if type(item) == str:
gdf.iat[i,j] = 0.0
return gdf
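The cell-by-cell loop above works, but it is slow on large frames. A vectorized sketch using pandas' `to_numeric` is shown below; note the slightly different behavior — it parses numeric-looking strings instead of zeroing them, and it skips columns (such as `geometry`) named in `skip`. The column names in the demo frame are made up for illustration:

```python
import pandas as pd

def clean_data_vectorized(df, skip=("geometry", "GEOID")):
    """Coerce non-numeric entries to 0.0, one column at a time.

    Mirrors the loop-based clean_data above but avoids per-cell Python work.
    Columns listed in `skip` (e.g. geometry) are left untouched.
    """
    for col in df.columns:
        if col in skip:
            continue
        # Non-parsable values become NaN, which we then replace with 0.0
        df[col] = pd.to_numeric(df[col], errors="coerce").fillna(0.0)
    return df

# Illustrative example with made-up census-style data
demo = pd.DataFrame({"GEOID": [1, 2], "Female%": [51.2, "(X)"]})
print(clean_data_vectorized(demo))
```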
# rename column functions
def rename_columns(gdf):
gdf = gdf[
[
'geometry',
'GEOID',
'Female%',
'Age<5>65%',
'>15property%',
'nodiploma%',
'%livingalone',
'%minority',
'%unemployment',
'%language',
'%renthouse',
'Novehicle%',
'%noinsurance',
'%disability',
'%computer',
'%nointernet',
'%nophone',
]
]
gdf = gdf.rename(
columns={
'Female%': 'Female_Percent',
'Age<5>65%': 'Elder&Young_Percent',
'>15property%': 'Property_Percent',
'nodiploma%': 'Low_Educ_Percent',
'%livingalone': 'Living_Alone_Percent',
'%minority': 'Minority_Percent',
'%unemployment': 'Unemployment_Percent',
'%language': 'Language_Percent',
'%renthouse': 'RentHouse_Percent',
'Novehicle%': 'No_Vehicle_Percent',
'%noinsurance': 'No_Insurance_Percent',
'%disability': 'Disable_Percent',
'%computer': 'Computer_Availability',
'%nointernet': 'No_Internet_Percent',
'%nophone': 'No_Phone_Percent'
}
)
return gdf
# Define a class to handle map creation and manipulation using Folium
class Map():
def __init__(self, gdf) -> None:
# Initialize the Map object with a GeoDataFrame
self.gdf = gdf
self.attr = None # Placeholder for attribute to be visualized
self.map = None # Placeholder for the Folium map object
self.figure = None # Placeholder for the Folium figure object
def create_map(self, attr="Female_Percent", fill_color='YlGn'):
# Set the attribute to visualize on the map
self.attr = attr
# Initialize a Folium map centered at given coordinates with specified zoom level, width, and height
self.map = folium.Map(
location=[36.084621, -96.921387],
zoom_start=7,
width=1200,
height=800
)
# Add a choropleth layer to the map using the provided attribute and color
add_choropleth(self.map, self.gdf, self.attr, fill_color=fill_color)
# Add layer control to the map
folium.LayerControl().add_to(self.map)
# Create a figure object and add the map to it
self.figure = folium.Figure()
self.map.add_to(self.figure)
# Render the figure
self.figure.render()
# Return the figure object
return self.figure
# Define a function to add a choropleth layer to a Folium map
def add_choropleth(folium_map, gdf, attr, fill_color):
# Add a Choropleth layer to the provided Folium map based on GeoDataFrame and specified attributes
folium.Choropleth(
geo_data=gdf['geometry'], # Use the 'geometry' column of the GeoDataFrame for choropleth
data=gdf.dropna(subset=[attr])[attr], # Filter out missing data for the specified attribute
key_on='feature.id', # Reference to GeoJSON features
fill_color=fill_color, # Color palette for the choropleth
name=attr+' Choropleth', # Name of the layer
line_weight=0.1, # Line weight for the choropleth boundaries
).add_to(folium_map)
# Define a function to add a mouse position plugin to a Folium map
def add_mouse_position(folium_map):
# Configure a mouse position plugin to display coordinates on the map
formatter = "function(num) {return L.Util.formatNum(num, 3) + ' º ';};" # Formatter for the coordinate display
folium.plugins.MousePosition(
position="topright", # Position of the coordinate display on the map
separator=" | ", # Separator for latitude and longitude display
empty_string="NaN", # Display when coordinates are not available
lng_first=True, # Display longitude before latitude
num_digits=20, # Number of digits after decimal point for coordinates
prefix="Coordinates:", # Text prefix before coordinates
lat_formatter=formatter, # Formatter for latitude display
lng_formatter=formatter, # Formatter for longitude display
).add_to(folium_map)
# Define a function to add a GeoJSON layer to a Folium map
def add_layer(shp_file, folium_map, layer_name, color):
# Read a shapefile into a GeoDataFrame
gdf = gpd.read_file(shp_file)
# Add a GeoJson layer to the Folium map with specified styling
folium.GeoJson(
gdf,
name=layer_name, # Name of the layer
style_function=lambda feature: {
'fillColor': color, # Fill color of the features
'color': color, # Border color of the features
'weight': 1, # Border weight of the features
'fillOpacity': 0.1, # Opacity of the feature fill
}
).add_to(folium_map)
# Merge geojson (GeoDataFrame) and xlsx (DataFrame) datasets
gdf_OKC_census = census_gdf.merge(census_df, on='GEOID', how='inner')
# Clean and rename columns
gdf_OKC_census = clean_data(gdf_OKC_census)
gdf_OKC_census = rename_columns(gdf_OKC_census)
# Instantiate the class just defined
map_OKC = Map(gdf_OKC_census)
# Create a map with the attribute "RentHouse_Percent"
# Default fill color is "YlGn"
# Default attribute is "Female_Percent"
map_OKC.create_map(attr="RentHouse_Percent", fill_color='YlGnBu')
2.7 Exercise¶
Task¶
Modify the code above to plot a Folium map of the same dataset, but for the column 'Female_Percent', and change the choropleth color scheme to whichever you like.
Useful link: folium.features.Choropleth documentation
Answer:
Change
map_OKC.create_map(attr="RentHouse_Percent", fill_color='YlGnBu')
to
map_OKC.create_map(attr="Female_Percent", fill_color='RdPu')
Module 3. Raster Data Format
3.1 Introduction to Raster Data Format
Raster data format plays a pivotal role in geospatial analysis and environmental modeling. It represents geographic information through a matrix of cells or pixels, with each cell containing a value representing information such as temperature, elevation, or land cover. Raster data is particularly useful for representing continuous variables over a geographic area, making it essential for analyzing patterns and changes in landscapes.
Key Features¶
- Continuous Data Representation: Raster is ideal for depicting continuous data, such as temperature gradients, elevation, or pollution levels, where features change smoothly across the landscape.
- Simple Data Structure: The grid-based structure of raster data simplifies complex geographic phenomena, making it easier to perform spatial analysis and modeling.
- High Detail in Representation: High-resolution raster data can provide a detailed view of the geographic area, capturing nuances that might be missed in vector data.
Common Raster Data Formats
Raster data can be stored in various formats, each designed to cater to specific needs and applications:
- TIFF (Tagged Image File Format): A flexible format that can store image data with a high level of detail and precision.
- GeoTIFF: An extension of TIFF that includes georeferencing information, making it suitable for GIS applications.
- NetCDF (Network Common Data Form): A format designed to store and distribute multi-dimensional scientific data, such as temperature, humidity, or atmospheric pressure over the globe.
- JPEG, PNG, and GIF: Common image formats used for storing raster data, especially for web mapping applications.
Why NetCDF is a Popular Format
NetCDF stands out as a popular format for raster data for several compelling reasons:
- Multidimensional Data Support: NetCDF can handle multi-dimensional data, allowing for the storage of variables across different dimensions (e.g., time, height, latitude, longitude), making it ideal for complex environmental and scientific datasets.
- Efficient Data Access: NetCDF supports efficient data access, even for very large datasets, enabling users to read and write data without loading entire files into memory.
- Portability and Scalability: NetCDF files are portable across different computer architectures, and their structure scales well from small datasets to large, complex collections of data.
- Open Standard and Wide Support: As an open standard, NetCDF is supported by a wide range of data processing tools, libraries, and applications in the scientific community, facilitating data sharing and collaboration.
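As a small illustration of the multidimensional layout, a synthetic `xarray.Dataset` with the same (time, lat, lon) structure as the wind file used below can be built in a few lines; the values here are random numbers, not real wind observations:

```python
import numpy as np
import xarray as xr

# A tiny synthetic dataset: one variable over (time, lat, lon) dimensions
ds_demo = xr.Dataset(
    {
        "u_wind": (("time", "lat", "lon"), np.random.rand(2, 3, 4)),
    },
    coords={
        "time": np.array(["2023-01-06T00", "2023-01-06T06"], dtype="datetime64[ns]"),
        "lat": [-10.0, 0.0, 10.0],
        "lon": [120.0, 130.0, 140.0, 150.0],
    },
)

# Writing to NetCDF is a single call (requires a NetCDF backend such as netcdf4):
# ds_demo.to_netcdf("demo.nc")
print(ds_demo)
```

Reading the file back with `xr.open_dataset` preserves the dimensions, coordinates, and variable names exactly, which is what makes NetCDF convenient for sharing multidimensional data.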
3.2 Data Overview
NOAA CoastWatch Blended Winds (6-hourly)
Dataset Name: NBSv02_wind_6hourly_20230106_nrt.nc
Source: NOAA CoastWatch: File Download
Description: This dataset provides 6-hourly global ocean surface wind observations. It is part of the NOAA CoastWatch program and is created by blending satellite observations.
file_path = f"{DATASET_DIR}/NBSv02_wind_6hourly_20230106_nrt.nc"
ds = xr.open_dataset(file_path, engine="netcdf4") # load raster dataset
print(ds)
3.3 Locate Subsets
u_wind = ds['u_wind']
Locate Using the sel Method
# Select a subset of data for the Pacific region based on latitude and longitude ranges
pacific_subset_sel = u_wind.sel(lat=slice(-60, 60), lon=slice(120, 270))
# Select the first time point from the 'time' coordinate of the subset
time_point_sel = pacific_subset_sel['time'][0]
# Select the data corresponding to the first time point from the subset
pacific_subset_time_sel = pacific_subset_sel.sel(time=time_point_sel)
# Print a statement indicating that the following output is the selected subset for a specific time
print("pacific_subset_time_sel:")
# Print the data for the selected subset at the specified time point
print(pacific_subset_time_sel)
# Print the shape of the numpy array for the selected subset at the specified time point
print(pacific_subset_time_sel.values.shape)
Locate Using the isel Method
# Find the index of the first latitude value that is greater than or equal to -60 degrees
lat_start_index = np.argmax(u_wind['lat'].values >= -60)
# Find the index of the first latitude value that is greater than 60 degrees
lat_end_index = np.argmax(u_wind['lat'].values > 60)
# Find the index of the first longitude value that is greater than or equal to 120 degrees
lon_start_index = np.argmax(u_wind['lon'].values >= 120)
# Find the index of the first longitude value that is greater than 270 degrees
lon_end_index = np.argmax(u_wind['lon'].values > 270)
# Select a subset of the u_wind data for latitudes between -60 and 60 degrees and longitudes between 120 and 270 degrees
# using integer-based indexing
pacific_subset_isel = u_wind.isel(lat=slice(lat_start_index, lat_end_index),
lon=slice(lon_start_index, lon_end_index))
# Select the data for the first time point from the subset
pacific_subset_time_isel = pacific_subset_isel.isel(time=0)
# Print a statement indicating that the following output is the selected subset for the first time point
print("pacific_subset_time_isel:")
# Print the data for the selected subset at the first time point
print(pacific_subset_time_isel)
# Print the shape of the numpy array for the selected subset at the first time point
print(pacific_subset_time_isel.values.shape)
Locate Using the loc Method
# Select a subset of u_wind data where latitude is between -60 and 60, and longitude is between 120 and 270
# This is done using label-based indexing with the 'loc' method and a dictionary specifying the slices for lat and lon
pacific_subset_loc = u_wind.loc[dict(lat=slice(-60, 60), lon=slice(120, 270))]
# Retrieve the first time point from the 'time' coordinate of the selected subset
time_point_loc = pacific_subset_loc['time'][0]
# Using the previously retrieved time point, select the corresponding subset of data from pacific_subset_loc
# This further narrows down the data to the specific time point using label-based indexing
pacific_subset_time_loc = pacific_subset_loc.loc[dict(time=time_point_loc)]
# Print the subset of data corresponding to the specific time point
print(pacific_subset_time_loc)
# Print the shape of the numpy array representing the selected subset for the specified time point
# This provides an understanding of the data dimensions at this particular time slice
print(pacific_subset_time_loc.values.shape)
Locate Using Index Arrays (an iloc-style approach)
# Use np.where to find indices where latitude values are between -60 and 60 degrees.
# This returns a tuple with the first element containing the indices, so [0] is used to select these indices.
lat_indices = np.where((u_wind['lat'] >= -60) & (u_wind['lat'] <= 60))[0]
# Similarly, find indices for longitude values between 120 and 270 degrees using np.where.
lon_indices = np.where((u_wind['lon'] >= 120) & (u_wind['lon'] <= 270))[0]
# Select a subset of the u_wind data using the indices found for latitude and longitude.
# The isel method is used here for integer-location based indexing, which is efficient for large datasets.
pacific_subset_iloc = u_wind.isel(lat=lat_indices, lon=lon_indices)
# Select the data for the first time point from the indexed subset using isel.
# This further narrows the data to include only the first time slice.
pacific_subset_time_iloc = pacific_subset_iloc.isel(time=0)
# Print the subset of data for the specific time point to verify the correct data selection.
print(pacific_subset_time_iloc)
# Print the shape of the numpy array of the data subset to understand its dimensions.
# This helps in verifying the size of the data slice and ensuring the selection is as expected.
print(pacific_subset_time_iloc.values.shape)
Question¶
Are subsets queried by these 4 methods completely equal?
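One way to answer this empirically is `xarray`'s `equals` method. The four subsets above need the NetCDF file, so the sketch below uses a small synthetic array with invented coordinates instead:

```python
import numpy as np
import xarray as xr

# A small synthetic grid covering the same lat/lon bounds as the query above
da = xr.DataArray(
    np.arange(16.0).reshape(4, 4),
    coords={"lat": [-60.0, -20.0, 20.0, 60.0],
            "lon": [120.0, 170.0, 220.0, 270.0]},
    dims=("lat", "lon"),
)

# Label-based and integer-based selections of the same region...
by_sel = da.sel(lat=slice(-60, 60), lon=slice(120, 270))
by_isel = da.isel(lat=slice(0, 4), lon=slice(0, 4))

# ...compare equal when they cover identical coordinates and values.
# Note: sel slices are inclusive of BOTH endpoints, while isel follows
# Python's exclusive-stop convention -- an easy source of off-by-one gaps
# when translating label queries into index queries.
print(by_sel.equals(by_isel))  # True
```

So the four methods agree only if the index computations reproduce the label slices exactly; boundary values (here, points at exactly ±60° latitude or 270° longitude) are where they can silently differ.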
3.4 Xarray.DataArray Plotting
Documentation for Xarray.DataArray.plot()
pacific_subset = pacific_subset_iloc
print(f"pacific_subset array shape: {pacific_subset.values.shape}")
pacific_subset_time = pacific_subset_time_iloc
print(f"pacific_subset_time array shape: {pacific_subset_time.values.shape}")
# Xarray.DataArray default `plot`
pacific_subset_time.plot()
# Xarray.DataArray default plot for subplots at certain dimension
pacific_subset.plot(
x="lon",
y="lat",
col="time",
col_wrap=4,
cmap=plt.cm.BuPu
)
3.5 Heatmap Using Plotly
# Create a heatmap using Plotly Express's imshow function to visualize the wind data
# The first slice of data along the first dimension of 'pacific_subset_time.values' is used as the image data
fig = px.imshow(
pacific_subset_time.values[0], # The 2D (lat, lon) slice at the first index of the leading dimension
labels={'color':'U Wind'}, # Label for the color bar, indicating the variable shown
title=r'`u_wind` Heatmap', # Title of the plot, using a raw string for any special characters
origin='lower', # Set the origin of the heatmap to the lower-left corner, affecting how data is plotted
x=pacific_subset_time['lon'].values, # Longitude values to label the x-axis
y=pacific_subset_time['lat'].values # Latitude values to label the y-axis
)
# Update the layout of the figure to set the width and height of the image
fig.update_layout(
width=800, # Set the width of the figure to 800 pixels
height=600 # Set the height of the figure to 600 pixels
)
# Display the figure
fig.show()
3.6 Visualization by Converting Raster to Vector
# Extract 'u_wind' variable and convert it to DataFrame
u_wind_data = u_wind.to_dataframe().reset_index()
u_wind_data.head()
# Convert 'u_wind' variable into a DataFrame suitable for heatmaps, ignoring NaN values and considering the zlev dimension
heatmap_df = u_wind.to_dataframe().reset_index()
heatmap_df.dropna(subset=['u_wind'], inplace=True)
# Generate a heatmap for each time point with Plotly
# Unique time points
time_points = heatmap_df['time'].unique()
# Choose a specific time point, for example, selecting the second entry in the time_points list
specific_time = time_points[1]
# Filter the data for the selected time point
temp_df = heatmap_df[heatmap_df['time'] == specific_time]
# Create a heatmap using the filtered data
fig = go.Figure(data=go.Heatmap(
z=temp_df['u_wind'], # Wind speed data
x=temp_df['lon'], # Longitude values for the x-axis
y=temp_df['lat'], # Latitude values for the y-axis
colorscale='Viridis', # Color scale for the heatmap
zmin=-20, # Minimum value of the color scale
zmax=20 # Maximum value of the color scale
))
# Update the layout of the figure
fig.update_layout(
title=f'U-Wind Speed Heatmap at Time {specific_time}', # Title of the heatmap, including the specific time
height=600, # Height of the figure in pixels
width=800, # Width of the figure in pixels
xaxis=dict(title='Longitude'), # Label for the x-axis
yaxis=dict(title='Latitude') # Label for the y-axis
)
# Display the figure
fig.show()