Practical 4 Code Solution

Jump to “A Little Bit More…” Solution

Week 4 Result

"""
Understanding GIS: Practical 4
@author jonnyhuck

Visvalingam-Whyatt Line Simplification

References:
	Visvalingam & Whyatt (1993): https://www.tandfonline.com/doi/pdf/10.1179/000870493786962263
	Visvalingam (2016): 		 https://www.tandfonline.com/doi/pdf/10.1080/00087041.2016.1151097
	Mandelbrot (1983): 			 https://science.sciencemag.org/content/sci/156/3775/636.full.pdf
	https://bost.ocks.org/mike/simplify/
	https://mapshaper.org/
	https://matplotlib.org/stable/gallery/text_labels_and_annotations/custom_legends.html
	https://cartographicperspectives.org/index.php/journal/article/view/cp68-roth-et-al/html
	https://docs.python.org/3/tutorial/datastructures.html
	https://docs.python.org/3/library/copy.html

New Topics:
	List Comprehension
  	if Statements
  	Shallow copies
	Dictionaries
	Shapely Geometry Types
"""

from sys import exit
from math import sqrt
from shapely.geometry import LineString
from geopandas import read_file, GeoSeries
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.pyplot import subplots, savefig, subplots_adjust

def distance(x1, y1, x2, y2):
	"""
	* Use Pythagoras' theorem to measure a distance.
	*	https://en.wikipedia.org/wiki/Pythagorean_theorem
	"""
	return sqrt((x1 - x2)**2 + (y1 - y2)**2)


def get_effective_area(a, b, c):
	"""
	* Calculate the area of a triangle made from the points a, b and c using Heron's formula
	* 	https://en.wikipedia.org/wiki/Heron%27s_formula
	*
	* Could be simplified to:
	* 	`abs( (a[0]-c[0]) * (b[1]-a[1]) - (a[0]-b[0]) * (c[1]-a[1]) ) * 0.5`
	"""
	# calculate the length of each side
	side_a = distance(b[0], b[1], c[0], c[1])
	side_b = distance(a[0], a[1], c[0], c[1])
	side_c = distance(a[0], a[1], b[0], b[1])

	# calculate semi-perimeter of the triangle (perimeter / 2)
	s = (side_a + side_b + side_c) / 2

	# apply Heron's formula and return
	return sqrt(s * (s - side_a) * (s - side_b) * (s - side_c))


def visvalingam_whyatt(node_list, n_nodes):
	"""
	* Simplify a line using point elimination based on effective area
	"""

	# calculate and store the effective area for each point, excluding the end points
	areas = [ {"point": node_list[i], "area": get_effective_area(node_list[i-1],
		node_list[i], node_list[i+1])} for i in range(1, len(node_list)-1) ]

	# add the end points back in (in this case they are the same as our line is a polygon)
	areas.insert(0, {"point": node_list[0], "area": 0})
	areas.insert(len(areas), {"point": node_list[len(node_list)-1], "area": 0})

	# take a shallow copy of the list so that we don't edit the original
	nodes = areas.copy()

	# keep going until we run out of nodes
	while len(nodes) > n_nodes:

		# init min area with a large number
		min_area = float("inf")

		# loop through every point, excluding the end points
		for i in range(1, len(nodes)-1):

			# if the effective area of this point is smaller than the previous minimum
			if nodes[i]['area'] < min_area:

				# store the mew minimum area and the index of the point
				min_area = nodes[i]['area']
				node_to_delete = i

		# remove the current point from the list
		nodes.pop(node_to_delete)

		# recalculate effective area to the left of the deleted node
		nodes[node_to_delete-1]['area'] = get_effective_area(nodes[node_to_delete-2]['point'],
			nodes[node_to_delete-1]['point'], nodes[node_to_delete]['point'])		# left

		# if there is a node to the right of the deleted node, recalculate the effective area
		if node_to_delete < len(nodes)-1:
			nodes[node_to_delete]['area'] = get_effective_area(nodes[node_to_delete-1]['point'],
				nodes[node_to_delete]['point'], nodes[node_to_delete+1]['point'])	# right

	# extract the nodes and return
	return [ node['point'] for node in nodes ]


# set the percentage of nodes that you want to remove
SIMPLIFICATION_PERC = 98

# get the proj string definition for British National Grid (OSGB)
osgb = "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs"

# open a dataset of all contries in the world
world = read_file("../data/natural-earth/ne_10m_admin_0_countries.shp")

# extract the UK, project, and extract the geometry (a multipolygon)
uk = world[(world.ISO_A3 == 'GBR')].to_crs(osgb).geometry.iloc[0]

# loop through the multipolygon and find the biggest bit (mainland Great Britain)
if uk.geom_type != 'MultiPolygon':
  print("Data is not a MultiPolygon, exiting...")
  exit()

coord_list = []
biggest_area = 0
for poly in uk.geoms:

	# if it is the biggest so far, update biggest area store the coordinates
    if poly.area > biggest_area:
        biggest_area = poly.area
        coord_list = list(poly.boundary.coords)

# make a linestring out of the coordinates
before_line = LineString(coord_list)
print(f"original node count: {len(coord_list)}")
print(f"original length: {before_line.length / 1000:.2f}km\n")	# add newline (\n) to the end of this one

# how many nodes do we need?
n_nodes = int(len(coord_list) / 100.0 * (100 - SIMPLIFICATION_PERC))

# ensure that there are at least 3 nodes (minimum for a polygon)
if n_nodes < 3:
	n_nodes = 3 

# remove one node and overwrite it with the new, shorter list
simplified_nodes = visvalingam_whyatt(coord_list, n_nodes)

# make the resulting list of coordinates into a line
after_line = LineString(simplified_nodes)
print(f"simplified node count: {len(simplified_nodes)}")
print(f"simplified length: {after_line.length / 1000:.2f}km")
print()	# print blank line

# create map axis object, with two axes (maps)
fig, my_axs = subplots(1, 2, figsize=(16, 10))

# set titles
fig.suptitle("The Length of the Coastline of Mainland Great Britain")
my_axs[0].set_title(f"Original: {before_line.length / 1000:.0f}km, {len(coord_list)} nodes.")
my_axs[1].set_title(f"{SIMPLIFICATION_PERC}% Simplified: {after_line.length / 1000:.0f}km, {len(simplified_nodes)} nodes.")

# reduce the gap between the subplots
subplots_adjust(wspace=0)

# add the original coastline
GeoSeries(before_line, crs=osgb).plot(
    ax=my_axs[0],
    color='blue',
    linewidth = 0.6,
	)

# add the new coastline
GeoSeries(after_line, crs=osgb).plot(
    ax=my_axs[1],
    color='red',
    linewidth = 0.6,
	)

# edit individual axis
for my_ax in my_axs:

	# remove axes
	my_ax.axis('off')

	# add north arrow
	x, y, arrow_length = 0.95, 0.99, 0.1
	my_ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
		arrowprops=dict(facecolor='black', width=5, headwidth=15),
		ha='center', va='center', fontsize=20, xycoords=my_ax.transAxes)

	# add scalebar
	my_ax.add_artist(ScaleBar(dx=1, units="m", location="lower right"))

# save the result
savefig(f'out/4.png', bbox_inches='tight')
print("done!")

A Little Bit More…

Result of extra task

"""
Understanding GIS: Practical 4 (A Little Bit Extra... Task 2)
@author jonnyhuck

Visvalingam-Whyatt Line Simplification of multiple shapes

References:
	Visvalingam & Whyatt (1993): https://www.tandfonline.com/doi/pdf/10.1179/000870493786962263
	Visvalingam (2016): 		 https://www.tandfonline.com/doi/pdf/10.1080/00087041.2016.1151097
	Mandelbrot (1983): 			 https://science.sciencemag.org/content/sci/156/3775/636.full.pdf
	https://bost.ocks.org/mike/simplify/
	https://mapshaper.org/
	https://matplotlib.org/stable/gallery/text_labels_and_annotations/custom_legends.html
	https://cartographicperspectives.org/index.php/journal/article/view/cp68-roth-et-al/html
	https://docs.python.org/3/tutorial/datastructures.html
	https://docs.python.org/3/library/copy.html

New Topics:
	List Comprehension
  	if Statements
  	Shallow copies
	Dictionaries
	Shapely Geometry Types
"""

from sys import exit
from math import sqrt
from shapely.geometry import LineString
from geopandas import read_file, GeoSeries
from matplotlib_scalebar.scalebar import ScaleBar
from matplotlib.pyplot import subplots, savefig, subplots_adjust

def distance(x1, y1, x2, y2):
	"""
	* Use Pythagoras' theorem to measure a distance.
	*	https://en.wikipedia.org/wiki/Pythagorean_theorem
	"""
	return sqrt((x1 - x2)**2 + (y1 - y2)**2)


def get_effective_area(a, b, c):
	"""
	* Calculate the area of a triangle made from the points a, b and c using Heron's formula
	* 	https://en.wikipedia.org/wiki/Heron%27s_formula
	*
	* Could be simplified to:
	* 	`abs( (a[0]-c[0]) * (b[1]-a[1]) - (a[0]-b[0]) * (c[1]-a[1]) ) * 0.5`
	"""
	# calculate the length of each side
	side_a = distance(b[0], b[1], c[0], c[1])
	side_b = distance(a[0], a[1], c[0], c[1])
	side_c = distance(a[0], a[1], b[0], b[1])

	# calculate semi-perimeter of the triangle (perimeter / 2)
	s = (side_a + side_b + side_c) / 2

	# apply Heron's formula and return
	return sqrt(s * (s - side_a) * (s - side_b) * (s - side_c))


def visvalingam_whyatt(node_list, n_nodes):
	"""
	* Simplify a line using point elimination based on effective area
	"""

	# calculate and store the effective area for each point, excluding the end points
	areas = [ {"point": node_list[i], "area": get_effective_area(node_list[i-1],
		node_list[i], node_list[i+1])} for i in range(1, len(node_list)-1) ]

	# add the end points back in (in this case they are the same as our line is a polygon)
	areas.insert(0, {"point": node_list[0], "area": 0})
	areas.insert(len(areas), {"point": node_list[len(node_list)-1], "area": 0})

	# take a shallow copy of the list so that we don't edit the original
	nodes = areas.copy()

	# keep going until we run out of nodes
	while len(nodes) > n_nodes:

		# init min area with a large number
		min_area = float("inf")

		# loop through every point, excluding the end points
		for i in range(1, len(nodes)-1):

			# if the effective area of this point is smaller than the previous minimum
			if nodes[i]['area'] < min_area:

				# store the mew minimum area and the index of the point
				min_area = nodes[i]['area']
				node_to_delete = i

		# remove the current point from the list
		nodes.pop(node_to_delete)

		# recalculate effective area to the left of the deleted node
		nodes[node_to_delete-1]['area'] = get_effective_area(nodes[node_to_delete-2]['point'],
			nodes[node_to_delete-1]['point'], nodes[node_to_delete]['point'])		# left

		# if there is a node to the right of the deleted node, recalculate the effective area
		if node_to_delete < len(nodes)-1:
			nodes[node_to_delete]['area'] = get_effective_area(nodes[node_to_delete-1]['point'],
				nodes[node_to_delete]['point'], nodes[node_to_delete+1]['point'])	# right

	# extract the nodes and return
	return [ node['point'] for node in nodes ]


# set the percentage of nodes that you want to remove
SIMPLIFICATION_PERC = 95
EXCLUSION_AREA = 500000000

# get the proj string definition for British National Grid (OSGB)
osgb = "+proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs"

# open a dataset of all contries in the world
world = read_file("../data/natural-earth/ne_10m_admin_0_countries.shp")

# extract the UK, project, and extract the geometry (a multipolygon)
uk = world[(world.ISO_A3 == 'GBR')].to_crs(osgb).geometry.iloc[0]

# loop through the multipolygon and find the biggest bit (mainland Great Britain)
if uk.geom_type != 'MultiPolygon':
  print("Data is not a MultiPolygon, exiting...")
  exit()

# init counter variables
original_nodes = 0
original_len = 0
original_islands = 0
simplified_nodes = 0
simplified_len = 0
simplified_islands = 0

# init lists to hold the results
original_lines = []
simplified_lines = []

# loop through each polygon in the multipolygon
for poly in uk.geoms:

	''' this is done outside of the if statement to account for nodes lost in elimination '''

	# extract coordinates for the current polygon
	coord_list = list(poly.boundary.coords)

	# make a linestring out of the coordinates & update counters
	before_line = LineString(coord_list)
	original_lines.append(before_line)

	# update counters
	original_nodes += len(coord_list)
	original_len += before_line.length / 1000
	original_islands += 1

	# eliminate polygons that are too small
	if poly.area > EXCLUSION_AREA:

		# how many nodes do we need?
		n_nodes = int(len(coord_list) / 100.0 * (100 - SIMPLIFICATION_PERC)) 
		
		# ensure that there are at least 3 nodes
		if n_nodes < 3:
			n_nodes = 3 

		# remove one node and overwrite it with the new, shorter list
		simplified_coord_list = visvalingam_whyatt(coord_list, n_nodes)

		# make the resulting list of coordinates into a line
		after_line = LineString(simplified_coord_list)
		simplified_lines.append(after_line)

		# update counters
		simplified_nodes += len(simplified_coord_list)
		simplified_len += after_line.length / 1000
		simplified_islands += 1

print(f"original node count: {original_nodes}")
print(f"original length: {original_len:.2f}km\n")

print(f"simplified node count: {simplified_nodes}")
print(f"simplified length: {simplified_len:.2f}km")

# create map axis object, with two axes (maps)
fig, my_axs = subplots(1, 2, figsize=(16, 10))

# set titles
fig.suptitle("The Length of the Coastline of The United Kingdom")
my_axs[0].set_title(f"Original:\n{original_islands} islands, {original_len:,.0f}km, {original_nodes} nodes.")
my_axs[1].set_title(f"{SIMPLIFICATION_PERC}% Simplified & {EXCLUSION_AREA / 1000:,.0f}sq.km Exclusion:\n{simplified_islands} islands, {simplified_len:,.0f}km, {simplified_nodes} nodes.")

# reduce the gap between the subplots
subplots_adjust(wspace=0)

# add the original coastline
GeoSeries(original_lines, crs=osgb).plot(
    ax=my_axs[0],
    color='blue',
    linewidth = 0.6,
	)

# add the new coastline
GeoSeries(simplified_lines, crs=osgb).plot(
    ax=my_axs[1],
    color='red',
    linewidth = 0.6,
	)

# edit individual axis
for my_ax in my_axs:

	# remove axes
	my_ax.axis('off')

	# add north arrow
	x, y, arrow_length = 0.95, 0.99, 0.1
	my_ax.annotate('N', xy=(x, y), xytext=(x, y-arrow_length),
		arrowprops=dict(facecolor='black', width=5, headwidth=15),
		ha='center', va='center', fontsize=20, xycoords=my_ax.transAxes)

	# add scalebar
	my_ax.add_artist(ScaleBar(dx=1, units="m", location="lower right"))

# save the result
savefig(f'out/4-multi.png', bbox_inches='tight')
print("done!")