import geopandas as gpd
import pandas as pd
import glob
def get_processed_region_ids(directory):
    """Return the region ids of files already present in *directory*.

    Assumes files are named ``<prefix>_<region_id>.<extension>`` — the id is
    taken from the segment after the last underscore, before the first dot.
    *directory* is expected to end with a path separator.
    """
    ids = []
    for path in glob.glob(directory + '*'):
        tail = path.split('_')[-1]
        ids.append(int(tail.split('.')[0]))
    return ids
Morphometric processing
# Input / output directories for the processing pipeline.
# (Reconstructed: the export displaced each variable name from its value;
# names are re-attached to the path whose meaning they describe.)
regions_buildings_dir = '/data/uscuni-eurofab/regions/buildings/'          # raw per-region building files
buildings_dir = '/data/uscuni-eurofab/processed_data/buildings/'           # processed buildings
overture_streets_dir = '/data/uscuni-eurofab/overture_streets/'            # raw Overture street data
streets_dir = '/data/uscuni-eurofab/processed_data/streets/'               # processed streets
enclosures_dir = '/data/uscuni-eurofab/processed_data/enclosures/'
tessellations_dir = '/data/uscuni-eurofab/processed_data/tessellations/'
graph_dir = '/data/uscuni-eurofab/processed_data/neigh_graphs/'            # neighbourhood graphs
chars_dir = '/data/uscuni-eurofab/processed_data/chars/'                   # morphometric characters
simplfied_buildings_dir = '/data/uscuni-eurofab/processed_data/simplified_buildings/'
regions_datadir = "/data/uscuni-eurofab/"                                  # root data directory
# Load the hull geometry for every region; the index holds the region ids
# iterated over by all the processing loops below.
region_hulls = gpd.read_parquet(
    regions_datadir + "regions/" + "ms_ce_region_hulls.parquet"
)
region_hulls.shape  # notebook output: (474, 1)

region_id = 53490  # example region id used for interactive inspection
Process buildings
Run the building processing pipeline for all regions
from core.generate_buildings import read_region_buildings, process_region_buildings
def process_single_region_buildings(region_id):
    """Run the building simplification pipeline for one region.

    Reads the raw buildings for *region_id*, processes them with
    ``process_region_buildings`` (simplification + merging — see
    core.generate_buildings for the exact semantics) and writes the
    result to ``simplfied_buildings_dir``.
    """
    print('processing', region_id)
    buildings = gpd.read_parquet(regions_buildings_dir + f'buildings_{region_id}.pq')
    # NOTE(review): positional True presumably toggles simplification — confirm
    # against the process_region_buildings signature.
    buildings = process_region_buildings(buildings, True, simplification_tolerance=.1, merge_limit=25)
    buildings.to_parquet(simplfied_buildings_dir + f"buildings_{region_id}.parquet")
%%capture cap
for region_id, _ in region_hulls.iterrows():
process_single_region_buildings(region_id)
Copy over the simplified buildings to the processed data folder.
# !cp -r /data/uscuni-eurofab/processed_data/simplified_buildings/ /data/uscuni-eurofab/processed_data/buildings/
Process streets
Run the street processing pipeline for all regions.
from core.generate_streets import process_region_streets
%%capture cap
for region_id, _ in region_hulls.iterrows():
= process_region_streets(region_id, overture_streets_dir, buildings_dir)
streets + f'streets_{region_id}.parquet') streets.to_parquet(streets_dir
= 109005 region_id
Process elements
Run the element generating pipeline for all regions.
from core.generate_elements import process_region_elements, generate_enclosures_representative_points, generate_tess
# Resume support: skip regions whose tessellation output already exists.
processed_region_ids = get_processed_region_ids(tessellations_dir)

for region_id, _ in region_hulls[~region_hulls.index.isin(processed_region_ids)].iterrows():
    enclosures, tesselations = process_region_elements(buildings_dir, streets_dir, region_id)

    ## save files
    enclosures.to_parquet(enclosures_dir + f"enclosure_{region_id}.parquet")
    print("Processed enclosures")
    tesselations.to_parquet(tessellations_dir + f"tessellation_{region_id}.parquet")
    print("processed tesselations")
Process graphs
Run the graph generating pipeline for all regions.
from core.generate_ngraphs import process_region_graphs
# Build neighbourhood graphs for every region. The helper reads the element
# files from the given directories; presumably it writes its output into
# graph_dir — confirm in core.generate_ngraphs.
for region_id, _ in region_hulls.iterrows():
    process_region_graphs(
        region_id,
        graph_dir,
        buildings_dir,
        streets_dir,
        enclosures_dir,
        tessellations_dir,
    )
Process morphometrics
Run the morphometric character processing pipeline for all regions.
from core.generate_chars import process_single_region_chars, process_building_chars
# Compute morphometric characters for every region from the graphs and
# element files; presumably results land in chars_dir — confirm in
# core.generate_chars.
for region_id, _ in region_hulls.iterrows():
    process_single_region_chars(
        region_id,
        graph_dir,
        buildings_dir,
        streets_dir,
        enclosures_dir,
        tessellations_dir,
        chars_dir,
    )
Merge data
Merge all the building, street, node, enclosure and ETC morphometric characters.
from core.generate_merged_primary_chars import merge_into_primary
from core.utils import used_keys
# Merge the per-element character tables into one primary table per region.
for region_id, _ in region_hulls.iterrows():
    # Load the character tables produced by the previous pipeline steps.
    tessellation = gpd.read_parquet(chars_dir + f"tessellations_chars_{region_id}.parquet")
    buildings = gpd.read_parquet(chars_dir + f"buildings_chars_{region_id}.parquet")
    enclosures = gpd.read_parquet(chars_dir + f"enclosures_chars_{region_id}.parquet")
    streets = gpd.read_parquet(chars_dir + f"streets_chars_{region_id}.parquet")
    nodes = gpd.read_parquet(chars_dir + f"nodes_chars_{region_id}.parquet")

    # Tessellation cells and buildings are aligned on the same index.
    merged = pd.merge(
        tessellation.drop(columns=["geometry"]),
        buildings.drop(columns=["nodeID", "geometry", "nID"]),
        right_index=True,
        left_index=True,
        how="left",
    )

    # Attach enclosure characters via the enclosure_index -> eID link.
    merged = merged.merge(
        enclosures.drop(columns="geometry"),
        right_on="eID",
        left_on="enclosure_index",
        how="left",
    )

    # Attach street-segment and street-node characters by their id columns.
    merged = merged.merge(streets.drop(columns="geometry"), on="nID", how="left")
    merged = merged.merge(nodes.drop(columns="geometry"), on="nodeID", how="left")

    # Drop join keys and bookkeeping columns that are not characters.
    merged = merged.drop(
        columns=[
            "nID",
            "eID",
            "nodeID",
            "mm_len",
            "cdsbool",
            "node_start",
            "node_end",
            # "x",
            # "y",
            "enclosure_index",
            # "id",
            # "osm_id",
            # "index", ## maybe keep
        ]
    )
    # Restore the tessellation-cell index lost by the key-based merges.
    merged = merged.set_index(tessellation.index)

    # Keep only the canonical primary characters, in used_keys order.
    primary = merged[list(used_keys.keys())]
    primary.to_parquet(chars_dir + f'primary_chars_{region_id}.parquet')