Morphometric processing

import geopandas as gpd
import pandas as pd
import glob

def get_processed_region_ids(directory):
    """Return the region IDs that already have an output file in *directory*.

    Assumes every file is named ``<prefix>_<region_id>.<extension>`` and that
    *directory* ends with a path separator; the numeric suffix between the last
    underscore and the first dot is parsed as the region ID.
    """
    region_ids = []
    for path in glob.glob(directory + '*'):
        suffix = path.rsplit('_', 1)[-1]
        region_ids.append(int(suffix.split('.', 1)[0]))
    return region_ids
# Input/output directories for each stage of the morphometric pipeline.
regions_buildings_dir = '/data/uscuni-eurofab/regions/buildings/'
buildings_dir = '/data/uscuni-eurofab/processed_data/buildings/'
overture_streets_dir = '/data/uscuni-eurofab/overture_streets/'
streets_dir = '/data/uscuni-eurofab/processed_data/streets/'
enclosures_dir = '/data/uscuni-eurofab/processed_data/enclosures/'
tessellations_dir = '/data/uscuni-eurofab/processed_data/tessellations/'
graph_dir = '/data/uscuni-eurofab/processed_data/neigh_graphs/'
chars_dir = '/data/uscuni-eurofab/processed_data/chars/'
# NOTE(review): "simplfied" is a typo, but the name is referenced downstream —
# keep it until every usage is renamed in the same change.
simplfied_buildings_dir = '/data/uscuni-eurofab/processed_data/simplified_buildings/'


regions_datadir = "/data/uscuni-eurofab/"  # was assigned twice; duplicate removed

# One hull geometry per processing region; the index carries the region IDs
# that every loop below iterates over.
region_hulls = gpd.read_parquet(
        regions_datadir + "regions/" + "ms_ce_region_hulls.parquet"
    )
region_hulls.shape  # notebook echo — e.g. (474, 1)
region_id = 53490  # example region ID for interactive/debug runs

Process buildings

Run the building processing pipeline for all regions

from core.generate_buildings import read_region_buildings, process_region_buildings
def process_single_region_buildings(region_id):
    """Simplify and merge the raw buildings of one region and save the result.

    Reads the raw footprints from ``regions_buildings_dir`` and writes the
    processed ones to ``simplfied_buildings_dir``.
    """
    print('processing', region_id)
    raw_buildings = gpd.read_parquet(regions_buildings_dir + f'buildings_{region_id}.pq')
    simplified = process_region_buildings(
        raw_buildings, True, simplification_tolerance=.1, merge_limit=25
    )
    simplified.to_parquet(simplfied_buildings_dir + f"buildings_{region_id}.parquet")
%%capture cap
# Run the building pipeline for every region; %%capture suppresses the
# per-region print output in the notebook.
for region_id, _ in region_hulls.iterrows():
    process_single_region_buildings(region_id)

Copy over the simplified buildings to the processed data folder.

# !cp -r /data/uscuni-eurofab/processed_data/simplified_buildings/ /data/uscuni-eurofab/processed_data/buildings/

Process streets

Run the street processing pipeline for all regions.

from core.generate_streets import process_region_streets
%%capture cap
# Build each region's street network from the Overture data (using the
# processed buildings) and persist one parquet file per region.
for region_id, _ in region_hulls.iterrows():
    streets = process_region_streets(region_id, overture_streets_dir, buildings_dir)
    streets.to_parquet(streets_dir + f'streets_{region_id}.parquet')
region_id  = 109005  # example region ID for interactive/debug runs

Process elements

Run the element generating pipeline for all regions.

from core.generate_elements import process_region_elements, generate_enclosures_representative_points, generate_tess

# Resume support: skip regions whose tessellation output already exists.
processed_region_ids = get_processed_region_ids(tessellations_dir)
remaining_hulls = region_hulls[~region_hulls.index.isin(processed_region_ids)]
for region_id, _ in remaining_hulls.iterrows():
    enclosures, tesselations = process_region_elements(buildings_dir, streets_dir, region_id)

    # Persist the enclosures for this region.
    enclosures.to_parquet(enclosures_dir + f"enclosure_{region_id}.parquet")
    print("Processed enclosures")

    # Persist the tessellation cells for this region.
    tesselations.to_parquet(tessellations_dir + f"tessellation_{region_id}.parquet")
    print("processed tesselations")

Process graphs

Run the graph generating pipeline for all regions.

from core.generate_ngraphs import process_region_graphs

# Generate and cache the spatial neighbourhood graphs for every region.
for region_id in region_hulls.index:
    process_region_graphs(
        region_id,
        graph_dir,
        buildings_dir,
        streets_dir,
        enclosures_dir,
        tessellations_dir,
    )

Process morphometrics

Run the morphometric character processing pipeline for all regions.

from core.generate_chars import process_single_region_chars, process_building_chars

# Compute the morphometric characters of every element type (buildings,
# streets, nodes, enclosures, cells) for each region and write them to
# chars_dir.
for region_id in region_hulls.index:
    process_single_region_chars(
        region_id,
        graph_dir,
        buildings_dir,
        streets_dir,
        enclosures_dir,
        tessellations_dir,
        chars_dir,
    )

Merge data

Merge the building, street, node, enclosure and tessellation-cell morphometric characters into a single primary characters table per region.

from core.generate_merged_primary_chars import merge_into_primary
from core.utils import used_keys
# For each region, join the per-element character tables into one wide
# "primary characters" frame indexed by tessellation cell.
for region_id, _ in region_hulls.iterrows():
    tessellation = gpd.read_parquet(chars_dir + f"tessellations_chars_{region_id}.parquet")
    buildings = gpd.read_parquet(chars_dir + f"buildings_chars_{region_id}.parquet")
    enclosures = gpd.read_parquet(chars_dir + f"enclosures_chars_{region_id}.parquet")
    streets = gpd.read_parquet(chars_dir + f"streets_chars_{region_id}.parquet")
    nodes = gpd.read_parquet(chars_dir + f"nodes_chars_{region_id}.parquet")
    
    
    # Buildings are joined on the shared index (presumably one building per
    # cell — TODO confirm); left join keeps cells without building characters.
    merged = pd.merge(
        tessellation.drop(columns=["geometry"]),
        buildings.drop(columns=["nodeID", "geometry", 'nID']),
        right_index=True,
        left_index=True,
        how="left",
    )
    
    # Attach enclosure characters via the cell's enclosure_index -> eID key.
    merged = merged.merge(
        enclosures.drop(columns="geometry"),
        right_on="eID",
        left_on="enclosure_index",
        how="left",
    )
    
    # Street (nID) and node (nodeID) keys come from the building table above.
    merged = merged.merge(streets.drop(columns="geometry"), on="nID", how="left")
    merged = merged.merge(nodes.drop(columns="geometry"), on="nodeID", how="left")
    
    # Drop the join keys and bookkeeping columns, keeping only measured
    # characters; the commented names were deliberately retained.
    merged = merged.drop(
        columns=[
            "nID",
            "eID",
            "nodeID",
            "mm_len",
            "cdsbool",
            "node_start",
            "node_end",
            # "x",
            # "y",
            "enclosure_index",
            # "id",
            # "osm_id",
            # "index",  ## maybe keep
        ]
    )
    # Key-based merges reset the index; restore the tessellation cell index.
    # NOTE(review): this assumes every merge preserved row order and count
    # (i.e. the eID/nID/nodeID keys are unique in their tables) — verify.
    merged = merged.set_index(tessellation.index)
    
    # used_keys fixes the final column selection/order of the primary table.
    primary = merged[list(used_keys.keys())]
    primary.to_parquet(chars_dir + f'primary_chars_{region_id}.parquet')