Assign ETCs to hexagons

Assign every ETC (enclosed tessellation cell) in every region to an H3 hexagon. The hexagon ids are later used to group cells when building the model train/test split, which limits spatial leakage between the two sets.

import h3
import shapely
import pandas as pd
import tobler
import geopandas as gpd
# H3 resolution used when covering every region hull with hexagons.
h3_resolution = 7

# Dataset root and the per-region processed inputs derived from it.
regions_datadir = "/data/uscuni-eurofab-overture/"
tessellations_dir = '/data/uscuni-eurofab-overture/processed_data/tessellations/'
# NOTE(review): buildings_dir is not referenced in the visible part of this notebook.
buildings_dir = '/data/uscuni-eurofab-overture/processed_data/buildings/'

# Region hulls: the index is used as the region id when iterating below,
# and the first column holds the hull geometry.
region_hulls_path = regions_datadir + "regions/" + "ov_ce_region_hulls.parquet"
region_hulls = gpd.read_parquet(region_hulls_path)
region_hulls.shape
(709, 1)
def assign_hexagons(region_id, region_hull):
    """Map every tessellation cell (ETC) of one region to a single H3 cell.

    Parameters
    ----------
    region_id
        Identifier used to locate ``tessellation_{region_id}.parquet`` inside
        ``tessellations_dir``.
    region_hull
        Row-like object whose first positional element is the region's hull
        geometry. It is handed to ``h3.geo_to_cells`` unchanged and the
        resulting hexagons are tagged as EPSG:4326, so the hull is expected
        to be in lon/lat coordinates.

    Returns
    -------
    pandas.Series
        Series named ``'hexagons'`` holding H3 cell ids, indexed by the
        tessellation index and sorted by it. Tessellation cells that do not
        intersect any generated hexagon are absent from the result.
    """
    # Cover the hull with H3 cells at the notebook-wide resolution.
    hull_geom = region_hull.iloc[0]
    cell_ids = h3.geo_to_cells(hull_geom, res=h3_resolution)

    # Build one polygon per H3 cell, then reproject to EPSG:3035
    # (presumably the CRS of the tessellation files -- confirm).
    cell_polygons = [
        shapely.geometry.shape(h3.cells_to_geo([cell_id])) for cell_id in cell_ids
    ]
    hexagons = gpd.GeoSeries(
        cell_polygons, index=cell_ids, name='geometry', crs='epsg:4326'
    ).to_crs(epsg=3035)

    tess = gpd.read_parquet(
        tessellations_dir + f"tessellation_{region_id}.parquet"
    )

    # Positional (hexagon, tessellation) pairs for every intersecting couple.
    hex_pos, tess_pos = tess.sindex.query(hexagons, predicate='intersects')

    # A tessellation cell can intersect several hexagons; keep only the first
    # pair reported for each cell so that it ends up in exactly one hexagon.
    first_hit = ~pd.Series(tess_pos).duplicated().to_numpy()
    hex_pos = hex_pos[first_hit]
    tess_pos = tess_pos[first_hit]

    hex_assignments = pd.Series(
        hexagons.index[hex_pos].values,
        index=tess.index[tess_pos],
        name='hexagons',
    )
    return hex_assignments.sort_index()
%%time
# Reproject the hulls to lon/lat (EPSG:4326) once, then process region by region.
hulls_wgs84 = region_hulls.to_crs(epsg=4326)
for region_id, region_hull in hulls_wgs84.iterrows():
    print(region_id)  # progress indicator
    hex_assignments = assign_hexagons(region_id, region_hull)
    # Turn the tessellation index into a regular column before writing.
    assignment_table = hex_assignments.reset_index()
    assignment_table.to_parquet(f'/data/uscuni-eurofab-overture/processed_data/hexagons/{region_id}_hexagon.pq')
4
5
20
356
401
646
687
810
1014
1049
1054
1117
1321
1476
1598
1642
1827
1940
2110
2112
2268
2304
2485
2779
2846
2885
2997
3086
3147
3256
3335
3408
3415
3462
3631
3751
3758
3770
3888
3892
3907
4040
4066
4347
4356
4798
4833
5084
5388
5513
5832
6004
6037
6085
6090
6108
6574
6612
6738
6918
7098
7111
7125
7355
7607
7634
7674
7688
7823
7867
7962
8160
8213
8245
8272
8320
8360
8438
8854
9048
9147
9310
9353
9656
9678
9728
9754
9830
9871
9981
10078
10135
10196
10385
10446
10454
10574
10771
10846
10939
10957
10962
11055
11196
11282
11345
11633
11700
11785
11818
11855
11860
11944
12041
12214
12311
12363
12458
12493
12502
12511
12551
12590
12647
12815
13347
13348
13385
13475
13566
13633
13928
14064
14137
14242
14328
14404
14459
14496
14663
14759
14766
14946
15176
15241
15338
15351
15406
15416
15882
16414
16430
16439
17204
17373
17744
17780
17846
17892
17933
18116
18127
18230
19263
19442
19537
19811
19906
20034
20332
20490
20556
20796
20861
21115
21229
21365
21811
22026
22246
22417
22543
22552
23103
23108
23891
24036
24098
24121
24362
24609
24665
25001
25412
25423
25542
25779
25875
25924
25945
25951
26132
26568
26659
26798
27160
27223
27224
27423
27610
27765
27832
27846
28020
28131
28132
28318
28858
28907
29070
29520
29591
29595
30057
30339
30406
30718
30802
30879
30976
31083
31361
31466
31810
31952
32398
32484
32698
32854
32928
33078
33284
33413
33475
33559
33710
33764
33899
33954
33987
34203
34239
34737
34746
34842
34910
34995
35093
35245
35381
35747
36318
36322
36469
36518
36673
36704
36769
36937
37025
37052
37215
37507
37680
37901
37959
38118
38266
38404
38689
38711
38834
38944
39222
39227
39379
39679
40159
40279
40343
40994
41074
41392
41479
41553
41592
41720
42149
42217
42778
42900
43046
43163
43358
43697
43706
44026
44124
44296
44370
44818
44954
45036
45229
45232
45255
45287
45693
45712
45927
45982
46007
46134
46356
46617
47043
47145
47182
47309
47986
48005
48597
48608
48627
48972
49034
49124
49274
49375
49577
50118
50194
50358
50553
50899
50990
51150
51204
51394
51406
51741
51847
51931
52205
53023
53391
53395
53644
53674
53811
53816
54043
54449
54500
54902
55045
55183
55255
55407
55421
55554
55981
56018
56155
57071
57160
57400
57642
57659
57923
58459
58597
59055
59218
59744
60257
60384
60640
60988
61019
61065
61071
61320
61437
61563
62171
62297
62341
62562
62749
62764
63344
63357
63677
63758
63855
63960
64539
64546
64638
64807
65014
65057
65198
65371
65565
65630
65680
66292
66335
66753
67169
67491
67612
68353
68706
68876
68882
69134
69159
69198
69502
70724
70832
70993
71005
71006
71021
71935
72129
72807
72892
73061
73516
73704
73741
74731
75342
75509
75745
76136
76353
76485
77286
77565
77791
78483
78714
79306
80132
80151
80175
80385
80622
80892
81218
81675
81914
82077
82594
82632
82799
83254
83289
83458
83752
84131
84201
84232
84542
84568
85004
85118
85345
86120
86197
86533
86686
86688
86691
86878
86974
86999
87377
87722
87781
87987
88191
88233
88782
89293
89298
89686
89824
90010
90283
90357
90905
90996
91132
91199
91484
92200
92570
93688
94293
94722
94733
95532
95556
95625
95783
95833
95876
96460
96489
96543
96840
97391
97581
97670
97733
97783
98043
98105
98208
98254
98402
98561
99170
99298
99449
99663
99840
99844
99890
99973
100065
100273
100502
100588
100668
100886
100951
101004
101076
101276
101333
101468
101479
101552
101573
101868
102307
102483
102649
102660
102728
102951
103240
103310
103669
103701
103757
103841
103955
103995
104650
104689
104700
104774
104942
105139
105549
105598
105620
106235
106236
106409
106603
106690
106876
106891
106928
106972
107132
107471
107621
107675
107744
108119
108129
108805
108885
108886
108938
109204
109339
109648
109769
109915
110149
110569
110785
111027
111785
111990
112303
112689
112750
113056
113175
113257
113603
113842
114614
114684
115038
115302
115459
115629
116188
116245
116433
116665
117011
117344
117715
117754
117987
118186
118968
119151
119682
119838
120289
120682
120707
121047
121247
121367
121511
121543
121638
121880
122862
123006
123161
123183
123496
123530
123553
123931
124010
124188
124343
124658
124693
125382
125435
125595
125714
125931
126020
126123
126342
126817
126919
127339
127965
128383
129133
129633
129657
129981
130016
130054
130057
130435
130457
130576
130652
131601
131732
132082
132096
132420
132991
133341
133415
133506
133629
133675
133728
134690
134697
134988
135194
135398
CPU times: user 4min, sys: 47.8 s, total: 4min 48s
Wall time: 4min 27s

Explore assignment

# Region to inspect; the second assignment is the one that takes effect.
region_id = 65806
region_id = 66292

# Reload the stored assignments, restoring the 'index' column as the index.
hexagon_file = f'/data/uscuni-eurofab-overture/processed_data/hexagons/{region_id}_hexagon.pq'
hex_assignments = pd.read_parquet(hexagon_file).set_index('index')

# Index labels of all rows assigned to one particular hexagon.
in_target_hexagon = hex_assignments['hexagons'] == '871e354ddffffff'
selected = hex_assignments[in_target_hexagon].index
selected.shape
(1383,)
# Load the tessellation of the inspected region and show the cells selected
# above on an interactive map.
tessellation_path = tessellations_dir + f"tessellation_{region_id}.parquet"
tess = gpd.read_parquet(tessellation_path)
tess.loc[selected].explore()
Make this Notebook Trusted to load map: File -> Trust Notebook