pgLatLon
changeset 10:684a78d2f9f0
Introduced lossy overlap operator (&&+) and fixed ecircle overlap searches on GiST-indexed ecluster columns
author | jbe |
---|---|
date | Fri Sep 02 14:08:01 2016 +0200 (2016-09-02) |
parents | dc8da756b761 |
children | 7c1c76e7d341 |
files | README.mkd create_test_db.data.sql create_test_db.schema.sql latlon--0.3.sql latlon-v0003.c |
line diff
1.1 --- a/README.mkd Fri Sep 02 14:04:04 2016 +0200 1.2 +++ b/README.mkd Fri Sep 02 14:08:01 2016 +0200 1.3 @@ -1,4 +1,4 @@ 1.4 -pgLatLon v0.1 documentation 1.5 +pgLatLon v0.3 documentation 1.6 =========================== 1.7 1.8 pgLatLon is a spatial database extension for the PostgreSQL object-relational 1.9 @@ -6,12 +6,15 @@ 1.10 for the WGS-84 spheroid. 1.11 1.12 While many other spatial databases still use imprecise bounding boxes for many 1.13 -operations, pgLatLon supports more precise geometric calculations for all 1.14 -implemented operators. Efficient indexing of geometric objects is provided 1.15 +operations, pgLatLon aims to support more precise geometric calculations for 1.16 +all implemented operators. Efficient indexing of geometric objects is provided 1.17 using space-filling fractal curves. Optimizations on bit level (including 1.18 logarithmic compression) allow for a highly memory-efficient non-overlapping 1.19 index suitable for huge datasets. 1.20 1.21 +pgLatLon is a lightweight solution as it only depends on PostgreSQL itself (and 1.22 +a C compiler for building). 1.23 + 1.24 Unlike competing spatial extensions for PostgreSQL, pgLatLon is available under 1.25 the permissive MIT/X11 license to avoid problems with viral licenses like the 1.26 GPLv2/v3. 1.27 @@ -239,6 +242,24 @@ 1.28 The `&&` operator is commutative, i.e. `a && b` is the same as `b && a`. Each 1.29 commutation is supported as well. 1.30 1.31 +#### Lossy overlap operator `&&+` 1.32 + 1.33 +Tests if two geographic objects may have at least one point in common. Opposed 1.34 +to the `&&` operator, the `&&+` operator may return false positives and is 1.35 +currently implemented for: 1.36 + 1.37 +* `epoint &&+ ecluster` 1.38 +* `ebox &&+ ecircle` 1.39 +* `ebox &&+ ecluster` 1.40 +* `ecircle &&+ ecluster` 1.41 +* `ecluster &&+ ecluster` 1.42 + 1.43 +The `&&+` operator is commutative, i.e. `a &&+ b` is the same as `b &&+ a`. Each 1.44 +commutation is supported as well. 1.45 + 1.46 +Where two data types support both the `&&` and the `&&+` operator, the `&&+` 1.47 +operator computes faster. 1.48 + 1.49 #### Distance operator `<->` 1.50 1.51 Calculates the shortest distance between two geographic objects in meters (zero
2.1 --- a/create_test_db.data.sql Fri Sep 02 14:04:04 2016 +0200 2.2 +++ b/create_test_db.data.sql Fri Sep 02 14:08:01 2016 +0200 2.3 @@ -1,6 +1,24 @@ 2.4 2.5 -INSERT INTO "test" ("location", "area") SELECT 2.6 +CREATE OR REPLACE FUNCTION tmp_three_points() 2.7 + RETURNS epoint[] 2.8 + LANGUAGE plpgsql VOLATILE AS $$ 2.9 + DECLARE 2.10 + p1 epoint; 2.11 + p2 epoint; 2.12 + p3 epoint; 2.13 + BEGIN 2.14 + p1 := epoint((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180); 2.15 + p2 := epoint(latitude(p1) + (2*random()-1), longitude(p1) + cos(latitude(p1)/180*pi()) * (2*random()-1)); 2.16 + p3 := epoint(latitude(p1) + (2*random()-1), longitude(p1) + cos(latitude(p1)/180*pi()) * (2*random()-1)); 2.17 + RETURN ARRAY[p1, p2, p3]; 2.18 + END; 2.19 + $$; 2.20 + 2.21 +INSERT INTO "test" ("location", "surrounding", "multipoint", "triangle") SELECT 2.22 epoint((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180), 2.23 - ecircle((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180, -ln(1-random()) * 1000) 2.24 + ecircle((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180, -ln(1-random()) * 1000), 2.25 + ecluster_create_multipoint(tmp_three_points()), 2.26 + ecluster_create_polygon(tmp_three_points()) 2.27 FROM generate_series(1, 10000); 2.28 2.29 +DROP FUNCTION tmp_three_points();
3.1 --- a/create_test_db.schema.sql Fri Sep 02 14:04:04 2016 +0200 3.2 +++ b/create_test_db.schema.sql Fri Sep 02 14:08:01 2016 +0200 3.3 @@ -4,8 +4,12 @@ 3.4 CREATE TABLE "test" ( 3.5 "id" SERIAL4 PRIMARY KEY, 3.6 "location" EPOINT NOT NULL, 3.7 - "area" ECIRCLE NOT NULL ); 3.8 + "surrounding" ECIRCLE NOT NULL, 3.9 + "multipoint" ECLUSTER NOT NULL, 3.10 + "triangle" ECLUSTER NOT NULL ); 3.11 3.12 -CREATE INDEX "test_location_key" ON "test" USING gist ("location"); 3.13 -CREATE INDEX "test_area_key" ON "test" USING gist ("area"); 3.14 +CREATE INDEX "test_location_key" ON "test" USING gist ("location"); 3.15 +CREATE INDEX "test_surrounding_key" ON "test" USING gist ("surrounding"); 3.16 +CREATE INDEX "test_multipoint_key" ON "test" USING gist ("multipoint"); 3.17 +CREATE INDEX "test_triangle_key" ON "test" USING gist ("triangle"); 3.18
4.1 --- a/latlon--0.3.sql Fri Sep 02 14:04:04 2016 +0200 4.2 +++ b/latlon--0.3.sql Fri Sep 02 14:08:01 2016 +0200 4.3 @@ -732,11 +732,26 @@ 4.4 LANGUAGE C IMMUTABLE STRICT 4.5 AS '$libdir/latlon-v0003', 'pgl_epoint_ecluster_overlap'; 4.6 4.7 +CREATE FUNCTION epoint_ecluster_may_overlap_proc(epoint, ecluster) 4.8 + RETURNS boolean 4.9 + LANGUAGE C IMMUTABLE STRICT 4.10 + AS '$libdir/latlon-v0003', 'pgl_epoint_ecluster_may_overlap'; 4.11 + 4.12 CREATE FUNCTION ebox_overlap_proc(ebox, ebox) 4.13 RETURNS boolean 4.14 LANGUAGE C IMMUTABLE STRICT 4.15 AS '$libdir/latlon-v0003', 'pgl_ebox_overlap'; 4.16 4.17 +CREATE FUNCTION ebox_ecircle_may_overlap_proc(ebox, ecircle) 4.18 + RETURNS boolean 4.19 + LANGUAGE C IMMUTABLE STRICT 4.20 + AS '$libdir/latlon-v0003', 'pgl_ebox_ecircle_may_overlap'; 4.21 + 4.22 +CREATE FUNCTION ebox_ecluster_may_overlap_proc(ebox, ecluster) 4.23 + RETURNS boolean 4.24 + LANGUAGE C IMMUTABLE STRICT 4.25 + AS '$libdir/latlon-v0003', 'pgl_ebox_ecluster_may_overlap'; 4.26 + 4.27 CREATE FUNCTION ecircle_overlap_proc(ecircle, ecircle) 4.28 RETURNS boolean 4.29 LANGUAGE C IMMUTABLE STRICT 4.30 @@ -747,6 +762,16 @@ 4.31 LANGUAGE C IMMUTABLE STRICT 4.32 AS '$libdir/latlon-v0003', 'pgl_ecircle_ecluster_overlap'; 4.33 4.34 +CREATE FUNCTION ecircle_ecluster_may_overlap_proc(ecircle, ecluster) 4.35 + RETURNS boolean 4.36 + LANGUAGE C IMMUTABLE STRICT 4.37 + AS '$libdir/latlon-v0003', 'pgl_ecircle_ecluster_may_overlap'; 4.38 + 4.39 +CREATE FUNCTION ecluster_may_overlap_proc(ecluster, ecluster) 4.40 + RETURNS boolean 4.41 + LANGUAGE C IMMUTABLE STRICT 4.42 + AS '$libdir/latlon-v0003', 'pgl_ecluster_may_overlap'; 4.43 + 4.44 CREATE FUNCTION epoint_distance_proc(epoint, epoint) 4.45 RETURNS float8 4.46 LANGUAGE C IMMUTABLE STRICT 4.47 @@ -878,6 +903,103 @@ 4.48 join = areajoinsel 4.49 ); 4.50 4.51 +CREATE OPERATOR &&+ ( 4.52 + leftarg = epoint, 4.53 + rightarg = ecluster, 4.54 + procedure = epoint_ecluster_may_overlap_proc, 4.55 + commutator = &&+, 4.56 + restrict = areasel, 4.57 + join = areajoinsel 4.58 +); 4.59 + 4.60 +CREATE FUNCTION epoint_ecluster_may_overlap_commutator(ecluster, epoint) 4.61 + RETURNS boolean 4.62 + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; 4.63 + 4.64 +CREATE OPERATOR &&+ ( 4.65 + leftarg = ecluster, 4.66 + rightarg = epoint, 4.67 + procedure = epoint_ecluster_may_overlap_commutator, 4.68 + commutator = &&+, 4.69 + restrict = areasel, 4.70 + join = areajoinsel 4.71 +); 4.72 + 4.73 +CREATE OPERATOR &&+ ( 4.74 + leftarg = ebox, 4.75 + rightarg = ecircle, 4.76 + procedure = ebox_ecircle_may_overlap_proc, 4.77 + commutator = &&+, 4.78 + restrict = areasel, 4.79 + join = areajoinsel 4.80 +); 4.81 + 4.82 +CREATE FUNCTION ebox_ecircle_may_overlap_commutator(ecircle, ebox) 4.83 + RETURNS boolean 4.84 + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; 4.85 + 4.86 +CREATE OPERATOR &&+ ( 4.87 + leftarg = ecircle, 4.88 + rightarg = ebox, 4.89 + procedure = ebox_ecircle_may_overlap_commutator, 4.90 + commutator = &&+, 4.91 + restrict = areasel, 4.92 + join = areajoinsel 4.93 +); 4.94 + 4.95 +CREATE OPERATOR &&+ ( 4.96 + leftarg = ebox, 4.97 + rightarg = ecluster, 4.98 + procedure = ebox_ecluster_may_overlap_proc, 4.99 + commutator = &&+, 4.100 + restrict = areasel, 4.101 + join = areajoinsel 4.102 +); 4.103 + 4.104 +CREATE FUNCTION ebox_ecluster_may_overlap_commutator(ecluster, ebox) 4.105 + RETURNS boolean 4.106 + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; 4.107 + 4.108 +CREATE OPERATOR &&+ ( 4.109 + leftarg = ecluster, 4.110 + rightarg = ebox, 4.111 + procedure = ebox_ecluster_may_overlap_commutator, 4.112 + commutator = &&+, 4.113 + restrict = areasel, 4.114 + join = areajoinsel 4.115 +); 4.116 + 4.117 +CREATE OPERATOR &&+ ( 4.118 + leftarg = ecircle, 4.119 + rightarg = ecluster, 4.120 + procedure = ecircle_ecluster_may_overlap_proc, 4.121 + commutator = &&+, 4.122 + restrict = areasel, 4.123 + join = areajoinsel 4.124 +); 4.125 + 4.126 +CREATE FUNCTION ecircle_ecluster_may_overlap_commutator(ecluster, ecircle) 4.127 + RETURNS boolean 4.128 + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; 4.129 + 4.130 +CREATE OPERATOR &&+ ( 4.131 + leftarg = ecluster, 4.132 + rightarg = ecircle, 4.133 + procedure = ecircle_ecluster_may_overlap_commutator, 4.134 + commutator = &&+, 4.135 + restrict = areasel, 4.136 + join = areajoinsel 4.137 +); 4.138 + 4.139 +CREATE OPERATOR &&+ ( 4.140 + leftarg = ecluster, 4.141 + rightarg = ecluster, 4.142 + procedure = ecluster_may_overlap_proc, 4.143 + commutator = &&+, 4.144 + restrict = areasel, 4.145 + join = areajoinsel 4.146 +); 4.147 + 4.148 CREATE OPERATOR <-> ( 4.149 leftarg = epoint, 4.150 rightarg = epoint, 4.151 @@ -1003,13 +1125,14 @@ 4.152 4.153 CREATE OPERATOR CLASS epoint_ops 4.154 DEFAULT FOR TYPE epoint USING gist AS 4.155 - OPERATOR 11 = , 4.156 - OPERATOR 22 && (epoint, ebox), 4.157 - OPERATOR 23 && (epoint, ecircle), 4.158 - OPERATOR 24 && (epoint, ecluster), 4.159 - OPERATOR 31 <-> (epoint, epoint) FOR ORDER BY float_ops, 4.160 - OPERATOR 33 <-> (epoint, ecircle) FOR ORDER BY float_ops, 4.161 - OPERATOR 34 <-> (epoint, ecluster) FOR ORDER BY float_ops, 4.162 + OPERATOR 11 = , 4.163 + OPERATOR 22 && (epoint, ebox), 4.164 + OPERATOR 23 && (epoint, ecircle), 4.165 + OPERATOR 24 && (epoint, ecluster), 4.166 + OPERATOR 124 &&+ (epoint, ecluster), 4.167 + OPERATOR 31 <-> (epoint, epoint) FOR ORDER BY float_ops, 4.168 + OPERATOR 33 <-> (epoint, ecircle) FOR ORDER BY float_ops, 4.169 + OPERATOR 34 <-> (epoint, ecluster) FOR ORDER BY float_ops, 4.170 FUNCTION 1 pgl_gist_consistent(internal, internal, smallint, oid, internal), 4.171 FUNCTION 2 pgl_gist_union(internal, internal), 4.172 FUNCTION 3 pgl_gist_compress_epoint(internal), 4.173 @@ -1022,13 +1145,15 @@ 4.174 4.175 CREATE OPERATOR CLASS ecircle_ops 4.176 DEFAULT FOR TYPE ecircle USING gist AS 4.177 - OPERATOR 13 = , 4.178 - OPERATOR 21 && (ecircle, epoint), 4.179 - OPERATOR 23 && (ecircle, ecircle), 4.180 - OPERATOR 24 && (ecircle, ecluster), 4.181 - OPERATOR 31 <-> (ecircle, epoint) FOR ORDER BY float_ops, 4.182 - OPERATOR 33 <-> (ecircle, ecircle) FOR ORDER BY float_ops, 4.183 - OPERATOR 34 <-> (ecircle, ecluster) FOR ORDER BY float_ops, 4.184 + OPERATOR 13 = , 4.185 + OPERATOR 21 && (ecircle, epoint), 4.186 + OPERATOR 122 &&+ (ecircle, ebox), 4.187 + OPERATOR 23 && (ecircle, ecircle), 4.188 + OPERATOR 24 && (ecircle, ecluster), 4.189 + OPERATOR 124 &&+ (ecircle, ecluster), 4.190 + OPERATOR 31 <-> (ecircle, epoint) FOR ORDER BY float_ops, 4.191 + OPERATOR 33 <-> (ecircle, ecircle) FOR ORDER BY float_ops, 4.192 + OPERATOR 34 <-> (ecircle, ecluster) FOR ORDER BY float_ops, 4.193 FUNCTION 1 pgl_gist_consistent(internal, internal, smallint, oid, internal), 4.194 FUNCTION 2 pgl_gist_union(internal, internal), 4.195 FUNCTION 3 pgl_gist_compress_ecircle(internal), 4.196 @@ -1041,7 +1166,12 @@ 4.197 4.198 CREATE OPERATOR CLASS ecluster_ops 4.199 DEFAULT FOR TYPE ecluster USING gist AS 4.200 - OPERATOR 21 && (ecluster, epoint), 4.201 + OPERATOR 21 && (ecluster, epoint), 4.202 + OPERATOR 121 &&+ (ecluster, epoint), 4.203 + OPERATOR 122 &&+ (ecluster, ebox), 4.204 + OPERATOR 23 && (ecluster, ecircle), 4.205 + OPERATOR 123 &&+ (ecluster, ecircle), 4.206 + OPERATOR 124 &&+ (ecluster, ecluster), 4.207 FUNCTION 1 pgl_gist_consistent(internal, internal, smallint, oid, internal), 4.208 FUNCTION 2 pgl_gist_union(internal, internal), 4.209 FUNCTION 3 pgl_gist_compress_ecluster(internal),
5.1 --- a/latlon-v0003.c Fri Sep 02 14:04:04 2016 +0200 5.2 +++ b/latlon-v0003.c Fri Sep 02 14:08:01 2016 +0200 5.3 @@ -2107,6 +2107,19 @@ 5.4 PG_RETURN_BOOL(retval); 5.5 } 5.6 5.7 +/* check if point may be inside cluster (lossy overl. operator "&&+") in SQL */ 5.8 +PG_FUNCTION_INFO_V1(pgl_epoint_ecluster_may_overlap); 5.9 +Datum pgl_epoint_ecluster_may_overlap(PG_FUNCTION_ARGS) { 5.10 + pgl_point *point = (pgl_point *)PG_GETARG_POINTER(0); 5.11 + pgl_cluster *cluster = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); 5.12 + bool retval = pgl_distance( 5.13 + point->lat, point->lon, 5.14 + cluster->bounding.center.lat, cluster->bounding.center.lon 5.15 + ) <= cluster->bounding.radius; 5.16 + PG_FREE_IF_COPY(cluster, 1); 5.17 + PG_RETURN_BOOL(retval); 5.18 +} 5.19 + 5.20 /* check if two boxes overlap (overlap operator "&&") in SQL */ 5.21 PG_FUNCTION_INFO_V1(pgl_ebox_overlap); 5.22 Datum pgl_ebox_overlap(PG_FUNCTION_ARGS) { 5.23 @@ -2115,6 +2128,29 @@ 5.24 PG_RETURN_BOOL(pgl_boxes_overlap(box1, box2)); 5.25 } 5.26 5.27 +/* check if box and circle may overlap (lossy overl. operator "&&+") in SQL */ 5.28 +PG_FUNCTION_INFO_V1(pgl_ebox_ecircle_may_overlap); 5.29 +Datum pgl_ebox_ecircle_may_overlap(PG_FUNCTION_ARGS) { 5.30 + pgl_box *box = (pgl_box *)PG_GETARG_POINTER(0); 5.31 + pgl_circle *circle = (pgl_circle *)PG_GETARG_POINTER(1); 5.32 + PG_RETURN_BOOL( 5.33 + pgl_estimate_point_box_distance(&circle->center, box) <= circle->radius 5.34 + ); 5.35 +} 5.36 + 5.37 +/* check if box and cluster may overlap (lossy overl. operator "&&+") in SQL */ 5.38 +PG_FUNCTION_INFO_V1(pgl_ebox_ecluster_may_overlap); 5.39 +Datum pgl_ebox_ecluster_may_overlap(PG_FUNCTION_ARGS) { 5.40 + pgl_box *box = (pgl_box *)PG_GETARG_POINTER(0); 5.41 + pgl_cluster *cluster = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); 5.42 + bool retval = pgl_estimate_point_box_distance( 5.43 + &cluster->bounding.center, 5.44 + box 5.45 + ) <= cluster->bounding.radius; 5.46 + PG_FREE_IF_COPY(cluster, 1); 5.47 + PG_RETURN_BOOL(retval); 5.48 +} 5.49 + 5.50 /* check if two circles overlap (overlap operator "&&") in SQL */ 5.51 PG_FUNCTION_INFO_V1(pgl_ecircle_overlap); 5.52 Datum pgl_ecircle_overlap(PG_FUNCTION_ARGS) { 5.53 @@ -2140,6 +2176,33 @@ 5.54 PG_RETURN_BOOL(retval); 5.55 } 5.56 5.57 +/* check if circle and cluster may be overlap (l. ov. operator "&&+") in SQL */ 5.58 +PG_FUNCTION_INFO_V1(pgl_ecircle_ecluster_may_overlap); 5.59 +Datum pgl_ecircle_ecluster_may_overlap(PG_FUNCTION_ARGS) { 5.60 + pgl_circle *circle = (pgl_circle *)PG_GETARG_POINTER(0); 5.61 + pgl_cluster *cluster = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); 5.62 + bool retval = pgl_distance( 5.63 + circle->center.lat, circle->center.lon, 5.64 + cluster->bounding.center.lat, cluster->bounding.center.lon 5.65 + ) <= circle->radius + cluster->bounding.radius; 5.66 + PG_FREE_IF_COPY(cluster, 1); 5.67 + PG_RETURN_BOOL(retval); 5.68 +} 5.69 + 5.70 +/* check if two clusters may overlap (lossy overlap operator "&&+") in SQL */ 5.71 +PG_FUNCTION_INFO_V1(pgl_ecluster_may_overlap); 5.72 +Datum pgl_ecluster_may_overlap(PG_FUNCTION_ARGS) { 5.73 + pgl_cluster *cluster1 = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); 5.74 + pgl_cluster *cluster2 = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); 5.75 + bool retval = pgl_distance( 5.76 + cluster1->bounding.center.lat, cluster1->bounding.center.lon, 5.77 + cluster2->bounding.center.lat, cluster2->bounding.center.lon 5.78 + ) <= cluster1->bounding.radius + cluster2->bounding.radius; 5.79 + PG_FREE_IF_COPY(cluster1, 0); 5.80 + PG_FREE_IF_COPY(cluster2, 1); 5.81 + PG_RETURN_BOOL(retval); 5.82 +} 5.83 + 5.84 /* calculate distance between two points ("<->" operator) in SQL */ 5.85 PG_FUNCTION_INFO_V1(pgl_epoint_distance); 5.86 Datum pgl_epoint_distance(PG_FUNCTION_ARGS) { 5.87 @@ -2285,6 +2348,8 @@ 5.88 bool *recheck = (bool *)PG_GETARG_POINTER(4); 5.89 /* demand recheck because index and query methods are lossy */ 5.90 *recheck = true; 5.91 + /* strategy number aliases for different operators using the same strategy */ 5.92 + strategy %= 100; 5.93 /* strategy number 11: equality of two points */ 5.94 if (strategy == 11) { 5.95 /* query datum is another point */ 5.96 @@ -2653,6 +2718,8 @@ 5.97 /* demand recheck because distance is just an estimation */ 5.98 /* (real distance may be bigger) */ 5.99 *recheck = true; 5.100 + /* strategy number aliases for different operators using the same strategy */ 5.101 + strategy %= 100; 5.102 /* strategy number 31: distance to point */ 5.103 if (strategy == 31) { 5.104 /* query datum is a point */