# HG changeset patch # User jbe # Date 1472818081 -7200 # Node ID 684a78d2f9f021a5b2c0cbb5f58fb4e0f18690d3 # Parent dc8da756b761c5a9ab388e983bb2cd4b03981817 Introduced lossy overlap operator (&&+) and fixed ecircle overlap searches on GiST-indexed ecluster columns diff -r dc8da756b761 -r 684a78d2f9f0 README.mkd --- a/README.mkd Fri Sep 02 14:04:04 2016 +0200 +++ b/README.mkd Fri Sep 02 14:08:01 2016 +0200 @@ -1,4 +1,4 @@ -pgLatLon v0.1 documentation +pgLatLon v0.3 documentation =========================== pgLatLon is a spatial database extension for the PostgreSQL object-relational @@ -6,12 +6,15 @@ for the WGS-84 spheroid. While many other spatial databases still use imprecise bounding boxes for many -operations, pgLatLon supports more precise geometric calculations for all -implemented operators. Efficient indexing of geometric objects is provided +operations, pgLatLon aims to support more precise geometric calculations for +all implemented operators. Efficient indexing of geometric objects is provided using space-filling fractal curves. Optimizations on bit level (including logarithmic compression) allow for a highly memory-efficient non-overlapping index suitable for huge datasets. +pgLatLon is a lightweight solution as it only depends on PostgreSQL itself (and +a C compiler for building). + Unlike competing spatial extensions for PostgreSQL, pgLatLon is available under the permissive MIT/X11 license to avoid problems with viral licenses like the GPLv2/v3. @@ -239,6 +242,24 @@ The `&&` operator is commutative, i.e. `a && b` is the same as `b && a`. Each commutation is supported as well. +#### Lossy overlap operator `&&+` + +Tests if two geographic objects may have at least one point in common. Opposed +to the `&&` operator, the `&&+` operator may return false positives and is +currently implemented for: + +* `epoint &&+ ecluster` +* `ebox &&+ ecircle` +* `ebox &&+ ecluster` +* `ecircle &&+ ecluster` +* `ecluster &&+ ecluster` + +The `&&+` operator is commutative, i.e. `a &&+ b` is the same as `b &&+ a`. Each +commutation is supported as well. + +Where two data types support both the `&&` and the `&&+` operator, the `&&+` +operator computes faster. + #### Distance operator `<->` Calculates the shortest distance between two geographic objects in meters (zero diff -r dc8da756b761 -r 684a78d2f9f0 create_test_db.data.sql --- a/create_test_db.data.sql Fri Sep 02 14:04:04 2016 +0200 +++ b/create_test_db.data.sql Fri Sep 02 14:08:01 2016 +0200 @@ -1,6 +1,24 @@ -INSERT INTO "test" ("location", "area") SELECT +CREATE OR REPLACE FUNCTION tmp_three_points() + RETURNS epoint[] + LANGUAGE plpgsql VOLATILE AS $$ + DECLARE + p1 epoint; + p2 epoint; + p3 epoint; + BEGIN + p1 := epoint((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180); + p2 := epoint(latitude(p1) + (2*random()-1), longitude(p1) + cos(latitude(p1)/180*pi()) * (2*random()-1)); + p3 := epoint(latitude(p1) + (2*random()-1), longitude(p1) + cos(latitude(p1)/180*pi()) * (2*random()-1)); + RETURN ARRAY[p1, p2, p3]; + END; + $$; + +INSERT INTO "test" ("location", "surrounding", "multipoint", "triangle") SELECT epoint((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180), - ecircle((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180, -ln(1-random()) * 1000) + ecircle((asin(2*random()-1) / pi()) * 180, (2*random()-1) * 180, -ln(1-random()) * 1000), + ecluster_create_multipoint(tmp_three_points()), + ecluster_create_polygon(tmp_three_points()) FROM generate_series(1, 10000); +DROP FUNCTION tmp_three_points(); diff -r dc8da756b761 -r 684a78d2f9f0 create_test_db.schema.sql --- a/create_test_db.schema.sql Fri Sep 02 14:04:04 2016 +0200 +++ b/create_test_db.schema.sql Fri Sep 02 14:08:01 2016 +0200 @@ -4,8 +4,12 @@ CREATE TABLE "test" ( "id" SERIAL4 PRIMARY KEY, "location" EPOINT NOT NULL, - "area" ECIRCLE NOT NULL ); + "surrounding" ECIRCLE NOT NULL, + "multipoint" ECLUSTER NOT NULL, + "triangle" ECLUSTER NOT NULL ); -CREATE INDEX "test_location_key" ON "test" USING gist ("location"); -CREATE INDEX "test_area_key" ON "test" USING gist ("area"); +CREATE INDEX "test_location_key" ON "test" USING gist ("location"); +CREATE INDEX "test_surrounding_key" ON "test" USING gist ("surrounding"); +CREATE INDEX "test_multipoint_key" ON "test" USING gist ("multipoint"); +CREATE INDEX "test_triangle_key" ON "test" USING gist ("triangle"); diff -r dc8da756b761 -r 684a78d2f9f0 latlon--0.3.sql --- a/latlon--0.3.sql Fri Sep 02 14:04:04 2016 +0200 +++ b/latlon--0.3.sql Fri Sep 02 14:08:01 2016 +0200 @@ -732,11 +732,26 @@ LANGUAGE C IMMUTABLE STRICT AS '$libdir/latlon-v0003', 'pgl_epoint_ecluster_overlap'; +CREATE FUNCTION epoint_ecluster_may_overlap_proc(epoint, ecluster) + RETURNS boolean + LANGUAGE C IMMUTABLE STRICT + AS '$libdir/latlon-v0003', 'pgl_epoint_ecluster_may_overlap'; + CREATE FUNCTION ebox_overlap_proc(ebox, ebox) RETURNS boolean LANGUAGE C IMMUTABLE STRICT AS '$libdir/latlon-v0003', 'pgl_ebox_overlap'; +CREATE FUNCTION ebox_ecircle_may_overlap_proc(ebox, ecircle) + RETURNS boolean + LANGUAGE C IMMUTABLE STRICT + AS '$libdir/latlon-v0003', 'pgl_ebox_ecircle_may_overlap'; + +CREATE FUNCTION ebox_ecluster_may_overlap_proc(ebox, ecluster) + RETURNS boolean + LANGUAGE C IMMUTABLE STRICT + AS '$libdir/latlon-v0003', 'pgl_ebox_ecluster_may_overlap'; + CREATE FUNCTION ecircle_overlap_proc(ecircle, ecircle) RETURNS boolean LANGUAGE C IMMUTABLE STRICT @@ -747,6 +762,16 @@ LANGUAGE C IMMUTABLE STRICT AS '$libdir/latlon-v0003', 'pgl_ecircle_ecluster_overlap'; +CREATE FUNCTION ecircle_ecluster_may_overlap_proc(ecircle, ecluster) + RETURNS boolean + LANGUAGE C IMMUTABLE STRICT + AS '$libdir/latlon-v0003', 'pgl_ecircle_ecluster_may_overlap'; + +CREATE FUNCTION ecluster_may_overlap_proc(ecluster, ecluster) + RETURNS boolean + LANGUAGE C IMMUTABLE STRICT + AS '$libdir/latlon-v0003', 'pgl_ecluster_may_overlap'; + CREATE FUNCTION epoint_distance_proc(epoint, epoint) RETURNS float8 LANGUAGE C IMMUTABLE STRICT @@ -878,6 +903,103 @@ join = areajoinsel ); +CREATE OPERATOR &&+ ( + leftarg = epoint, + rightarg = ecluster, + procedure = epoint_ecluster_may_overlap_proc, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE FUNCTION epoint_ecluster_may_overlap_commutator(ecluster, epoint) + RETURNS boolean + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; + +CREATE OPERATOR &&+ ( + leftarg = ecluster, + rightarg = epoint, + procedure = epoint_ecluster_may_overlap_commutator, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE OPERATOR &&+ ( + leftarg = ebox, + rightarg = ecircle, + procedure = ebox_ecircle_may_overlap_proc, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE FUNCTION ebox_ecircle_may_overlap_commutator(ecircle, ebox) + RETURNS boolean + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; + +CREATE OPERATOR &&+ ( + leftarg = ecircle, + rightarg = ebox, + procedure = ebox_ecircle_may_overlap_commutator, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE OPERATOR &&+ ( + leftarg = ebox, + rightarg = ecluster, + procedure = ebox_ecluster_may_overlap_proc, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE FUNCTION ebox_ecluster_may_overlap_commutator(ecluster, ebox) + RETURNS boolean + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; + +CREATE OPERATOR &&+ ( + leftarg = ecluster, + rightarg = ebox, + procedure = ebox_ecluster_may_overlap_commutator, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE OPERATOR &&+ ( + leftarg = ecircle, + rightarg = ecluster, + procedure = ecircle_ecluster_may_overlap_proc, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE FUNCTION ecircle_ecluster_may_overlap_commutator(ecluster, ecircle) + RETURNS boolean + LANGUAGE sql IMMUTABLE AS 'SELECT $2 &&+ $1'; + +CREATE OPERATOR &&+ ( + leftarg = ecluster, + rightarg = ecircle, + procedure = ecircle_ecluster_may_overlap_commutator, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + +CREATE OPERATOR &&+ ( + leftarg = ecluster, + rightarg = ecluster, + procedure = ecluster_may_overlap_proc, + commutator = &&+, + restrict = areasel, + join = areajoinsel +); + CREATE OPERATOR <-> ( leftarg = epoint, rightarg = epoint, @@ -1003,13 +1125,14 @@ CREATE OPERATOR CLASS epoint_ops DEFAULT FOR TYPE epoint USING gist AS - OPERATOR 11 = , - OPERATOR 22 && (epoint, ebox), - OPERATOR 23 && (epoint, ecircle), - OPERATOR 24 && (epoint, ecluster), - OPERATOR 31 <-> (epoint, epoint) FOR ORDER BY float_ops, - OPERATOR 33 <-> (epoint, ecircle) FOR ORDER BY float_ops, - OPERATOR 34 <-> (epoint, ecluster) FOR ORDER BY float_ops, + OPERATOR 11 = , + OPERATOR 22 && (epoint, ebox), + OPERATOR 23 && (epoint, ecircle), + OPERATOR 24 && (epoint, ecluster), + OPERATOR 124 &&+ (epoint, ecluster), + OPERATOR 31 <-> (epoint, epoint) FOR ORDER BY float_ops, + OPERATOR 33 <-> (epoint, ecircle) FOR ORDER BY float_ops, + OPERATOR 34 <-> (epoint, ecluster) FOR ORDER BY float_ops, FUNCTION 1 pgl_gist_consistent(internal, internal, smallint, oid, internal), FUNCTION 2 pgl_gist_union(internal, internal), FUNCTION 3 pgl_gist_compress_epoint(internal), @@ -1022,13 +1145,15 @@ CREATE OPERATOR CLASS ecircle_ops DEFAULT FOR TYPE ecircle USING gist AS - OPERATOR 13 = , - OPERATOR 21 && (ecircle, epoint), - OPERATOR 23 && (ecircle, ecircle), - OPERATOR 24 && (ecircle, ecluster), - OPERATOR 31 <-> (ecircle, epoint) FOR ORDER BY float_ops, - OPERATOR 33 <-> (ecircle, ecircle) FOR ORDER BY float_ops, - OPERATOR 34 <-> (ecircle, ecluster) FOR ORDER BY float_ops, + OPERATOR 13 = , + OPERATOR 21 && (ecircle, epoint), + OPERATOR 122 &&+ (ecircle, ebox), + OPERATOR 23 && (ecircle, ecircle), + OPERATOR 24 && (ecircle, ecluster), + OPERATOR 124 &&+ (ecircle, ecluster), + OPERATOR 31 <-> (ecircle, epoint) FOR ORDER BY float_ops, + OPERATOR 33 <-> (ecircle, ecircle) FOR ORDER BY float_ops, + OPERATOR 34 <-> (ecircle, ecluster) FOR ORDER BY float_ops, FUNCTION 1 pgl_gist_consistent(internal, internal, smallint, oid, internal), FUNCTION 2 pgl_gist_union(internal, internal), FUNCTION 3 pgl_gist_compress_ecircle(internal), @@ -1041,7 +1166,12 @@ CREATE OPERATOR CLASS ecluster_ops DEFAULT FOR TYPE ecluster USING gist AS - OPERATOR 21 && (ecluster, epoint), + OPERATOR 21 && (ecluster, epoint), + OPERATOR 121 &&+ (ecluster, epoint), + OPERATOR 122 &&+ (ecluster, ebox), + OPERATOR 23 && (ecluster, ecircle), + OPERATOR 123 &&+ (ecluster, ecircle), + OPERATOR 124 &&+ (ecluster, ecluster), FUNCTION 1 pgl_gist_consistent(internal, internal, smallint, oid, internal), FUNCTION 2 pgl_gist_union(internal, internal), FUNCTION 3 pgl_gist_compress_ecluster(internal), diff -r dc8da756b761 -r 684a78d2f9f0 latlon-v0003.c --- a/latlon-v0003.c Fri Sep 02 14:04:04 2016 +0200 +++ b/latlon-v0003.c Fri Sep 02 14:08:01 2016 +0200 @@ -2107,6 +2107,19 @@ PG_RETURN_BOOL(retval); } +/* check if point may be inside cluster (lossy overl. operator "&&+") in SQL */ +PG_FUNCTION_INFO_V1(pgl_epoint_ecluster_may_overlap); +Datum pgl_epoint_ecluster_may_overlap(PG_FUNCTION_ARGS) { + pgl_point *point = (pgl_point *)PG_GETARG_POINTER(0); + pgl_cluster *cluster = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + bool retval = pgl_distance( + point->lat, point->lon, + cluster->bounding.center.lat, cluster->bounding.center.lon + ) <= cluster->bounding.radius; + PG_FREE_IF_COPY(cluster, 1); + PG_RETURN_BOOL(retval); +} + /* check if two boxes overlap (overlap operator "&&") in SQL */ PG_FUNCTION_INFO_V1(pgl_ebox_overlap); Datum pgl_ebox_overlap(PG_FUNCTION_ARGS) { @@ -2115,6 +2128,29 @@ PG_RETURN_BOOL(pgl_boxes_overlap(box1, box2)); } +/* check if box and circle may overlap (lossy overl. operator "&&+") in SQL */ +PG_FUNCTION_INFO_V1(pgl_ebox_ecircle_may_overlap); +Datum pgl_ebox_ecircle_may_overlap(PG_FUNCTION_ARGS) { + pgl_box *box = (pgl_box *)PG_GETARG_POINTER(0); + pgl_circle *circle = (pgl_circle *)PG_GETARG_POINTER(1); + PG_RETURN_BOOL( + pgl_estimate_point_box_distance(&circle->center, box) <= circle->radius + ); +} + +/* check if box and cluster may overlap (lossy overl. operator "&&+") in SQL */ +PG_FUNCTION_INFO_V1(pgl_ebox_ecluster_may_overlap); +Datum pgl_ebox_ecluster_may_overlap(PG_FUNCTION_ARGS) { + pgl_box *box = (pgl_box *)PG_GETARG_POINTER(0); + pgl_cluster *cluster = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + bool retval = pgl_estimate_point_box_distance( + &cluster->bounding.center, + box + ) <= cluster->bounding.radius; + PG_FREE_IF_COPY(cluster, 1); + PG_RETURN_BOOL(retval); +} + /* check if two circles overlap (overlap operator "&&") in SQL */ PG_FUNCTION_INFO_V1(pgl_ecircle_overlap); Datum pgl_ecircle_overlap(PG_FUNCTION_ARGS) { @@ -2140,6 +2176,33 @@ PG_RETURN_BOOL(retval); } +/* check if circle and cluster may be overlap (l. ov. operator "&&+") in SQL */ +PG_FUNCTION_INFO_V1(pgl_ecircle_ecluster_may_overlap); +Datum pgl_ecircle_ecluster_may_overlap(PG_FUNCTION_ARGS) { + pgl_circle *circle = (pgl_circle *)PG_GETARG_POINTER(0); + pgl_cluster *cluster = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + bool retval = pgl_distance( + circle->center.lat, circle->center.lon, + cluster->bounding.center.lat, cluster->bounding.center.lon + ) <= circle->radius + cluster->bounding.radius; + PG_FREE_IF_COPY(cluster, 1); + PG_RETURN_BOOL(retval); +} + +/* check if two clusters may overlap (lossy overlap operator "&&+") in SQL */ +PG_FUNCTION_INFO_V1(pgl_ecluster_may_overlap); +Datum pgl_ecluster_may_overlap(PG_FUNCTION_ARGS) { + pgl_cluster *cluster1 = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(0)); + pgl_cluster *cluster2 = (pgl_cluster *)PG_DETOAST_DATUM(PG_GETARG_DATUM(1)); + bool retval = pgl_distance( + cluster1->bounding.center.lat, cluster1->bounding.center.lon, + cluster2->bounding.center.lat, cluster2->bounding.center.lon + ) <= cluster1->bounding.radius + cluster2->bounding.radius; + PG_FREE_IF_COPY(cluster1, 0); + PG_FREE_IF_COPY(cluster2, 1); + PG_RETURN_BOOL(retval); +} + /* calculate distance between two points ("<->" operator) in SQL */ PG_FUNCTION_INFO_V1(pgl_epoint_distance); Datum pgl_epoint_distance(PG_FUNCTION_ARGS) { @@ -2285,6 +2348,8 @@ bool *recheck = (bool *)PG_GETARG_POINTER(4); /* demand recheck because index and query methods are lossy */ *recheck = true; + /* strategy number aliases for different operators using the same strategy */ + strategy %= 100; /* strategy number 11: equality of two points */ if (strategy == 11) { /* query datum is another point */ @@ -2653,6 +2718,8 @@ /* demand recheck because distance is just an estimation */ /* (real distance may be bigger) */ *recheck = true; + /* strategy number aliases for different operators using the same strategy */ + strategy %= 100; /* strategy number 31: distance to point */ if (strategy == 31) { /* query datum is a point */