pgLatLon
diff create_test_db.schema.sql @ 46:3cbce515387f
Added safety margins for distance calculation on index lookups (fixes bug that caused nearest-neighbor search to fail in certain cases); Improved "fair_distance" function
author | jbe |
---|---|
date | Mon Oct 31 13:06:31 2016 +0100 (2016-10-31) |
parents | 684a78d2f9f0 |
children |
line diff
1.1 --- a/create_test_db.schema.sql Tue Oct 25 22:15:17 2016 +0200 1.2 +++ b/create_test_db.schema.sql Mon Oct 31 13:06:31 2016 +0100 1.3 @@ -6,10 +6,81 @@ 1.4 "location" EPOINT NOT NULL, 1.5 "surrounding" ECIRCLE NOT NULL, 1.6 "multipoint" ECLUSTER NOT NULL, 1.7 - "triangle" ECLUSTER NOT NULL ); 1.8 + "triangle" ECLUSTER NOT NULL, 1.9 + "votes" INT4 NOT NULL ); 1.10 1.11 CREATE INDEX "test_location_key" ON "test" USING gist ("location"); 1.12 CREATE INDEX "test_surrounding_key" ON "test" USING gist ("surrounding"); 1.13 CREATE INDEX "test_multipoint_key" ON "test" USING gist ("multipoint"); 1.14 CREATE INDEX "test_triangle_key" ON "test" USING gist ("triangle"); 1.15 +CREATE INDEX "test_vote_key" ON "test" ("votes"); 1.16 1.17 + 1.18 +-- Below follows an example of how to perform a nearest-neighbor search with 1.19 +-- weighted geometric objects (distance scaled anti-proportionally through 1.20 +-- "votes" column). 1.21 +-- 1.22 +-- NOTE: The approach may be speeded up by providing new data types like 1.23 +-- "weighted_ecluster" with corresponding GiST index support in future 1.24 +-- versions of pgLatLon. 1.25 + 1.26 +CREATE TYPE "test_with_relevance" AS ( 1.27 + "id" INT4, 1.28 + "location" EPOINT, 1.29 + "surrounding" ECIRCLE, 1.30 + "multipoint" ECLUSTER, 1.31 + "triangle" ECLUSTER, 1.32 + "votes" INT4, 1.33 + "relevance" FLOAT8 ); 1.34 + 1.35 +CREATE FUNCTION "get_by_relevance" (epoint, int4 = 1, int4 = 10000) 1.36 + RETURNS SETOF "test_with_relevance" 1.37 + LANGUAGE plpgsql STABLE AS $$ 1.38 + DECLARE 1.39 + "max_votes" INT4; 1.40 + "tries" INT4 = 2; 1.41 + "all" INT4; 1.42 + "matches" INT4; 1.43 + BEGIN 1.44 + SELECT "votes" INTO "max_votes" FROM "test" ORDER BY "votes" DESC LIMIT 1; 1.45 + IF "max_votes" > 0 THEN 1.46 + LOOP 1.47 + RAISE DEBUG 'Considering % entries', "tries"; 1.48 + SELECT 1.49 + count(1), 1.50 + count(CASE WHEN "relevance" < "worst_case" THEN 1 ELSE NULL END) 1.51 + INTO "all", "matches" 1.52 + FROM ( 1.53 + SELECT 1.54 + CASE 1.55 + WHEN "votes" = 0 1.56 + THEN 'inf'::FLOAT8 1.57 + ELSE "fair_distance" / "votes" 1.58 + END AS "relevance", 1.59 + max("fair_distance") OVER () / "max_votes" AS "worst_case" 1.60 + FROM ( 1.61 + SELECT fair_distance("triangle", $1, $3), "votes" FROM "test" 1.62 + ORDER BY fair_distance 1.63 + LIMIT "tries" 1.64 + ) AS "subquery1" 1.65 + ) AS "subquery2"; 1.66 + EXIT WHEN "matches" >= $2 OR "all" < "tries"; 1.67 + "tries" := "tries" * 2; 1.68 + END LOOP; 1.69 + RETURN QUERY SELECT * FROM ( 1.70 + SELECT 1.71 + *, 1.72 + CASE 1.73 + WHEN "votes" = 0 1.74 + THEN 'inf'::FLOAT8 1.75 + ELSE fair_distance("triangle", $1, $3) / "votes" 1.76 + END AS "relevance" 1.77 + FROM "test" ORDER BY fair_distance("triangle", $1, $3) LIMIT "tries" 1.78 + ) AS "subquery" ORDER BY "relevance", "id" LIMIT $2; 1.79 + ELSE 1.80 + RETURN QUERY SELECT * FROM "test" ORDER BY "id" LIMIT $2; 1.81 + END IF; 1.82 + RETURN; 1.83 + END; 1.84 + $$; 1.85 +