pgLatLon

diff create_test_db.schema.sql @ 46:3cbce515387f

Added safety margins for distance calculation on index lookups (fixes bug that caused nearest-neighbor search to fail in certain cases); Improved "fair_distance" function
author jbe
date Mon Oct 31 13:06:31 2016 +0100 (2016-10-31)
parents 684a78d2f9f0
children
line diff
     1.1 --- a/create_test_db.schema.sql	Tue Oct 25 22:15:17 2016 +0200
     1.2 +++ b/create_test_db.schema.sql	Mon Oct 31 13:06:31 2016 +0100
     1.3 @@ -6,10 +6,81 @@
     1.4          "location"      EPOINT          NOT NULL,
     1.5          "surrounding"   ECIRCLE         NOT NULL,
     1.6          "multipoint"    ECLUSTER        NOT NULL,
     1.7 -        "triangle"      ECLUSTER        NOT NULL );
     1.8 +        "triangle"      ECLUSTER        NOT NULL,
     1.9 +        "votes"         INT4            NOT NULL );
    1.10  
    1.11  CREATE INDEX "test_location_key"    ON "test" USING gist ("location");
    1.12  CREATE INDEX "test_surrounding_key" ON "test" USING gist ("surrounding");
    1.13  CREATE INDEX "test_multipoint_key"  ON "test" USING gist ("multipoint");
    1.14  CREATE INDEX "test_triangle_key"    ON "test" USING gist ("triangle");
    1.15 +CREATE INDEX "test_vote_key" ON "test" ("votes");
    1.16  
    1.17 +
    1.18 +-- Below follows an example of how to perform a nearest-neighbor search with
    1.19 +-- weighted geometric objects (distance scaled anti-proportionally through
    1.20 +-- "votes" column).
    1.21 +--
    1.22 +-- NOTE: The approach may be speeded up by providing new data types like
    1.23 +--       "weighted_ecluster" with corresponding GiST index support in future
    1.24 +--       versions of pgLatLon.
    1.25 +
    1.26 +CREATE TYPE "test_with_relevance" AS (
    1.27 +        "id"            INT4,
    1.28 +        "location"      EPOINT,
    1.29 +        "surrounding"   ECIRCLE,
    1.30 +        "multipoint"    ECLUSTER,
    1.31 +        "triangle"      ECLUSTER,
    1.32 +        "votes"         INT4,
    1.33 +        "relevance"     FLOAT8 );
    1.34 +
    1.35 +CREATE FUNCTION "get_by_relevance" (epoint, int4 = 1, int4 = 10000)
    1.36 +  RETURNS SETOF "test_with_relevance"
    1.37 +  LANGUAGE plpgsql STABLE AS $$
    1.38 +    DECLARE
    1.39 +      "max_votes" INT4;
    1.40 +      "tries"     INT4 = 2;
    1.41 +      "all"       INT4;
    1.42 +      "matches"   INT4;
    1.43 +    BEGIN
    1.44 +      SELECT "votes" INTO "max_votes" FROM "test" ORDER BY "votes" DESC LIMIT 1;
    1.45 +      IF "max_votes" > 0 THEN
    1.46 +        LOOP
    1.47 +          RAISE DEBUG 'Considering % entries', "tries";
    1.48 +          SELECT
    1.49 +            count(1),
    1.50 +            count(CASE WHEN "relevance" < "worst_case" THEN 1 ELSE NULL END)
    1.51 +            INTO "all", "matches"
    1.52 +            FROM (
    1.53 +              SELECT
    1.54 +                CASE
    1.55 +                  WHEN "votes" = 0
    1.56 +                  THEN 'inf'::FLOAT8
    1.57 +                  ELSE "fair_distance" / "votes"
    1.58 +                END AS "relevance",
    1.59 +                max("fair_distance") OVER () / "max_votes" AS "worst_case"
    1.60 +              FROM (
    1.61 +                SELECT fair_distance("triangle", $1, $3), "votes" FROM "test"
    1.62 +                ORDER BY fair_distance
    1.63 +                LIMIT "tries"
    1.64 +              ) AS "subquery1"
    1.65 +            ) AS "subquery2";
    1.66 +          EXIT WHEN "matches" >= $2 OR "all" < "tries";
    1.67 +          "tries" := "tries" * 2;
    1.68 +        END LOOP;
    1.69 +        RETURN QUERY SELECT * FROM (
    1.70 +          SELECT
    1.71 +            *,
    1.72 +            CASE
    1.73 +              WHEN "votes" = 0
    1.74 +              THEN 'inf'::FLOAT8
    1.75 +              ELSE fair_distance("triangle", $1, $3) / "votes"
    1.76 +            END AS "relevance"
    1.77 +            FROM "test" ORDER BY fair_distance("triangle", $1, $3) LIMIT "tries"
    1.78 +        ) AS "subquery" ORDER BY "relevance", "id" LIMIT $2;
    1.79 +      ELSE
    1.80 +        RETURN QUERY SELECT * FROM "test" ORDER BY "id" LIMIT $2;
    1.81 +      END IF;
    1.82 +      RETURN;
    1.83 +    END;
    1.84 +  $$;
    1.85 +

Impressum / About Us