diff --git a/app/deps.ts b/app/deps.ts index c3a1a08f43c5d89d766ca771d4ee4b0f43b11b2c..515034b748ff2bf6378e364f24302b1b4f36bf07 100644 --- a/app/deps.ts +++ b/app/deps.ts @@ -3,4 +3,7 @@ export { createSchema, createYoga } from "npm:graphql-yoga@5.1.1"; export { useResponseCache } from "npm:@graphql-yoga/plugin-response-cache@1.0.0"; export { GraphQLError } from "npm:graphql@16.8.1"; export * as turf from "https://esm.sh/@turf/turf@6.5.0"; -export { Client } from "https://deno.land/x/postgres/mod.ts"; +export { + Client, + type QueryObjectResult, +} from "https://deno.land/x/postgres/mod.ts"; diff --git a/app/dev_deps.ts b/app/dev_deps.ts index df97030726db7e77b2c16df8e160b01135347258..dd30673135247255a2a88a65530e2d7b506744f8 100644 --- a/app/dev_deps.ts +++ b/app/dev_deps.ts @@ -1,6 +1,9 @@ export { assertSpyCall, + assertSpyCalls, returnsNext, + type Spy, + spy, stub, } from "https://deno.land/std@0.155.0/testing/mock.ts"; export type { Stub } from "https://deno.land/std@0.155.0/testing/mock.ts"; diff --git a/app/geo_api_client.ts b/app/geo_api_client.ts index 81e8c9831686a1776345dd5a95a79eb0fc419013..427e954c5527a8b44c73cf8d303d8eb5779f5509 100644 --- a/app/geo_api_client.ts +++ b/app/geo_api_client.ts @@ -1,7 +1,4 @@ -export type LatLng = { - lat: number; - lng: number; -}; +import { GeoLocation } from "./volunteering_db.ts"; type GeoAPIResponse = { data?: { @@ -16,28 +13,27 @@ type GeoAPIResponse = { }; }; -export const resolveLatLng = async ( - geoAPIEndpointUrl: string, - geolocationId: string, -): Promise<LatLng> => { - const graphQLQuery = - `{ placeDetails(id: "${geolocationId}") { geolocation } }`; - const response = await fetch(geoAPIEndpointUrl, { - "body": JSON.stringify({ query: graphQLQuery }), - "headers": { - "Accept": "application/graphql-response+json, application/json", - "Content-Type": "application/json", - }, - "method": "POST", - }); - const responseJSON = await response.json() as GeoAPIResponse; - const { lat, lon: lng } = - responseJSON.data?.placeDetails?.geolocation?.properties || {}; - if (lat && lng) { - return { lat, lng }; - } else { - return Promise.reject( - `Resolution of lat/lng failed (no data included in response for geolocationId=${geolocationId})`, - ); - } -}; +export const resolveGeoLocationViaGeoAPI = + (geoAPIEndpointUrl: string) => + async (geolocationId: string): Promise<GeoLocation> => { + const graphQLQuery = + `{ placeDetails(id: "${geolocationId}") { geolocation } }`; + const response = await fetch(geoAPIEndpointUrl, { + "body": JSON.stringify({ query: graphQLQuery }), + "headers": { + "Accept": "application/graphql-response+json, application/json", + "Content-Type": "application/json", + }, + "method": "POST", + }); + const responseJSON = await response.json() as GeoAPIResponse; + const { lat, lon } = + responseJSON.data?.placeDetails?.geolocation?.properties || {}; + if (lat && lon) { + return { lat, lon }; + } else { + return Promise.reject( + `Resolution of lat/lng failed (no data included in response for geolocationId=${geolocationId})`, + ); + } + }; diff --git a/app/server.ts b/app/server.ts index 7357dfaf245cb7fe3c60bbe9c2bb37209b83d071..adcc31d9579eb457d20e920ffbb83be5b010ddc0 100644 --- a/app/server.ts +++ b/app/server.ts @@ -21,6 +21,8 @@ import { } from "./voltastics.ts"; import { logger } from "./logging.ts"; import { VolunteeringDB, VolunteeringDBConfig } from "./volunteering_db.ts"; +import { resolveGeoLocationViaGeoAPI } from "./geo_api_client.ts"; +import { Client } from "https://deno.land/x/postgres@v0.19.3/client.ts"; const typeDefs = ` type Organizer { @@ -135,7 +137,7 @@ const createResolvers = ( ): Promise<EngagementsResponse> => config.fake ? Promise.resolve({ totalResults: 0, data: [] }) - : volunteeringDB.findRecommendations(config, parameters), + : volunteeringDB.findRecommendations(parameters), }, Mutation: { trackEngagementView: ( @@ -185,9 +187,28 @@ export const createGraphQLServer = (config: ServerConfig): GraphQLServer => { }), ] : []; + const client = new Client({ + hostname: config.volunteeringDB.hostname, + port: config.volunteeringDB.port, + database: config.volunteeringDB.database, + user: config.volunteeringDB.username, + password: config.volunteeringDB.password, + controls: { + debug: { + queries: false, + notices: false, + results: false, + queryInError: true, + }, + }, + connection: { + attempts: config.volunteeringDB.connectionAttempts, + }, + }); const volunteeringDB = new VolunteeringDB( - config.volunteeringDB, + client, config.imageProxyBaseUrl, + resolveGeoLocationViaGeoAPI(config.geoAPIEndpointUrl), ); const resolvers = createResolvers(config, volunteeringDB); return createYoga({ diff --git a/app/volunteering_db.ts b/app/volunteering_db.ts index 3479342c352ed8297a5481bc34910f00dd9f5b3c..0d0d8af3be520a3d82bee2681768f953036338dc 100644 --- a/app/volunteering_db.ts +++ b/app/volunteering_db.ts @@ -1,12 +1,11 @@ import { Client } from "./deps.ts"; +import type { QueryObjectResult } from "./deps.ts"; import { Engagement, EngagementRecommendationsParameters, EngagementsResponse, } from "./types.ts"; import { isNonEmptyString, safeJsonParse } from "./utils.ts"; -import { ServerConfig } from "./server.ts"; -import { LatLng, resolveLatLng } from "./geo_api_client.ts"; export type VolunteeringDBConfig = { hostname: string; @@ -166,140 +165,195 @@ const toEngagement = longitude: row.longitude, }); +export type GeoLocation = { + lat: number; + lon: number; +}; + +export type GeoLocationResolver = ( + geoLocationId: string, +) => Promise<GeoLocation>; + export class VolunteeringDB { private client: Client; private readonly imageProxyBaseUrl: string; + private resolveGeoLocation: GeoLocationResolver; - constructor(config: VolunteeringDBConfig, imageProxyBaseUrl: string) { - this.client = new Client({ - hostname: config.hostname, - port: config.port, - database: config.database, - user: config.username, - password: config.password, - controls: { - debug: { - queries: false, - notices: false, - results: false, - queryInError: true, - }, - }, - connection: { - attempts: config.connectionAttempts, - }, - }); + constructor( + client: Client, + imageProxyBaseUrl: string, + resolveGeoLocation: GeoLocationResolver, + ) { + this.client = client; this.imageProxyBaseUrl = imageProxyBaseUrl; + this.resolveGeoLocation = resolveGeoLocation; } async findRecommendations( - config: ServerConfig, - params: EngagementRecommendationsParameters, + { offset, limit, topics, skills, geolocationId }: + EngagementRecommendationsParameters, ): Promise<EngagementsResponse> { + const result = await this.queryRecos( + offset < 0 ? 0 : offset, + limit > 100 || limit < 1 ? 10 : limit, + topics, + skills, + geolocationId, + ).catch((reason) => { + console.error("Error retrieving recommendations. Reason: ", reason); + throw reason; + }); + const recos = result.rows.map(toEngagement(this.imageProxyBaseUrl)); + return Promise.resolve({ + totalResults: recos.length, + data: recos, + }); + } + + private async queryRecos( + offset: number, + limit: number, + topics: string[], + skills: string[], + geolocationId?: string, + ): Promise<QueryObjectResult<VolunteeringDBRow>> { const beforeResolve = Date.now(); - const latLng: LatLng | undefined = params.geolocationId - ? await resolveLatLng(config.geoAPIEndpointUrl, params.geolocationId) - .then((latLng) => { + const geoLocation = geolocationId + ? await this.resolveGeoLocation(geolocationId) + .then((geoLocation) => { const afterResolve = Date.now(); console.debug( `[${ (afterResolve - beforeResolve).toString().padStart(4, " ") } ms] Successfully resolved ${ - JSON.stringify(latLng) - } for geolocationId=${params.geolocationId}`, + JSON.stringify(geoLocation) + } for geolocationId=${geolocationId}`, ); - return latLng; + return geoLocation; }) .catch((error) => { console.warn(error); + if (topics.length == 0 && skills.length == 0) { + throw new Error( + "Can't retrieve recommendations: geo location resolution failed and no topics or skills given", + ); + } // gracefully catch error so that recommendations will still be delivered but without location factored in return undefined; }) : undefined; + const beforeQuery = Date.now(); - const recos = await this.queryRecommendations( - params.topics, - params.skills, - params.offset < 0 ? 0 : params.offset, - params.limit > 100 || params.limit < 1 ? 10 : params.limit, - latLng, - ) - .catch((reason) => { - console.error("Error executing query", reason); - throw reason; - }); - const afterQuery = Date.now(); - console.debug( - `[${ - (afterQuery - beforeQuery).toString().padStart(4, " ") - } ms] Successfully retrieved ${recos.length} recommendations from DB`, - ); - return Promise.resolve({ - totalResults: recos.length, - data: recos, - }); + const logQueryDuration = ( + result: QueryObjectResult<VolunteeringDBRow>, + ): QueryObjectResult<VolunteeringDBRow> => { + const afterQuery = Date.now(); + console.debug( + `[${ + (afterQuery - beforeQuery).toString().padStart(4, " ") + } ms] Successfully retrieved ${result.rows.length} recommendations from DB`, + ); + return result; + }; + + if (geoLocation && (topics.length > 0 || skills.length > 0)) { + return this.queryRecosBasedOnTopicsSkillsAndLocation( + offset, + limit, + topics, + skills, + geoLocation, + ).then(logQueryDuration); + } else if (geoLocation) { + return this.queryRecosBasedOnLocation(offset, limit, geoLocation).then( + logQueryDuration, + ); + } else if (topics.length > 0 || skills.length > 0) { + return this.queryRecosBasedOnTopicsAndSkills( + offset, + limit, + topics, + skills, + ).then(logQueryDuration); + } else { + return Promise.reject( + "It is required to provide at least one of (topics, skills, or location) in order to retrieve recommendations", + ); + } } - // Function to perform cosine similarity search - private async queryRecommendations( - topics: string[], - skills: string[], + private async queryRecosBasedOnTopicsAndSkills( offset: number, limit: number, - latLng?: LatLng, - ): Promise<Engagement[]> { + topics: string[], + skills: string[], + ): Promise<QueryObjectResult<VolunteeringDBRow>> { const queryVector = JSON.stringify(recosQueryVector(topics, skills)); + return await this.client.queryObject<VolunteeringDBRow>` + WITH vector_matches AS ( + SELECT *, 1 - (embedding_array <=> ${queryVector}) AS cosine_similarity + FROM volunteering_voltastics_with_classification + ORDER BY cosine_similarity DESC + OFFSET ${offset} + LIMIT ${limit} + ) + SELECT * + FROM vector_matches; + `; + } - // Define weight parameters + private async queryRecosBasedOnTopicsSkillsAndLocation( + offset: number, + limit: number, + topics: string[], + skills: string[], + geoLocation: GeoLocation, + ): Promise<QueryObjectResult<VolunteeringDBRow>> { const rankingWeightCosineSimilarity = 0.7; // Weight for cosine similarity const rankingWeightDistance = 0.3; // Weight for proximity const maxDistanceInMeters = 50_000; // in meters (e.g., 50 km) + const queryVector = JSON.stringify(recosQueryVector(topics, skills)); + const { lat, lon } = geoLocation; - await this.client.queryObject(`CREATE EXTENSION IF NOT EXISTS postgis;`); - - if (latLng) { - const { lat, lng } = latLng; - // Useful knowledge - // - PostGIS uses lon, lat (NOT lat, lon) - // - <#> uses index scans if used in an ORDER BY clause, ST_Distance does not - // - Database stores in GPS-Coordinates (SRID 4326) - // - In order to calculate a distance in meters, a geometry needs to be projected by transforming using - // ST_Transform(geom, 3857) where we use the SRID 3857 for Pseudo-Mercator - const result = await this.client.queryObject<VolunteeringDBRow>` - WITH calculations AS ( - SELECT *, - 1 - (embedding_array <=> ${queryVector}) AS cosine_similarity, - ST_Transform(location_gps, 3857) <#> ST_Transform(ST_SetSRID(ST_MakePoint(${lng.toString()}, ${lat.toString()}), 4326), 3857) AS distance_in_meters - FROM volunteering_voltastics_with_classification - WHERE longitude IS NOT NULL AND latitude IS NOT NULL - ), scored AS ( - SELECT *, - (${rankingWeightCosineSimilarity} * cosine_similarity) + - (${rankingWeightDistance} * (1 - LEAST(distance_in_meters, ${maxDistanceInMeters})) / ${maxDistanceInMeters}) AS weighted_score - FROM calculations - ) - SELECT * - FROM scored - ORDER BY weighted_score DESC - OFFSET ${offset} - LIMIT ${limit}; - `; - return result.rows.map(toEngagement(this.imageProxyBaseUrl)); - } + // Useful knowledge + // - PostGIS uses lon, lat (NOT lat, lon) + // - <#> uses index scans if used in an ORDER BY clause, ST_Distance does not + // - Database stores in GPS-Coordinates (SRID 4326) + // - In order to calculate a distance in meters, a geometry needs to be projected by transforming using + // ST_Transform(geom, 3857) where we use the SRID 3857 for Pseudo-Mercator + return await this.client.queryObject<VolunteeringDBRow>` + WITH calculations AS ( + SELECT *, + 1 - (embedding_array <=> ${queryVector}) AS cosine_similarity, + ST_Transform(location_gps, 3857) <#> ST_Transform(ST_SetSRID(ST_MakePoint(${lon.toString()}, ${lat.toString()}), 4326), 3857) AS distance_in_meters + FROM volunteering_voltastics_with_classification + ), scored AS ( + SELECT *, + (${rankingWeightCosineSimilarity} * cosine_similarity) + + (${rankingWeightDistance} * (1 - LEAST(distance_in_meters, ${maxDistanceInMeters})) / ${maxDistanceInMeters}) AS weighted_score + FROM calculations + ) + SELECT * + FROM scored + ORDER BY weighted_score DESC + OFFSET ${offset} + LIMIT ${limit}; + `; + } - // special syntax for Denos postgres client, resulting in an SQL injection-safe prepared statement - // see https://deno-postgres.com/#/?id=template-strings - const result = await this.client.queryObject<VolunteeringDBRow>` - WITH vector_matches AS ( - SELECT *, 1 - (embedding_array <=> ${queryVector}) AS cosine_similarity + private async queryRecosBasedOnLocation( + offset: number, + limit: number, + geoLocation: GeoLocation, + ): Promise<QueryObjectResult<VolunteeringDBRow>> { + const { lat, lon } = geoLocation; + return await this.client.queryObject<VolunteeringDBRow>` + SELECT *, + ST_Transform(location_gps, 3857) <#> ST_Transform(ST_SetSRID(ST_MakePoint(${lon.toString()}, ${lat.toString()}), 4326), 3857) AS distance_in_meters FROM volunteering_voltastics_with_classification - ORDER BY cosine_similarity DESC + ORDER BY distance_in_meters ASC OFFSET ${offset} - LIMIT ${limit} - ) - SELECT * - FROM vector_matches; - `; - return result.rows.map(toEngagement(this.imageProxyBaseUrl)); + LIMIT ${limit}; + `; } } diff --git a/app/volunteering_db_test.ts b/app/volunteering_db_test.ts index 4df332aa7127ff81862e60d8237fd1929da373c9..f26e4399ce7c54379dcc194a827c332f46d0fa51 100644 --- a/app/volunteering_db_test.ts +++ b/app/volunteering_db_test.ts @@ -1,5 +1,51 @@ -import { assertEquals, describe, it } from "./dev_deps.ts"; -import { exportedForTesting } from "./volunteering_db.ts"; +import type { Spy } from "./dev_deps.ts"; +import { + assertEquals, + assertRejects, + assertSpyCall, + assertSpyCalls, + describe, + it, + spy, +} from "./dev_deps.ts"; +import { + exportedForTesting, + GeoLocationResolver, + VolunteeringDB, +} from "./volunteering_db.ts"; +import { Client } from "./deps.ts"; + +type ResolveLocationSpy = Spy< + unknown, + [_geoLocationId: string], + Promise<{ lat: number; lon: number }> +>; + +const succeedingGeoLocationResolver: GeoLocationResolver = ( + _geoLocationId: string, +) => Promise.resolve({ lat: 123, lon: 321 }); +const failingGeoLocationResolver: GeoLocationResolver = ( + _geoLocationId: string, +) => Promise.reject("boom!"); + +const withMockedDependencies = ( + geoLocationResolver: GeoLocationResolver, + test: ( + vDB: VolunteeringDB, + resolveLocationSpy: ResolveLocationSpy, + ) => unknown | Promise<unknown>, +) => { + const geoLocationResolverSpy = spy(geoLocationResolver); + const mockClient = { + queryObject: () => Promise.resolve({ rows: [] }), + } as unknown as Client; + const volunteeringDB = new VolunteeringDB( + mockClient, + "mock-image-proxy-base-url", + geoLocationResolverSpy, + ); + test(volunteeringDB, geoLocationResolverSpy); +}; describe("VolunteeringDB", () => { it("correctly constructs a query vector", () => { @@ -28,4 +74,70 @@ describe("VolunteeringDB", () => { assertEquals(actualVector.length, expectedLength); assertEquals(actualVector, expectedVector); }); + it("resolves latitude/longitude using the GeoAPI client when geoLocationId is given", () => { + withMockedDependencies( + succeedingGeoLocationResolver, + (volunteeringDB, resolveLocationSpy) => { + volunteeringDB.findRecommendations({ + limit: 10, + offset: 0, + topics: [], + skills: [], + geolocationId: "mock-geolocation-id", + }); + assertSpyCall(resolveLocationSpy, 0, { + args: ["mock-geolocation-id"], + }); + }, + ); + }); + it("does not resolve latitude/longitude when geoLocationId is not given", () => { + withMockedDependencies( + succeedingGeoLocationResolver, + (volunteeringDB, resolveLocationSpy) => { + volunteeringDB.findRecommendations({ + limit: 10, + offset: 0, + topics: ["agriculture-food"], + skills: [], + }); + assertSpyCalls(resolveLocationSpy, 0); + }, + ); + }); + it("falls back to recommend based only on topics and skills if given and geo location resolution fails", () => { + withMockedDependencies( + failingGeoLocationResolver, + async (volunteeringDB, _resolveLocationSpy) => { + const result = await volunteeringDB.findRecommendations({ + limit: 10, + offset: 0, + topics: ["agriculture-food"], + skills: [], + geolocationId: "mock-geolocation-id", + }); + assertEquals(result.totalResults, 0); + assertEquals(result.data, []); + }, + ); + }); + it("fails with error message if no topics and skills are given and geo location resolution fails", () => { + withMockedDependencies( + failingGeoLocationResolver, + async (volunteeringDB, _resolveLocationSpy) => { + await assertRejects( + () => + volunteeringDB.findRecommendations({ + limit: 10, + offset: 0, + topics: [], + skills: [], + geolocationId: "mock-geolocation-id", + }), + Error, + "geo location resolution failed and no topics or skills given", + ); + }, + ); + }); });