From 2bca6e48c0ea8043c5300f4ebdefa5167e6472bf Mon Sep 17 00:00:00 2001 From: Adam Chlipala Date: Wed, 4 Dec 2019 09:19:55 -0500 Subject: SQL SIMILAR (via pg_trgm) --- lib/ur/basis.urs | 10 ++++++++++ src/cjr.sml | 2 +- src/cjr_print.sml | 21 +++++++++++++++++---- src/mono.sml | 2 +- src/mono_print.sml | 21 +++++++++++---------- src/monoize.sml | 49 +++++++++++++++++++++++++++++++++++++++++++++++-- src/mysql.sml | 3 ++- src/postgres.sml | 5 +++-- src/settings.sig | 3 ++- src/settings.sml | 6 ++++-- src/sqlite.sml | 3 ++- src/urweb.grm | 9 +++++++++ tests/filter.urp | 1 + tests/trgm.ur | 25 +++++++++++++++++++++++++ tests/trgm.urp | 6 ++++++ tests/trgm.urs | 1 + 16 files changed, 142 insertions(+), 25 deletions(-) create mode 100644 tests/trgm.ur create mode 100644 tests/trgm.urp create mode 100644 tests/trgm.urs diff --git a/lib/ur/basis.urs b/lib/ur/basis.urs index a97c2855..dda48d2b 100644 --- a/lib/ur/basis.urs +++ b/lib/ur/basis.urs @@ -623,6 +623,16 @@ val sql_known : t ::: Type -> sql_ufunc t bool val sql_lower : sql_ufunc string string val sql_upper : sql_ufunc string string +con sql_bfunc :: Type -> Type -> Type -> Type +val sql_bfunc : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} + -> dom1 ::: Type -> dom2 ::: Type -> ran ::: Type + -> sql_bfunc dom1 dom2 ran + -> sql_exp tables agg exps dom1 + -> sql_exp tables agg exps dom2 + -> sql_exp tables agg exps ran +val sql_similarity : sql_bfunc string string float +(* Only supported by Postgres for now, via the pg_trgm module *) + val sql_nullable : tables ::: {{Type}} -> agg ::: {{Type}} -> exps ::: {Type} -> t ::: Type -> sql_injectable_prim t -> sql_exp tables agg exps t diff --git a/src/cjr.sml b/src/cjr.sml index e582e6ae..9b154428 100644 --- a/src/cjr.sml +++ b/src/cjr.sml @@ -115,7 +115,7 @@ datatype decl' = | DTable of string * (string * typ) list * string * (string * string) list | DSequence of string | DView of string * (string * typ) list * string - | DDatabase of {name : string, expunge : int, initialize : int} + | DDatabase of {name : string, expunge : int, initialize : int, usesSimilar : bool} | DPreparedStatements of (string * int) list | DJavaScript of string diff --git a/src/cjr_print.sml b/src/cjr_print.sml index d7b8017e..70ebdf43 100644 --- a/src/cjr_print.sml +++ b/src/cjr_print.sml @@ -3230,10 +3230,11 @@ fun p_file env (ds, ps) = val _ = foldl (fn (d, env) => ((case #1 d of - DDatabase {name = x, expunge = y, initialize = z} => (hasDb := true; - dbstring := x; - expunge := y; - initialize := z) + DDatabase {name = x, expunge = y, initialize = z, ...} => + (hasDb := true; + dbstring := x; + expunge := y; + initialize := z) | DJavaScript _ => hasJs := true | DTable (s, xts, _, _) => tables := (s, map (fn (x, t) => (x, sql_type_in env t)) xts) :: !tables @@ -3753,6 +3754,8 @@ fun declaresAsForeignKey xs s = fun p_sql env (ds, _) = let + val usesSimilar = ref false + val (pps, _) = ListUtil.foldlMap (fn (dAll as (d, _), env) => let @@ -3837,6 +3840,9 @@ fun p_sql env (ds, _) = string ";", newline, newline] + | DDatabase {usesSimilar = s, ...} => + (usesSimilar := s; + box []) | _ => box [] in (pp, E.declBinds env dAll) @@ -3849,6 +3855,13 @@ fun p_sql env (ds, _) = NONE => (ErrorMsg.error "Using file cache with database that doesn't support SHA512"; []) | SOME r => [string (#InitializeDb r), newline, newline]) + @ (if !usesSimilar then + case #supportsSimilar (Settings.currentDbms ()) of + NONE => (ErrorMsg.error "Using SIMILAR with database that doesn't support it"; + []) + | SOME r => [string (#InitializeDb r), newline, newline] + else + []) @ string (#sqlPrefix (Settings.currentDbms ())) :: pps) end diff --git a/src/mono.sml b/src/mono.sml index cdadded5..754fe283 100644 --- a/src/mono.sml +++ b/src/mono.sml @@ -142,7 +142,7 @@ datatype decl' = | DTable of string * (string * typ) list * exp * exp | DSequence of string | DView of string * (string * typ) list * exp - | DDatabase of {name : string, expunge : int, initialize : int} + | DDatabase of {name : string, expunge : int, initialize : int, usesSimilar : bool} | DJavaScript of string diff --git a/src/mono_print.sml b/src/mono_print.sml index a3b55ec0..1114a4f0 100644 --- a/src/mono_print.sml +++ b/src/mono_print.sml @@ -509,16 +509,17 @@ fun p_decl env (dAll as (d, _) : decl) = space, p_exp env e, string "*)"] - | DDatabase {name, expunge, initialize} => box [string "database", - space, - string name, - space, - string "(", - p_enamed env expunge, - string ",", - space, - p_enamed env initialize, - string ")"] + | DDatabase {name, expunge, initialize, ...} => + box [string "database", + space, + string name, + space, + string "(", + p_enamed env expunge, + string ",", + space, + p_enamed env initialize, + string ")"] | DJavaScript s => box [string "JavaScript(", string s, string ")"] diff --git a/src/monoize.sml b/src/monoize.sml index 4aeddcae..22b4e0e7 100644 --- a/src/monoize.sml +++ b/src/monoize.sml @@ -50,11 +50,13 @@ structure RM = BinaryMapFn(struct (L'.TRecord r2, E.dummySpan)) end) +val uses_similar = ref false + local val url_prefixes = ref [] in -fun reset () = url_prefixes := [] +fun reset () = (url_prefixes := []; uses_similar := false) fun addPrefix prefix = let @@ -355,6 +357,8 @@ fun monoType env = (L'.TFfi ("Basis", "string"), loc) | L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_ufunc"), _), _), _), _) => (L'.TFfi ("Basis", "string"), loc) + | L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_bfunc"), _), _), _), _), _), _) => + (L'.TFfi ("Basis", "string"), loc) | L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_partition"), _), _), _), _), _), _) => (L'.TFfi ("Basis", "string"), loc) | L.CApp ((L.CApp ((L.CApp ((L.CApp ((L.CFfi ("Basis", "sql_window"), _), _), _), _), _), _), _), _) => @@ -2693,6 +2697,40 @@ fun monoExp (env, st, fm) (all as (e, loc)) = | L.ECApp ((L.EFfi ("Basis", "sql_known"), _), _) => ((L'.EFfi ("Basis", "sql_known"), loc), fm) + | L.ECApp ( + (L.ECApp ( + (L.ECApp ( + (L.ECApp ( + (L.ECApp ( + (L.ECApp ( + (L.EFfi ("Basis", "sql_bfunc"), _), + _), _), + _), _), + _), _), + _), _), + _), _), + _) => + let + val s = (L'.TFfi ("Basis", "string"), loc) + in + ((L'.EAbs ("f", s, (L'.TFun (s, s), loc), + (L'.EAbs ("x1", s, s, + (L'.EAbs ("x2", s, s, + strcat [(L'.ERel 2, loc), + str "(", + (L'.ERel 1, loc), + str ",", + (L'.ERel 0, loc), + str ")"]), loc)), loc)), loc), + fm) + end + | L.EFfi ("Basis", "sql_similarity") => + ((case #supportsSimilar (Settings.currentDbms ()) of + NONE => ErrorMsg.errorAt loc "The DBMS you've selected doesn't support SIMILAR." + | _ => ()); + uses_similar := true; + (str "similarity", fm)) + | (L.ECApp ( (L.ECApp ( (L.ECApp ( @@ -4593,7 +4631,8 @@ fun monoize env file = in (env, Fm.enter fm, (L'.DDatabase {name = s, expunge = nExp, - initialize = nIni}, loc) + initialize = nIni, + usesSimilar = false}, loc) :: (dExp, loc) :: (dIni, loc) :: ds) @@ -4617,6 +4656,12 @@ fun monoize env file = | _ => ds' @ Fm.decls fm @ (L'.DDatatype (!pvarDefs), loc) :: ds))) (env, Fm.empty mname, []) file + val ds = map (fn (L'.DDatabase r, loc) => + (L'.DDatabase {name = #name r, + expunge = #expunge r, + initialize = #initialize r, + usesSimilar = !uses_similar}, loc) + | x => x) ds val monoFile = (rev ds, []) in pvars := RM.empty; diff --git a/src/mysql.sml b/src/mysql.sml index ff1c379d..74954c0f 100644 --- a/src/mysql.sml +++ b/src/mysql.sml @@ -1612,6 +1612,7 @@ val () = addDbms {name = "mysql", requiresTimestampDefaults = true, supportsIsDistinctFrom = true, supportsSHA512 = SOME {InitializeDb = "", - GenerateHash = fn name => "SHA2(" ^ name ^ ", 512)"}} + GenerateHash = fn name => "SHA2(" ^ name ^ ", 512)"}, + supportsSimilar = NONE} end diff --git a/src/postgres.sml b/src/postgres.sml index 94f0e42e..3e53ed77 100644 --- a/src/postgres.sml +++ b/src/postgres.sml @@ -1155,8 +1155,9 @@ val () = addDbms {name = "postgres", windowFunctions = true, requiresTimestampDefaults = false, supportsIsDistinctFrom = true, - supportsSHA512 = SOME {InitializeDb = "CREATE EXTENSION pgcrypto;", - GenerateHash = fn name => "DIGEST(" ^ name ^ ", 'sha512')"}} + supportsSHA512 = SOME {InitializeDb = "CREATE EXTENSION IF NOT EXISTS pgcrypto;", + GenerateHash = fn name => "DIGEST(" ^ name ^ ", 'sha512')"}, + supportsSimilar = SOME {InitializeDb = "CREATE EXTENSION IF NOT EXISTS pg_trgm;"}} val () = setDbms "postgres" diff --git a/src/settings.sig b/src/settings.sig index a2a56407..6a409cdd 100644 --- a/src/settings.sig +++ b/src/settings.sig @@ -224,10 +224,11 @@ signature SETTINGS = sig requiresTimestampDefaults : bool, supportsIsDistinctFrom : bool, supportsSHA512 : {InitializeDb : string, - GenerateHash : string -> string} option + GenerateHash : string -> string} option, (* If supported, give the SQL code to * enable the feature in a particular * database and to compute a hash of a value. *) + supportsSimilar : {InitializeDb : string} option } val addDbms : dbms -> unit diff --git a/src/settings.sml b/src/settings.sml index a85e8053..c8cb049c 100644 --- a/src/settings.sml +++ b/src/settings.sml @@ -653,7 +653,8 @@ type dbms = { windowFunctions: bool, requiresTimestampDefaults : bool, supportsIsDistinctFrom : bool, - supportsSHA512 : {InitializeDb : string, GenerateHash : string -> string} option + supportsSHA512 : {InitializeDb : string, GenerateHash : string -> string} option, + supportsSimilar : {InitializeDb : string} option } val dbmses = ref ([] : dbms list) @@ -688,7 +689,8 @@ val curDb = ref ({name = "", windowFunctions = false, requiresTimestampDefaults = false, supportsIsDistinctFrom = false, - supportsSHA512 = NONE} : dbms) + supportsSHA512 = NONE, + supportsSimilar = NONE} : dbms) fun addDbms v = dbmses := v :: !dbmses fun setDbms s = diff --git a/src/sqlite.sml b/src/sqlite.sml index 9bb86ecf..0e97bf69 100644 --- a/src/sqlite.sml +++ b/src/sqlite.sml @@ -857,6 +857,7 @@ val () = addDbms {name = "sqlite", windowFunctions = false, requiresTimestampDefaults = false, supportsIsDistinctFrom = false, - supportsSHA512 = NONE} + supportsSHA512 = NONE, + supportsSimilar = NONE} end diff --git a/src/urweb.grm b/src/urweb.grm index afebff0a..dea7bdf5 100644 --- a/src/urweb.grm +++ b/src/urweb.grm @@ -2276,6 +2276,15 @@ sqlexp : TRUE (sql_inject (EVar (["Basis"], "True", In val e = (EApp (e, fname), loc) in (EApp (e, sqlexp), loc) + end) + | fname LPAREN sqlexp COMMA sqlexp RPAREN (let + val loc = s (fnameleft, RPARENright) + + val e = (EVar (["Basis"], "sql_bfunc", Infer), loc) + val e = (EApp (e, fname), loc) + val e = (EApp (e, sqlexp1), loc) + in + (EApp (e, sqlexp2), loc) end) | LPAREN query RPAREN (let val loc = s (LPARENleft, RPARENright) diff --git a/tests/filter.urp b/tests/filter.urp index 102a1871..ddf1a3df 100644 --- a/tests/filter.urp +++ b/tests/filter.urp @@ -1,4 +1,5 @@ debug database dbname=filter +sql filter.sql filter diff --git a/tests/trgm.ur b/tests/trgm.ur new file mode 100644 index 00000000..45783366 --- /dev/null +++ b/tests/trgm.ur @@ -0,0 +1,25 @@ +table turtles : { Nam : string } + +fun add name = + dml (INSERT INTO turtles(Nam) + VALUES ({[name]})) + +fun closest name = + List.mapQuery (SELECT * + FROM turtles + ORDER BY similarity(turtles.Nam, {[name]}) DESC + LIMIT 5) + (fn r => r.Turtles.Nam) + +val main = + name <- source ""; + results <- source []; + return + Name:
+