diff --git a/NEWS.md b/NEWS.md index 419fd3d..00f212d 100644 --- a/NEWS.md +++ b/NEWS.md @@ -1,5 +1,19 @@ # TidierDB.jl updates +## v0.3.4 - 2024-09-23 +TidierDB works with nearly any existing SQL function, and now there are docs about it. +- Docs on using any existing SQL function in TidierDB +- Docs on user defined functions (UDFs) in TidierDB +- Adds `agg()` so that any aggregate function built into a database can be used in `@mutate`. Adds support for `agg()` in `across`. (`@summarize` continues to allow all aggregate SQL functions without `agg()`) +- Adds `t(query)`, an alias for `from_query`, as a more efficient way to reference tables. +``` +table = db_table(db, "name") +@chain t(table) ... +``` +- Bugfix: fixes MsSQL joins +- Bugfix: window functions +- Bugfix: json paths supported for `json` DuckDB functions + ## v0.3.3 - 2024-08-29 - Bugfix: `@mutate` allows type conversion as part of larger mutate expressions diff --git a/docs/examples/UserGuide/getting_started.jl b/docs/examples/UserGuide/getting_started.jl index 95c4526..fef2524 100644 --- a/docs/examples/UserGuide/getting_started.jl +++ b/docs/examples/UserGuide/getting_started.jl @@ -61,6 +61,6 @@ # end # ``` # --- -# Tip: Setting `t(table) = from_query(table)` will save some keystrokes. -# This means after saving the results of `db_table` you can start all chains/refer to the data with `t(table)` +# Tip: `t()` is an alias for `from_query` +# This means after saving the results of `db_table` use `t(table)` to refer to the table or prior query # --- \ No newline at end of file diff --git a/docs/examples/UserGuide/udfs_ex.jl b/docs/examples/UserGuide/udfs_ex.jl new file mode 100644 index 0000000..71ac807 --- /dev/null +++ b/docs/examples/UserGuide/udfs_ex.jl @@ -0,0 +1,144 @@ +# TidierDB is unique in its statement parsing flexibility. This means that any built-in SQL function or user defined function (UDF) is readily available. 
+# To use any function built into a database in `@mutate` or in `@summarize`, simply write the function call as you would in SQL, but replace `'` with `"`. This also applies to any UDF. The example below will illustrate UDFs in the context of DuckDB. + + +# ``` +# # Set up the connection +# using TidierDB #reexports DuckDB +# db = DuckDB.DB() +# con = DuckDB.connect(db) # this will be important for UDFs +# mtcars_path = "https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv" +# mtcars = db_table(con, mtcars_path); +# ``` +# ## aggregate function in `@summarize` +# Let's use the DuckDB `kurtosis` aggregate function +# ``` +# @chain t(mtcars) begin +# @group_by cyl +# @summarize(kurt = kurtosis(mpg)) +# @collect +# end +# 3×2 DataFrame +# Row │ cyl kurt +# │ Int64? Float64? +# ─────┼─────────────────── +# 1 │ 4 -1.43411 +# 2 │ 6 -1.82944 +# 3 │ 8 0.330061 +# ``` + +# ## aggregate functions in `@mutate` +# To use aggregate SQL functions that are built into a database but exist outside of the TidierDB parser, simply wrap the function call in `agg()` +# ``` +# @chain t(mtcars) begin +# @group_by(cyl) +# @mutate(kurt = agg(kurtosis(mpg))) +# @select cyl mpg kurt +# @collect +# end + +# 32×3 DataFrame +# Row │ cyl mpg kurt +# │ Int64? Float64? Float64? +# ─────┼───────────────────────────── +# 1 │ 8 18.7 0.330061 +# 2 │ 8 14.3 0.330061 +# 3 │ 8 16.4 0.330061 +# 4 │ 8 17.3 0.330061 +# 5 │ 8 15.2 0.330061 +# 6 │ 8 10.4 0.330061 +# 7 │ 8 10.4 0.330061 +# ⋮ │ ⋮ ⋮ ⋮ +# 27 │ 6 21.0 -1.82944 +# 28 │ 6 21.4 -1.82944 +# 29 │ 6 18.1 -1.82944 +# 30 │ 6 19.2 -1.82944 +# 31 │ 6 17.8 -1.82944 +# 32 │ 6 19.7 -1.82944 +# 19 rows omitted +# end + +# ``` + +# ## DuckDB function chaining +# In DuckDB, functions can be chained together with `.`. TidierDB lets you leverage this. 
+# ``` +# @chain t(mtcars) begin +# @mutate(model2 = model.upper().string_split(" ").list_aggr("string_agg",".").concat(".")) +# @select model model2 +# @collect +# end +# 32×2 DataFrame +# Row │ model model2 +# │ String? String? +# ─────┼─────────────────────────────────────── +# 1 │ Mazda RX4 MAZDA.RX4. +# 2 │ Mazda RX4 Wag MAZDA.RX4.WAG. +# 3 │ Datsun 710 DATSUN.710. +# 4 │ Hornet 4 Drive HORNET.4.DRIVE. +# 5 │ Hornet Sportabout HORNET.SPORTABOUT. +# 6 │ Valiant VALIANT. +# 7 │ Duster 360 DUSTER.360. +# ⋮ │ ⋮ ⋮ +# 27 │ Porsche 914-2 PORSCHE.914-2. +# 28 │ Lotus Europa LOTUS.EUROPA. +# 29 │ Ford Pantera L FORD.PANTERA.L. +# 30 │ Ferrari Dino FERRARI.DINO. +# 31 │ Maserati Bora MASERATI.BORA. +# 32 │ Volvo 142E VOLVO.142E. +# 19 rows omitted +# ``` + +# ## `rowid` and pseudocolumns +# When a table is not being read directly from a file, `rowid` is available for use. In general, TidierDB should support all pseudocolumns. +# ``` +# copy_to(db, mtcars_path, "mtcars"); # copying table in for demonstration purposes +# @chain db_table(con, :mtcars) begin +# @filter(rowid == 4) +# @select(model:hp) +# @collect +# end +# 1×5 DataFrame +# Row │ model mpg cyl disp hp +# │ String? Float64? Int64? Float64? Int64? +# ─────┼─────────────────────────────────────────────────────── +# 1 │ Hornet Sportabout 18.7 8 360.0 175 +# ``` + +# ## UDF SQLite Example +# ``` +# using SQLite +# sql = connect(sqlite()); +# df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9], +# groups = [i % 2 == 0 ? 
"aa" : "bb" for i in 1:10], +# value = repeat(1:5, 2), +# percent = 0.1:0.1:1.0); +# +# copy_to(sql, df, "df_mem"); +# SQLite.@register sql function diff_of_squares(x, y) +# x^2 - y^2 +# end; +# +# @chain db_table(sql, "df_mem") begin +# @select(value, percent) +# @mutate(plus3 = diff_of_squares(value, percent)) +# @collect +# end +# 10×3 DataFrame +# Row │ value percent plus3 +# │ Int64 Float64 Float64 +# ─────┼───────────────────────── +# 1 │ 1 0.1 0.99 +# 2 │ 2 0.2 3.96 +# 3 │ 3 0.3 8.91 +# 4 │ 4 0.4 15.84 +# 5 │ 5 0.5 24.75 +# 6 │ 1 0.6 0.64 +# 7 │ 2 0.7 3.51 +# 8 │ 3 0.8 8.36 +# 9 │ 4 0.9 15.19 +# 10 │ 5 1.0 24.0 +# ``` + +# ## How to create UDF in DuckDB +# Example coming soon.. \ No newline at end of file diff --git a/docs/mkdocs.yml b/docs/mkdocs.yml index 75407c2..d6b2b6a 100644 --- a/docs/mkdocs.yml +++ b/docs/mkdocs.yml @@ -126,4 +126,5 @@ nav: - "Writing Functions/Macros with TidierDB Chains" : "examples/generated/UserGuide/functions_pass_to_DB.md" - "Working With Larger than RAM Datasets" : "examples/generated/UserGuide/outofmemex.md" - "TidierDB.jl vs Ibis" : "examples/generated/UserGuide/ibis_comp.md" + - "Flexible Syntax and UDFs" : "examples/generated/UserGuide/udfs_ex.md" - "Reference" : "reference.md" \ No newline at end of file diff --git a/src/TBD_macros.jl b/src/TBD_macros.jl index 627dba8..8609f31 100644 --- a/src/TBD_macros.jl +++ b/src/TBD_macros.jl @@ -775,6 +775,6 @@ end """ $docstring_show_tables """ -function show_tables(con::DuckDB.DB) +function show_tables(con::Union{DuckDB.DB, DuckDB.Connection}) return DataFrame(DBInterface.execute(con, "SHOW TABLES")) end diff --git a/src/TidierDB.jl b/src/TidierDB.jl index 6c2a2ea..d41af89 100644 --- a/src/TidierDB.jl +++ b/src/TidierDB.jl @@ -18,7 +18,7 @@ using GZip @distinct, @left_join, @right_join, @inner_join, @count, @window_order, @window_frame, @show_query, @collect, @slice_max, @slice_min, @slice_sample, @rename, copy_to, duckdb_open, duckdb_connect, @semi_join, @full_join, @anti_join, 
connect, from_query, @interpolate, add_interp_parameter!, update_con, @head, - clickhouse, duckdb, sqlite, mysql, mssql, postgres, athena, snowflake, gbq, oracle, databricks, SQLQuery, show_tables + clickhouse, duckdb, sqlite, mysql, mssql, postgres, athena, snowflake, gbq, oracle, databricks, SQLQuery, show_tables, t abstract type SQLBackend end @@ -154,10 +154,9 @@ function finalize_query(sqlquery::SQLQuery) complete_query = replace(complete_query, "&&" => " AND ", "||" => " OR ", "FROM )" => ")" , "SELECT SELECT " => "SELECT ", "SELECT SELECT " => "SELECT ", "DISTINCT SELECT " => "DISTINCT ", "SELECT SELECT SELECT " => "SELECT ", "PARTITION BY GROUP BY" => "PARTITION BY", "GROUP BY GROUP BY" => "GROUP BY", "HAVING HAVING" => "HAVING", - r"var\"(.*?)\"" => s"\1") + r"var\"(.*?)\"" => s"\1", r"\"\\\$" => "\"\$") complete_query = replace(complete_query, ", AS " => " AS ") - if current_sql_mode[] == postgres() || current_sql_mode[] == duckdb() || current_sql_mode[] == mysql() || current_sql_mode[] == mssql() || current_sql_mode[] == clickhouse() || current_sql_mode[] == athena() || current_sql_mode[] == gbq() || current_sql_mode[] == oracle() || current_sql_mode[] == snowflake() || current_sql_mode[] == databricks() complete_query = replace(complete_query, "\"" => "'", "==" => "=") end @@ -172,7 +171,7 @@ end # DuckDB -function get_table_metadata(conn::DuckDB.DB, table_name::String) +function get_table_metadata(conn::Union{DuckDB.DB, DuckDB.Connection}, table_name::String) set_sql_mode(duckdb()); query = """ diff --git a/src/db_parsing.jl b/src/db_parsing.jl index 1e913ab..b10ea95 100644 --- a/src/db_parsing.jl +++ b/src/db_parsing.jl @@ -244,14 +244,13 @@ end function parse_across(expr, metadata) columns_expr, funcs_expr = expr.args[2], expr.args[3] + + # Normalize the column selection (string, tuple, or single expression) into a vector if isa(columns_expr, String) - # Split the string on commas and trim any whitespace around the names - columns_exprs = map(Symbol, split(strip(columns_expr), ", 
")) + columns_exprs = map(name -> Symbol(strip(name)), split(columns_expr, ",")) # strip each name so "a, b" and "a,b" both resolve elseif isa(columns_expr, Expr) && columns_expr.head == :tuple - # If columns_expr is a tuple expression, extract its arguments columns_exprs = columns_expr.args else - # Handle single columns or other expressions by wrapping in an array columns_exprs = [columns_expr] end @@ -261,13 +260,12 @@ function parse_across(expr, metadata) for func in funcs for col_name in resolved_columns - func_name = isa(func, Symbol) ? func : func.args[1] - result_name = Symbol(string(func_name), "_", col_name) - - # Ensure column names are treated as symbols (identifiers) col_symbol = Meta.parse(col_name) # Convert string back to symbol - - new_expr = :($result_name = $func_name($col_symbol)) + func_filled = insert_col_into_func(func, col_symbol) + # Specify "agg" to be skipped in the result name + func_name_str = generate_func_name(func, ["agg"]) + result_name = Symbol(func_name_str, "_", col_name) + new_expr = Expr(:(=), result_name, func_filled) push!(result_exprs, new_expr) end end @@ -276,6 +274,48 @@ function parse_across(expr, metadata) return combined_expr end +function insert_col_into_func(func_expr, col_symbol) + if isa(func_expr, Symbol) + # Simple function name; create a call with the column symbol + return Expr(:call, func_expr, col_symbol) + elseif isa(func_expr, Expr) && func_expr.head == :call + # Function call; recursively insert the column symbol into arguments + func_name = func_expr.args[1] + args = func_expr.args[2:end] + new_args = [insert_col_into_func(arg, col_symbol) for arg in args] + return Expr(:call, func_name, new_args...) 
+ else + # Other expressions; return as-is + return func_expr + end +end +function generate_func_name(func_expr, skip_funcs=String[]) + if isa(func_expr, Symbol) + return string(func_expr) + elseif isa(func_expr, Expr) && func_expr.head == :call + func_name_expr = func_expr.args[1] + if isa(func_name_expr, Symbol) + func_name = string(func_name_expr) + else + func_name = generate_func_name(func_name_expr, skip_funcs) + end + # Process nested function names + nested_names = [generate_func_name(arg, skip_funcs) for arg in func_expr.args[2:end]] + # Drop empty names (literal / non-call arguments) before either join so no stray "_" is emitted + nested_names = filter(!isempty, nested_names) + # Exclude function names in skip_funcs + if func_name in skip_funcs + # Skip adding this function name + return join(nested_names, "_") + else + return join([func_name; nested_names], "_") + end + else + return "" + end +end + + function parse_interpolation2(expr) MacroTools.postwalk(expr) do x if @capture(x, !!variable_Symbol) diff --git a/src/docstrings.jl b/src/docstrings.jl index 28689d4..557c31d 100644 --- a/src/docstrings.jl +++ b/src/docstrings.jl @@ -1185,4 +1185,67 @@ julia> show_tables(db) # there are no tables in when first loading so df below i │ String ─────┴──────── ``` +""" + + +const docstring_from_query = +""" + from_query(query) + +`t()` is an alias for this function. Refers to a SQL query without changing the underlying struct. This is an alternate and convenient way to refer to an existing DB table. + +# Arguments +- `query`: The SQL query to reference + +# Examples +```julia + +julia> df = DataFrame(id = [string('A' + i ÷ 26, 'A' + i % 26) for i in 0:9], + groups = [i % 2 == 0 ? 
"aa" : "bb" for i in 1:10], + value = repeat(1:5, 2), + percent = 0.1:0.1:1.0); + +julia> db = connect(duckdb()); + +julia> copy_to(db, df, "df_mem"); + +julia> df_mem = db_table(db, "df_mem"); + + +julia> @chain t(df_mem) @collect +10×4 DataFrame + Row │ id groups value percent + │ String String Int64 Float64 +─────┼──────────────────────────────── + 1 │ AA bb 1 0.1 + 2 │ AB aa 2 0.2 + 3 │ AC bb 3 0.3 + 4 │ AD aa 4 0.4 + 5 │ AE bb 5 0.5 + 6 │ AF aa 1 0.6 + 7 │ AG bb 2 0.7 + 8 │ AH aa 3 0.8 + 9 │ AI bb 4 0.9 + 10 │ AJ aa 5 1.0 + +julia> query_part = @chain t(df_mem) @select groups:percent; + +julia> @chain t(query_part) @filter(value == 4) @collect +2×3 DataFrame + Row │ groups value percent + │ String? Int64? Float64? +─────┼─────────────────────────── + 1 │ aa 4 0.4 + 2 │ bb 4 0.9 + +julia> from_query(df_mem) +SQLQuery("", "df_mem", "", "", "", "", "", "", false, false, 4×4 DataFrame + Row │ name type current_selxn table_name + │ String? String? Int64 String +─────┼───────────────────────────────────────────── + 1 │ id VARCHAR 1 df_mem + 2 │ groups VARCHAR 1 df_mem + 3 │ value BIGINT 1 df_mem + 4 │ percent DOUBLE 1 df_mem, false, DuckDB.DB(":memory:"), TidierDB.CTE[], 0, nothing, "", "") +``` """ \ No newline at end of file diff --git a/src/parsing_athena.jl b/src/parsing_athena.jl index ecb81de..f62f1e1 100644 --- a/src/parsing_athena.jl +++ b/src/parsing_athena.jl @@ -82,13 +82,26 @@ function expr_to_sql_trino(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate 
functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REGEXP_REPLACE($str, $pattern, $replace, 'g')) diff --git a/src/parsing_clickhouse.jl b/src/parsing_clickhouse.jl index 6739e6b..ecd492b 100644 --- a/src/parsing_clickhouse.jl +++ b/src/parsing_clickhouse.jl @@ -82,13 +82,26 @@ function expr_to_sql_clickhouse(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, 
Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(replaceRegexpAll($str, $pattern, $replace)) diff --git a/src/parsing_duckdb.jl b/src/parsing_duckdb.jl index a8ce019..2b4c4c5 100644 --- a/src/parsing_duckdb.jl +++ b/src/parsing_duckdb.jl @@ -81,13 +81,26 @@ function expr_to_sql_duckdb(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join 
arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REGEXP_REPLACE($str, $pattern, $replace, 'g')) diff --git a/src/parsing_gbq.jl b/src/parsing_gbq.jl index fdeec20..e990596 100644 --- a/src/parsing_gbq.jl +++ b/src/parsing_gbq.jl @@ -111,13 +111,26 @@ function expr_to_sql_gbq(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REGEXP_REPLACE($str, $pattern, 
$replace, 'g')) diff --git a/src/parsing_mssql.jl b/src/parsing_mssql.jl index 4f07dd7..4f01641 100644 --- a/src/parsing_mssql.jl +++ b/src/parsing_mssql.jl @@ -82,13 +82,26 @@ function expr_to_sql_mssql(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REPLACE($str, $pattern, $replace)) diff --git a/src/parsing_mysql.jl b/src/parsing_mysql.jl index 8a150fe..3ed9989 100644 --- a/src/parsing_mysql.jl +++ b/src/parsing_mysql.jl @@ -82,13 +82,26 @@ function expr_to_sql_mysql(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" 
end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REGEXP_REPLACE($str, $pattern, $replace)) diff --git a/src/parsing_oracle.jl b/src/parsing_oracle.jl index b6c94f2..140f865 100644 --- a/src/parsing_oracle.jl +++ b/src/parsing_oracle.jl @@ -82,13 +82,26 @@ function expr_to_sql_oracle(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif @capture(x, Expr(:call, :agg, args...)) + if from_summarize + 
return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the aggregate function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "agg($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REPLACE($str, $pattern, $replace)) diff --git a/src/parsing_postgres.jl b/src/parsing_postgres.jl index 7aeb318..b7b94a6 100644 --- a/src/parsing_postgres.jl +++ b/src/parsing_postgres.jl @@ -82,13 +82,26 @@ function expr_to_sql_postgres(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + 
str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REGEXP_REPLACE($str, $pattern, $replace, 'g')) diff --git a/src/parsing_snowflake.jl b/src/parsing_snowflake.jl index 30a10b6..a24f150 100644 --- a/src/parsing_snowflake.jl +++ b/src/parsing_snowflake.jl @@ -83,13 +83,26 @@ function expr_to_sql_snowflake(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq) return "VAR_SAMP($(string(a))) $(window_clause)" end - #elseif @capture(x, sql_agg(str_)) - # if from_summarize - # return error("sql_agg is only needed with aggregate functions in @mutate") - # else - # window_clause = construct_window_clause(sq) - # return "$(str) $(window_clause)" - # end + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg + if from_summarize + return error("agg is only needed with aggregate functions in @mutate") + else + window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" + return "$(str) $(window_clause)" + end + elseif !isempty(sq.window_order) && isa(x, Expr) && x.head == :call + function_name = x.args[1] # This will be `lead` + args = x.args[2:end] # Capture all arguments from the second position onward + window_clause = 
construct_window_clause(sq) + + # Create the SQL string representation of the function call + arg_str = join(map(string, args), ", ") # Join arguments into a string + str = "$(function_name)($(arg_str))" # Construct the function call string + return "$(str) $(window_clause)" #stringr functions, have to use function that removes _ so capture can capture name elseif @capture(x, strreplaceall(str_, pattern_, replace_)) return :(REGEXP_REPLACE($str, $pattern, $replace, 'g')) diff --git a/src/parsing_sqlite.jl b/src/parsing_sqlite.jl index c6a2af1..3ba1790 100644 --- a/src/parsing_sqlite.jl +++ b/src/parsing_sqlite.jl @@ -55,11 +55,15 @@ function expr_to_sql_lite(expr, sq; from_summarize::Bool) window_clause = construct_window_clause(sq, from_cumsum = true) return "SUM($(string(a))) $(window_clause)" end - elseif @capture(x, sql_agg(str_)) + elseif isa(x, Expr) && x.head == :call && x.args[1] == :agg + args = x.args[2:end] # Capture all arguments to agg if from_summarize - return error("sql_agg is only needed with aggregate functions in @mutate") + return error("agg is only needed with aggregate functions in @mutate") else window_clause = construct_window_clause(sq) + # Create the SQL string representation of the agg function call + arg_str = join(map(string, args), ", ") + str = "$(arg_str)" return "$(str) $(window_clause)" end # exc_capture_bug used above to allow proper _ function name capturing diff --git a/src/structs.jl b/src/structs.jl index 77e786d..9ba4df3 100644 --- a/src/structs.jl +++ b/src/structs.jl @@ -101,4 +101,6 @@ function from_query(query::TidierDB.SQLQuery) ch_settings = query.ch_settings ) return new_query -end \ No newline at end of file +end + +t(table) = from_query(table) \ No newline at end of file