Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix option handling in pipeline.jl #70

Merged
merged 9 commits into from
Aug 9, 2024
104 changes: 52 additions & 52 deletions src/pipeline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,25 @@
Store() = Store(":memory:")
DEFAULT = Store()

function tmp_tbl_name(source::String)
"""
get_tbl_name(source::String, tmp::Bool)

Generate a table name from the basename of a file (extension dropped),
replacing each run of special characters with an underscore.
If `tmp` is true, then the table name is prefixed by 't_'.

"""
function get_tbl_name(source::String, tmp::Bool)
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name, _ = splitext(basename(source))
name = replace(name, r"[ ()\[\]{}\\+,.-]+" => "_")
"t_$(name)"
tmp ? "t_$(name)" : name
end

# TODO: support "CREATE OR REPLACE" & "IF NOT EXISTS" for all create_* functions

function _create_tbl_impl(con::DB, query::String; name::String, tmp::Bool, show::Bool)
if length(name) > 0
DBInterface.execute(con, "CREATE $(tmp ? "TEMP" : "") TABLE $name AS $query")
return show ? DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $name")) : name
else # only show
res = DBInterface.execute(con, query)
return DF.DataFrame(res)
end
create_table_cmd = "CREATE" * (tmp ? " TEMP" : "") * " TABLE"
DBInterface.execute(con, "$create_table_cmd $name AS $query")
return show ? DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $name")) : name
end

"""
Expand All @@ -87,9 +90,8 @@
setting the `tmp` flag, i.e. the table is session scoped. It is
deleted when you close the connection with DuckDB.

When `show` is `false`, and `name` was not provided, a table name
autotomatically generated from the basename of the filename is used.
This also unconditionally sets the temporary table flag to `true`.
When `name` is not provided, a table name is automatically generated
from the basename of the filename (prefixed by 't_' when `tmp` is set).

To enforce data types of a column, you can provide the keyword
argument `types` as a dictionary with column names as keys, and
Expand All @@ -105,18 +107,17 @@
types = Dict(),
)
check_file(source) ? true : throw(FileNotFoundError(source))
if length(name) == 0
name = get_tbl_name(source, tmp)
end

kwargs = Dict{Symbol, String}()
if length(types) > 0
kwargs[:types] = "{" * join(("'$key': '$value'" for (key, value) in types), ",") * "}"
end
query = fmt_select(fmt_read(source; _read_opts..., kwargs...))

if (length(name) == 0) && !show
tmp = true
name = tmp_tbl_name(source)
end

return _create_tbl_impl(con, query; name = name, tmp = tmp, show = show)
return _create_tbl_impl(con, query; name, tmp, show)
end

"""
Expand All @@ -126,7 +127,7 @@
alt_source::String;
on::Vector{Symbol},
cols::Vector{Symbol},
variant::String = "",
name::String = "",
fill::Bool = true,
fill_values::Union{Missing,Dict} = missing,
tmp::Bool = false,
Expand All @@ -140,7 +141,7 @@
Either sources can be a table in DuckDB, or a file source as in the
single source variant.

The resulting table is saved as the table `variant`. The name of the
The resulting table is saved as the table `name`. The name of the
created table is returned. The behaviour for `tmp`, and `show` are
identical to the single source variant.

Expand Down Expand Up @@ -168,21 +169,20 @@
alt_source::String;
on::Vector{Symbol},
cols::Vector{Symbol},
variant::String = "",
name::String = "",
fill::Bool = true,
fill_values::Union{Missing, Dict} = missing,
tmp::Bool = false,
show::Bool = false,
)
if check_file(alt_source) && length(name) == 0
name = get_tbl_name(alt_source, tmp)
end

sources = [fmt_source(con, src) for src in (base_source, alt_source)]
query = fmt_join(sources...; on = on, cols = cols, fill = fill, fill_values = fill_values)

if (length(variant) == 0) && !show
tmp = true
variant = tmp_tbl_name(alt_source)
end

return _create_tbl_impl(con, query; name = variant, tmp = tmp, show = show)
return _create_tbl_impl(con, query; name, tmp, show)
end

function _get_index(con::DB, source::String, on::Symbol)
Expand Down Expand Up @@ -216,15 +216,16 @@
source::String,
cols::Dict{Symbol,Vector{T}};
on::Symbol,
variant::String = "",
name::String,
tmp::Bool = false,
show::Bool = false,
) where T <: Union{Int64, Float64, String, Bool}

Create a table from a source (either a DuckDB table or a file), where
a column can be set to the vector provided by `vals`. This transform
is very similar to `create_tbl`, except that the alternate source is a
data structure in Julia.
columns can be set to vectors provided in a dictionary `cols`. The
keys are the new column names, and the vector values are the column
entries. This transform is very similar to `create_tbl`, except that
the alternate source is a data structure in Julia.

The resulting table is saved as the table `name`. The name of the
created table is returned.
Expand All @@ -237,14 +238,17 @@
source::String,
cols::Dict{Symbol, Vector{T}};
on::Symbol,
variant::String = "",
name::String,
tmp::Bool = false,
show::Bool = false,
) where {T <: Union{Int64, Float64, String, Bool}}
# TODO: is it worth it to have the ability to set multiple
# columns? If such a feature is required, we can use
# cols::Dict{Symbol, Vector{Any}}, and get the cols and vals
# as: keys(cols), and values(cols)
if check_file(source) && length(name) == 0
name = get_tbl_name(source, tmp)

Check warning on line 250 in src/pipeline.jl

View check run for this annotation

Codecov / codecov/patch

src/pipeline.jl#L250

Added line #L250 was not covered by tests
end

# for now, support only one column
if length(cols) > 1
Expand All @@ -271,7 +275,7 @@
vals;
on = on,
col = first(keys(cols)),
variant = variant,
name = name,
tmp = tmp,
show = show,
)
Expand All @@ -283,17 +287,18 @@
source::String,
cols::Dict{Symbol, T};
on::Symbol,
col::Symbol,
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name::String = "",
where_::String = "",
variant::String = "",
tmp::Bool = false,
show::Bool = false,
) where T

Create a table from a source (either a DuckDB table or a file), where
a column can be set to the value provided by `value`. Unlike the
vector variant of this function, all values of the column are set to
this value.
a column can be set to the values provided by the dictionary `cols`.
The keys are the column names, whereas the values are the column
entries. Unlike the vector variant of this function, every entry in
a column is set to the same value.

All other options and behaviour are same as the vector variant of this
function.
Expand All @@ -304,36 +309,32 @@
source::String,
cols::Dict{Symbol, T};
on::Symbol,
name::String = "",
where_::String = "",
variant::String = "",
tmp::Bool = false,
show::Bool = false,
) where {T}
# FIXME: accept NamedTuple|Dict as cols instead of value & col
if check_file(source) && length(name) == 0
name = get_tbl_name(source, tmp)
end

source = fmt_source(con, source)
subquery = fmt_select(source; cols...)
if length(where_) > 0
subquery *= " WHERE $(where_)"
end

# FIXME: resolve the inconsistent String|Symbol API
query = fmt_join(source, "($subquery)"; on = [on], cols = [keys(cols)...], fill = true)

if (length(variant) == 0) && !show
tmp = true
variant = tmp_tbl_name(source)
end

return _create_tbl_impl(con, query; name = variant, tmp = tmp, show = show)
return _create_tbl_impl(con, query; name = name, tmp = tmp, show = show)
abelsiqueira marked this conversation as resolved.
Show resolved Hide resolved
end

function set_tbl_col(
con::DB,
source::String;
on::Symbol,
col::Symbol,
name::String,
apply::Function,
variant::String = "",
tmp::Bool = false,
show::Bool = false,
) end
Expand All @@ -349,9 +350,8 @@
src = fmt_source(con, source)
query = "SELECT * FROM $src WHERE $expression"

if (length(name) == 0) && !show
tmp = true
name = tmp_tbl_name(source)
if check_file(source) && length(name) == 0
name = get_tbl_name(source, tmp)

Check warning on line 354 in src/pipeline.jl

View check run for this annotation

Codecov / codecov/patch

src/pipeline.jl#L353-L354

Added lines #L353 - L354 were not covered by tests
end

return _create_tbl_impl(con, query; name = name, tmp = tmp, show = show)
Expand Down
Loading
Loading