Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix option handling in pipeline.jl #70

Merged
merged 9 commits into from
Aug 9, 2024
84 changes: 37 additions & 47 deletions src/pipeline.jl
Original file line number Diff line number Diff line change
Expand Up @@ -46,22 +46,17 @@
Store() = Store(":memory:")
DEFAULT = Store()

"""
    get_tbl_name(source::String, tmp::Bool)

Derive a table name from the file path `source`.

The directory part and the file extension are dropped, and every run
of spaces, brackets, braces, backslashes, or the characters `+`, `,`,
`.`, `-` in the remaining stem is replaced by a single underscore.

When `tmp` is `true` the name is prefixed with `t_`; otherwise the
cleaned stem is returned unchanged.
"""
function get_tbl_name(source::String, tmp::Bool)
    name, _ = splitext(basename(source))
    name = replace(name, r"[ ()\[\]{}\\+,.-]+" => "_")
    return tmp ? "t_$(name)" : name
end

# Backward-compatible entry point: the single-argument form always
# produced the `t_`-prefixed name.
tmp_tbl_name(source::String) = get_tbl_name(source, true)

# TODO: support "CREATE OR REPLACE" & "IF NOT EXISTS" for all create_* functions

"""
    _create_tbl_impl(con::DB, query::String; name::String, tmp::Bool, show::Bool)

Run `query` on the connection `con` and store the result as the table
`name`.

- If `name` is empty, no table is created; the query is executed and
  its result is returned as a `DataFrame`.
- Otherwise the table is created (as a `TEMP` table when `tmp` is
  `true`).  The return value is a `DataFrame` with the table contents
  when `show` is `true`, and the table name otherwise.
"""
function _create_tbl_impl(con::DB, query::String; name::String, tmp::Bool, show::Bool)
    # Without a table name there is nothing to create: only show the result.
    if isempty(name)
        return DF.DataFrame(DBInterface.execute(con, query))
    end

    DBInterface.execute(con, "CREATE $(tmp ? "TEMP" : "") TABLE $name AS $query")
    return show ? DF.DataFrame(DBInterface.execute(con, "SELECT * FROM $name")) : name
end

"""
Expand All @@ -87,9 +82,8 @@
setting the `tmp` flag, i.e. the table is session scoped. It is
deleted when you close the connection with DuckDB.

When `show` is `false`, and `name` was not provided, a table name
automatically generated from the basename of the filename is used.
This also unconditionally sets the temporary table flag to `true`.
When `show` is `false`, and `name` was not provided, a table name is
automatically generated from the basename of the filename.

To enforce data types of a column, you can provide the keyword
argument `types` as a dictionary with column names as keys, and
Expand All @@ -111,12 +105,11 @@
end
query = fmt_select(fmt_read(source; _read_opts..., kwargs...))

if (length(name) == 0) && !show
tmp = true
name = tmp_tbl_name(source)
if (length(name) == 0)
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name = get_tbl_name(source, tmp)
end

return _create_tbl_impl(con, query; name = name, tmp = tmp, show = show)
return _create_tbl_impl(con, query; name, tmp, show)
end

"""
Expand All @@ -126,7 +119,7 @@
alt_source::String;
on::Vector{Symbol},
cols::Vector{Symbol},
variant::String = "",
name::String = "",
fill::Bool = true,
fill_values::Union{Missing,Dict} = missing,
tmp::Bool = false,
Expand All @@ -140,7 +133,7 @@
Either sources can be a table in DuckDB, or a file source as in the
single source variant.

The resulting table is saved as the table `variant`. The name of the
The resulting table is saved as the table `name`. The name of the
created table is returned. The behaviour for `tmp`, and `show` are
identical to the single source variant.

Expand Down Expand Up @@ -168,7 +161,7 @@
alt_source::String;
on::Vector{Symbol},
cols::Vector{Symbol},
variant::String = "",
name::String = "",
fill::Bool = true,
fill_values::Union{Missing, Dict} = missing,
tmp::Bool = false,
Expand All @@ -177,12 +170,11 @@
sources = [fmt_source(con, src) for src in (base_source, alt_source)]
query = fmt_join(sources...; on = on, cols = cols, fill = fill, fill_values = fill_values)

if (length(variant) == 0) && !show
tmp = true
variant = tmp_tbl_name(alt_source)
if (length(name) == 0)
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name = get_tbl_name(alt_source, tmp)
end

return _create_tbl_impl(con, query; name = variant, tmp = tmp, show = show)
return _create_tbl_impl(con, query; name, tmp, show)
end

function _get_index(con::DB, source::String, on::Symbol)
Expand Down Expand Up @@ -216,15 +208,16 @@
source::String,
cols::Dict{Symbol,Vector{T}};
on::Symbol,
variant::String = "",
name::String,
tmp::Bool = false,
show::Bool = false,
) where T <: Union{Int64, Float64, String, Bool}

Create a table from a source (either a DuckDB table or a file), where
a column can be set to the vector provided by `vals`. This transform
is very similar to `create_tbl`, except that the alternate source is a
data structure in Julia.
columns can be set to vectors provided in a dictionary `cols`. The
keys are the new column names, and the vector values are the column
entries. This transform is very similar to `create_tbl`, except that
the alternate source is a data structure in Julia.

The resulting table is saved as the table `name`. The name of the
created table is returned.
Expand All @@ -237,7 +230,7 @@
source::String,
cols::Dict{Symbol, Vector{T}};
on::Symbol,
variant::String = "",
name::String,
tmp::Bool = false,
show::Bool = false,
) where {T <: Union{Int64, Float64, String, Bool}}
Expand Down Expand Up @@ -271,7 +264,7 @@
vals;
on = on,
col = first(keys(cols)),
variant = variant,
name = name,
tmp = tmp,
show = show,
)
Expand All @@ -283,17 +276,18 @@
source::String,
cols::Dict{Symbol, T};
on::Symbol,
col::Symbol,
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name::String,
where_::String = "",
variant::String = "",
tmp::Bool = false,
show::Bool = false,
) where T

Create a table from a source (either a DuckDB table or a file), where
a column can be set to the value provided by `value`. Unlike the
vector variant of this function, all values of the column are set to
this value.
columns can be set to the values provided by the dictionary `cols`.
The keys are the column names, whereas the values are the column
entries. Unlike the vector variant of this function, every entry in
a column is set to the same value.

All other options and behaviour are same as the vector variant of this
function.
Expand All @@ -304,36 +298,33 @@
source::String,
cols::Dict{Symbol, T};
on::Symbol,
name::String,
where_::String = "",
variant::String = "",
tmp::Bool = false,
show::Bool = false,
) where {T}
if (length(name) == 0)
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name = get_tbl_name(source, tmp)

Check warning on line 307 in src/pipeline.jl

View check run for this annotation

Codecov / codecov/patch

src/pipeline.jl#L307

Added line #L307 was not covered by tests
end

# FIXME: accept NamedTuple|Dict as cols instead of value & col
source = fmt_source(con, source)
subquery = fmt_select(source; cols...)
if length(where_) > 0
subquery *= " WHERE $(where_)"
end

# FIXME: resolve the inconsistent String|Symbol API
query = fmt_join(source, "($subquery)"; on = [on], cols = [keys(cols)...], fill = true)

if (length(variant) == 0) && !show
tmp = true
variant = tmp_tbl_name(source)
end

return _create_tbl_impl(con, query; name = variant, tmp = tmp, show = show)
return _create_tbl_impl(con, query; name = name, tmp = tmp, show = show)
abelsiqueira marked this conversation as resolved.
Show resolved Hide resolved
end

# Method stub taking an `apply::Function` keyword: the body is empty, so
# a call that dispatches here returns `nothing`.
# NOTE(review): both `name` and `variant` are declared below; `variant`
# looks like a leftover from the rename to `name` — confirm which
# keyword should remain.
function set_tbl_col(
con::DB,
source::String;
on::Symbol,
col::Symbol,
name::String,
apply::Function,
variant::String = "",
tmp::Bool = false,
show::Bool = false,
) end
Expand All @@ -349,9 +340,8 @@
src = fmt_source(con, source)
query = "SELECT * FROM $src WHERE $expression"

if (length(name) == 0) && !show
tmp = true
name = tmp_tbl_name(source)
if (length(name) == 0)
suvayu marked this conversation as resolved.
Show resolved Hide resolved
name = get_tbl_name(source, tmp)

Check warning on line 344 in src/pipeline.jl

View check run for this annotation

Codecov / codecov/patch

src/pipeline.jl#L343-L344

Added lines #L343 - L344 were not covered by tests
end

return _create_tbl_impl(con, query; name = name, tmp = tmp, show = show)
Expand Down
Loading
Loading