Skip to content

Commit

Permalink
feat(transformer): ✨ Add fit, transform and fit_transform to al…
Browse files Browse the repository at this point in the history
…l `Transformers`, along with API and caching simplifications.
  • Loading branch information
Erik Båvenstrand committed Jul 26, 2023
1 parent 574bde4 commit 5cc4ebc
Show file tree
Hide file tree
Showing 18 changed files with 399 additions and 253 deletions.
67 changes: 33 additions & 34 deletions examples/Australia Rain Classification.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -21,7 +21,7 @@
},
{
"cell_type": "code",
"execution_count": 3,
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -50,7 +50,7 @@
},
{
"cell_type": "code",
"execution_count": 4,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -78,16 +78,16 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-07-17 16:51:03] [\u001b[1;32mINFO\u001b[0m] Attempting to fetch Kaggle API credentials from environment variables 'KAGGLE_USERNAME' and 'KAGGLE_KEY'. \u001b[1m(kaggle_ingester.py:74)\u001b[0m\n",
"[2023-07-17 16:51:03] [\u001b[1;32mINFO\u001b[0m] Kaggle API credentials not found in environment variables, attempting to fetch from fallback path at ~/.kaggle/kaggle.json. \u001b[1m(kaggle_ingester.py:82)\u001b[0m\n",
"[2023-07-17 16:51:03] [\u001b[1;32mINFO\u001b[0m] Kaggle credentials successfully fetched. \u001b[1m(kaggle_ingester.py:91)\u001b[0m\n"
"[2023-07-26 14:51:32] [\u001b[1;32mINFO\u001b[0m] Attempting to fetch Kaggle API credentials from environment variables 'KAGGLE_USERNAME' and 'KAGGLE_KEY'. \u001b[1m(kaggle_ingester.py:74)\u001b[0m\n",
"[2023-07-26 14:51:32] [\u001b[1;32mINFO\u001b[0m] Kaggle API credentials not found in environment variables, attempting to fetch from fallback path at ~/.kaggle/kaggle.json. \u001b[1m(kaggle_ingester.py:82)\u001b[0m\n",
"[2023-07-26 14:51:32] [\u001b[1;32mINFO\u001b[0m] Kaggle credentials successfully fetched. \u001b[1m(kaggle_ingester.py:91)\u001b[0m\n"
]
}
],
Expand Down Expand Up @@ -170,14 +170,14 @@
" ),\n",
" TransformStep(\n",
" composite_transformer,\n",
" fit=True,\n",
" action=\"fit_transform\",\n",
" inputs=[\"df_selected_train_validate\"],\n",
" outputs=[\"df_transformed_train_validate\"],\n",
" outputs=[\"composite_transformer\", \"df_transformed_train_validate\"],\n",
" cache_group=\"fit_transform_train_validate\",\n",
" ),\n",
" TransformStep(\n",
" composite_transformer,\n",
" fit=False,\n",
" action=\"transform\",\n",
" inputs=[\"df_selected_test\"],\n",
" outputs=[\"df_transformed_test\"],\n",
" cache_group=\"fit_transform_test\",\n",
Expand All @@ -203,30 +203,29 @@
"name": "stdout",
"output_type": "stream",
"text": [
"[2023-07-17 16:48:43] [\u001b[1;32mINFO\u001b[0m] No data container provided. Creating an empty one. \u001b[1m(pipeline.py:77)\u001b[0m\n",
"[2023-07-17 16:48:43] [\u001b[1;32mINFO\u001b[0m] Executing step 1/7: IngestStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:43] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m: Local dataset is up to date with Kaggle, skipping download. \u001b[1m(kaggle_ingester.py:279)\u001b[0m\n",
"[2023-07-17 16:48:43] [\u001b[1;32mINFO\u001b[0m] Finished step 1/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-17 16:48:43] [\u001b[1;32mINFO\u001b[0m] Executing step 2/7: ConvertStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:44] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CSVToVaexConverter.convert: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-17 16:48:44] [\u001b[1;32mINFO\u001b[0m] Finished step 2/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-17 16:48:44] [\u001b[1;32mINFO\u001b[0m] Executing step 3/7: SplitStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:44] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) RandomSplitter.split: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-17 16:48:44] [\u001b[1;32mINFO\u001b[0m] Finished step 3/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-17 16:48:44] [\u001b[1;32mINFO\u001b[0m] Executing step 4/7: FeatureSelectStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeFeatureSelector.select_features: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Loading feature selector from data/jsphyg/weather-dataset-rattle-package/feature_select/CompositeFeatureSelector.select_features.fit_transform_train_validate.8423e84e51cd97f9969e6d6930e0e515.feature_selector. \u001b[1m(base_feature_selector.py:179)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Finished step 4/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Executing step 5/7: FeatureSelectStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeFeatureSelector.select_features: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Finished step 5/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Executing step 6/7: TransformStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeTransformer.transform: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Loading transformer from data/jsphyg/weather-dataset-rattle-package/transform/CompositeTransformer.transform.fit_transform_train_validate.457fa6ca1a7c824883a780ef87784946.transformer. \u001b[1m(base_transformer.py:124)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Finished step 6/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Executing step 7/7: TransformStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeTransformer.transform: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-17 16:48:45] [\u001b[1;32mINFO\u001b[0m] Finished step 7/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n"
"[2023-07-26 14:51:39] [\u001b[1;32mINFO\u001b[0m] No data container provided. Creating an empty one. \u001b[1m(pipeline.py:77)\u001b[0m\n",
"[2023-07-26 14:51:39] [\u001b[1;32mINFO\u001b[0m] Executing step 1/7: IngestStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:39] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m: Local dataset is up to date with Kaggle, skipping download. \u001b[1m(kaggle_ingester.py:279)\u001b[0m\n",
"[2023-07-26 14:51:39] [\u001b[1;32mINFO\u001b[0m] Finished step 1/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-26 14:51:39] [\u001b[1;32mINFO\u001b[0m] Executing step 2/7: ConvertStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CSVToVaexConverter.convert: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Finished step 2/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Executing step 3/7: SplitStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) RandomSplitter.split: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Finished step 3/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Executing step 4/7: FeatureSelectStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeFeatureSelector.select_features: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Loading feature selector from data/jsphyg/weather-dataset-rattle-package/feature_select/CompositeFeatureSelector.select_features.fit_transform_train_validate.8423e84e51cd97f9969e6d6930e0e515.feature_selector. \u001b[1m(base_feature_selector.py:179)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Finished step 4/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Executing step 5/7: FeatureSelectStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeFeatureSelector.select_features: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Finished step 5/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Executing step 6/7: TransformStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeTransformer.fit_transform: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Finished step 6/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Executing step 7/7: TransformStep. \u001b[1m(pipeline.py:81)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] \u001b[32mCache Hit\u001b[0m (LRUCache) CompositeTransformer.transform: Using cached output. \u001b[1m(cache_mixin.py:160)\u001b[0m\n",
"[2023-07-26 14:51:40] [\u001b[1;32mINFO\u001b[0m] Finished step 7/7 execution. \u001b[1m(pipeline.py:83)\u001b[0m\n"
]
}
],
Expand Down
Loading

0 comments on commit 5cc4ebc

Please sign in to comment.