Commit 4adc27d
fixed: SageMaker logic
iusztinpaul committed Aug 17, 2024
1 parent 5197515 commit 4adc27d
Showing 7 changed files with 29 additions and 23 deletions.
3 changes: 2 additions & 1 deletion .gitignore
@@ -165,4 +165,5 @@ cython_debug/
 .vscode/**/launch.json
 
 # Data
-output/
+output/
+sagemaker_*.json
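The new sagemaker_*.json pattern keeps generated credential files, such as the sagemaker_user_credentials.json written by create_sagemaker_role.py below, out of version control.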
9 changes: 6 additions & 3 deletions Makefile
@@ -11,11 +11,11 @@ help:
 
 create-sagemaker-role:
 	@echo "Creating the SageMaker role..."
-	poetry run python llm_engineering/core/aws/create_sagemaker_role.py
+	poetry run python llm_engineering/core/aws/roles/create_sagemaker_role.py
 
 create-sagemaker-execution-role:
 	@echo "Creating the SageMaker execution role..."
-	poetry run python llm_engineering/core/aws/create_sagemaker_execution_role.py
+	poetry run python llm_engineering/core/aws/roles/create_execution_role.py
 
 deploy-inference-endpoint:
 	@echo "Deploying the inference endpoint..."
@@ -27,4 +27,7 @@ delete-inference-endpoint:
 		exit 1; \
 	fi
 	@echo "Deleting the inference endpoint and config..."
-	poetry run python llm_engineering/model/delete_inference_endpoint.py $(ENDPOINT_NAME)
+	poetry run python llm_engineering/model/delete_inference_endpoint.py $(ENDPOINT_NAME)
+
+test-inference:
+	poetry run python -m llm_engineering.model.inference.test
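The delete target hands $(ENDPOINT_NAME) to a helper script, and the exit 1 branch above guards against running it with an empty endpoint name. As a rough sketch of what such a script can do (assuming boto3's sagemaker client; not necessarily how the repository's delete_inference_endpoint.py is written), the endpoint's config can be looked up and removed alongside the endpoint itself:

```python
import sys

import boto3


def delete_endpoint_and_config(endpoint_name: str) -> None:
    """Delete a SageMaker endpoint together with its endpoint configuration."""
    sm = boto3.client("sagemaker")

    # Look up the endpoint config attached to the endpoint before deleting it.
    config_name = sm.describe_endpoint(EndpointName=endpoint_name)["EndpointConfigName"]

    sm.delete_endpoint(EndpointName=endpoint_name)
    sm.delete_endpoint_config(EndpointConfigName=config_name)


if __name__ == "__main__":
    delete_endpoint_and_config(sys.argv[1])
```

Invocation then looks like make delete-inference-endpoint ENDPOINT_NAME=<name>.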
15 changes: 7 additions & 8 deletions llm_engineering/core/aws/roles/create_sagemaker_role.py
@@ -1,6 +1,8 @@
 import json
+from pathlib import Path
 
 import boto3
+from loguru import logger
 
 from llm_engineering.settings import settings
 
@@ -33,19 +35,16 @@ def create_sagemaker_user(username, region_name="eu-central-1"):
     response = iam.create_access_key(UserName=username)
     access_key = response["AccessKey"]
 
-    print(f"User '{username}' created successfully.")
-    print(f"Access Key ID: {access_key['AccessKeyId']}")
-    print(f"Secret Access Key: {access_key['SecretAccessKey']}")
+    logger.info(f"User '{username}' successfully created.")
+    logger.info("Access Key ID and Secret Access Key successfully created.")
 
     # Return the access key info
     return {"AccessKeyId": access_key["AccessKeyId"], "SecretAccessKey": access_key["SecretAccessKey"]}
 
 
 if __name__ == "__main__":
-    new_user = create_sagemaker_user("sagemaker-deployer-2")
+    new_user = create_sagemaker_user("sagemaker-deployer-3")
 
     # Save the access keys to a file
-    with open("sagemaker_user_credentials.json", "w") as f:
+    with Path("sagemaker_user_credentials.json").open("w") as f:
        json.dump(new_user, f)
 
-    print("Credentials saved to 'sagemaker_user_credentials.json'")
+    logger.info("Credentials saved to 'sagemaker_user_credentials.json'")
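The displayed hunk picks up after the IAM user already exists. Purely as an assumption about the collapsed code above the hunk (it is not shown in this diff), the usual boto3 flow would be:

```python
import boto3

# Hypothetical reconstruction of the collapsed part of create_sagemaker_user;
# the real function may attach different or additional policies.
iam = boto3.client("iam", region_name="eu-central-1")

iam.create_user(UserName="sagemaker-deployer-3")
iam.attach_user_policy(
    UserName="sagemaker-deployer-3",
    PolicyArn="arn:aws:iam::aws:policy/AmazonSageMakerFullAccess",
)

# This is where the displayed hunk resumes.
response = iam.create_access_key(UserName="sagemaker-deployer-3")
```

Note that the switch from print to logger.info also stops echoing the secret access key to stdout: only non-sensitive status messages are logged, while the key material goes into the JSON file (which the new .gitignore rule excludes).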
6 changes: 2 additions & 4 deletions llm_engineering/model/deploy/huggingface/config.py
@@ -1,8 +1,6 @@
 import json
 
-from sagemaker.compute_resource_requirements.resource_requirements import (
-    ResourceRequirements,
-)
+from sagemaker.compute_resource_requirements.resource_requirements import ResourceRequirements
 
 from llm_engineering.settings import settings
 
@@ -13,7 +11,7 @@
     "MAX_TOTAL_TOKENS": json.dumps(settings.MAX_TOTAL_TOKENS),  # Max length of the generation (including input text)
     "MAX_BATCH_TOTAL_TOKENS": json.dumps(settings.MAX_BATCH_TOTAL_TOKENS),
     "HUGGING_FACE_HUB_TOKEN": settings.HUGGING_FACE_HUB_TOKEN,
-    "MAX_BATCH_PREFILL_TOKENS": "25000",
+    "MAX_BATCH_PREFILL_TOKENS": "10000",
     # 'HF_MODEL_QUANTIZE': "bitsandbytes",
 }
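The prefill budget drops from 25000 to 10000 tokens, in line with the smaller MAX_INPUT_LENGTH and MAX_TOTAL_TOKENS defaults introduced in settings.py below. A quick sanity check of the new budget, assuming the usual TGI constraints (the prefill budget must cover the longest allowed input, and a batch must fit at least one full request):

```python
# New defaults from this commit (see settings.py below).
MAX_INPUT_LENGTH = 8000
MAX_TOTAL_TOKENS = 12000
MAX_BATCH_TOTAL_TOKENS = 12000
MAX_BATCH_PREFILL_TOKENS = 10000

assert MAX_BATCH_PREFILL_TOKENS >= MAX_INPUT_LENGTH  # 10000 >= 8000: longest prompt fits the prefill budget
assert MAX_TOTAL_TOKENS > MAX_INPUT_LENGTH           # 12000 > 8000: leaves room to generate on top of the input
assert MAX_BATCH_TOTAL_TOKENS >= MAX_TOTAL_TOKENS    # 12000 >= 12000: a batch can hold one full request
```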
1 change: 1 addition & 0 deletions llm_engineering/model/inference/run.py
@@ -26,6 +26,7 @@ def execute(self) -> str:
            },
        )
        extraction = self.llm.inference()[0]["generated_text"]
+
        return extraction
 
 
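The [0]["generated_text"] indexing matches the response shape a TGI-style endpoint typically returns: a list with one dict per prompt. A toy illustration of the assumed shape (not the repository's actual client):

```python
# Assumed TGI-style response body for a single prompt.
response = [{"generated_text": "SageMaker is a managed ML service..."}]

extraction = response[0]["generated_text"]
```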
6 changes: 5 additions & 1 deletion llm_engineering/model/inference/test.py
@@ -1,3 +1,5 @@
+from loguru import logger
+
 from llm_engineering.model.inference.inference import LLMInferenceSagemakerEndpoint
 from llm_engineering.model.inference.run import InferenceExecutor
 from llm_engineering.settings import settings
@@ -8,4 +10,6 @@
     llm = LLMInferenceSagemakerEndpoint(
         endpoint_name=settings.SAGEMAKER_ENDPOINT_INFERENCE, inference_component_name=None
     )
-    InferenceExecutor(llm, text, prompt).execute()
+    answer = InferenceExecutor(llm, text, prompt).execute()
+
+    logger.info(answer)
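With this change the smoke test captures the generated answer and logs it instead of discarding the return value; given the new Makefile target above, it can be run with make test-inference.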
12 changes: 6 additions & 6 deletions llm_engineering/settings.py
@@ -12,7 +12,7 @@ class Config:
 
     # Selenium Drivers
     SELENIUM_BROWSER_BINARY_PATH: str | None = None
-    SELENIUM_BROWSER_DRIVER_PATH: str
+    SELENIUM_BROWSER_DRIVER_PATH: str | None = None
 
     # LinkedIn Credentials
     LINKEDIN_USERNAME: str | None = None
@@ -42,14 +42,14 @@ class Config:
     COMET_WORKSPACE: str | None = None
     COMET_PROJECT: str | None = None
 
-    ARN_ROLE: str
+    ARN_ROLE: str | None = None
     HUGGING_FACE_HUB_TOKEN: str
 
-    HF_MODEL_ID: str = "test"
-    GPU_INSTANCE_TYPE: str = "test"
+    HF_MODEL_ID: str = "crumb/nano-mistral"
+    GPU_INSTANCE_TYPE: str = "ml.g5.xlarge"
     SM_NUM_GPUS: int = 1
-    MAX_INPUT_LENGTH: int = 20000
-    MAX_TOTAL_TOKENS: int = 32000
+    MAX_INPUT_LENGTH: int = 8000
+    MAX_TOTAL_TOKENS: int = 12000
     MAX_BATCH_TOTAL_TOKENS: int = 12000
     COPIES: int = 4  # Number of replicas
     GPUS: int = 1  # Number of GPUs
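Relaxing SELENIUM_BROWSER_DRIVER_PATH and ARN_ROLE to str | None = None lets the settings object be constructed even when those environment variables are absent, so workflows that never touch Selenium or AWS no longer fail on startup. A minimal sketch of the effect, assuming a pydantic v1-style BaseSettings like the class in this diff (the str | None syntax needs Python 3.10+):

```python
from pydantic import BaseSettings  # pydantic v1 import, matching the inner Config style


class DemoSettings(BaseSettings):
    HUGGING_FACE_HUB_TOKEN: str  # required: a missing env var raises a ValidationError
    ARN_ROLE: str | None = None  # optional: falls back to None when unset


# With HUGGING_FACE_HUB_TOKEN exported and ARN_ROLE unset, this now succeeds.
settings = DemoSettings()
print(settings.ARN_ROLE)  # None
```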
