{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import os\n",
"import io\n",
"import pandas as pd\n",
"from dotenv import load_dotenv\n",
"from azure.identity import DefaultAzureCredential\n",
"from azure.storage.blob import ContainerClient"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# Load variables from the .env file into the environment, then read the two this notebook needs\n",
"load_dotenv()\n",
"account_url = os.getenv('AZ_STORAGE_EP')\n",
"container_name = os.getenv('AZ_STORAGE_CONTAINER')\n",
"\n",
"# os.getenv returns None for an unset variable; stop here with a clear message\n",
"# instead of letting the None reach ContainerClient and fail obscurely there.\n",
"required_settings = {'AZ_STORAGE_EP': account_url, 'AZ_STORAGE_CONTAINER': container_name}\n",
"missing_settings = [name for name, value in required_settings.items() if not value]\n",
"if missing_settings:\n",
"    raise RuntimeError('Missing environment variable(s): ' + ', '.join(missing_settings))"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# Create the credential object that is later handed to ContainerClient.\n",
"# It is built with no arguments, so no secrets are written into the notebook.\n",
"default_credential = DefaultAzureCredential()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The first time you run this notebook, you may need to authenticate via the Azure CLI.\n",
"\n",
"Download it from https://learn.microsoft.com/en-us/cli/azure/install-azure-cli-windows?tabs=azure-cli\n",
"\n",
"Install it, then run `az login` in your terminal. Once you have logged in through your browser, run the `DefaultAzureCredential()` cell again."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Open a client for the configured container, authenticating with the credential created earlier\n",
"container_client = ContainerClient(\n",
"    account_url=account_url,\n",
"    container_name=container_name,\n",
"    credential=default_credential,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"newdir/cats.parquet\n",
"newdir/ronald.jpeg\n"
]
}
],
"source": [
"# List the blobs already stored under the 'newdir' prefix (before uploading anything new).\n",
"# The prefix is passed as name_starts_with so the service does the filtering,\n",
"# rather than listing every blob in the container and filtering locally.\n",
"blob_list = container_client.list_blob_names(name_starts_with='newdir')\n",
"for blob_name in blob_list:\n",
"    print(blob_name)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Send the local CSV to the container; overwrite=True replaces an existing blob of the same name\n",
"with open('data/cats.csv', 'rb') as csv_file:\n",
"    blob_client = container_client.upload_blob('newdir/cats.csv', csv_file, overwrite=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"newdir/cats.csv\n",
"newdir/cats.parquet\n",
"newdir/ronald.jpeg\n"
]
}
],
"source": [
"# Check the file has uploaded - list the 'newdir' blobs again.\n",
"# name_starts_with makes the service apply the prefix filter, so only matching\n",
"# blobs are returned instead of the whole container being listed.\n",
"blob_list = container_client.list_blobs(name_starts_with='newdir')\n",
"for blob in blob_list:\n",
"    print(blob['name'])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
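{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Name</th>\n",
"      <th>Physical_characteristics</th>\n",
"      <th>Behaviour</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>Ronald</td>\n",
"      <td>White and ginger</td>\n",
"      <td>Lazy and greedy but undoubtedly cutest and best</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>Kaspie</td>\n",
"      <td>Small calico</td>\n",
"      <td>Sweet and very shy but adventurous</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>Hennimore</td>\n",
"      <td>Pale orange</td>\n",
"      <td>Unhinged and always in a state of panic</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>Thug cat</td>\n",
"      <td>Black and white - very large</td>\n",
"      <td>Local bully</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>Son of Stripey</td>\n",
"      <td>Grey tabby</td>\n",
"      <td>Very vocal</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],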
"text/plain": [
" Name Physical_characteristics \\\n",
"0 Ronald White and ginger \n",
"1 Kaspie Small calico \n",
"2 Hennimore Pale orange \n",
"3 Thug cat Black and white - very large \n",
"4 Son of Stripey Grey tabby \n",
"\n",
" Behaviour \n",
"0 Lazy and greedy but undoubtedly cutest and best \n",
"1 Sweet and very shy but adventurous \n",
"2 Unhinged and always in a state of panic \n",
"3 Local bully \n",
"4 Very vocal "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Download file from Azure container to a temporary local filepath\n",
"\n",
"# Connect to blob\n",
"blob_client = container_client.get_blob_client('newdir/cats.csv')\n",
"\n",
"# Make sure the target folder exists first: open() in 'wb' mode creates the file\n",
"# but not its parent directory, so a missing folder would raise FileNotFoundError.\n",
"temp_dir = 'temp_data'\n",
"os.makedirs(temp_dir, exist_ok=True)\n",
"temp_filepath = os.path.join(temp_dir, 'cats.csv')\n",
"\n",
"# Write the blob's bytes to the local file\n",
"with open(file=temp_filepath, mode='wb') as sample_blob:\n",
"    download_stream = blob_client.download_blob()\n",
"    sample_blob.write(download_stream.readall())\n",
"\n",
"# Read the local copy into a DataFrame and preview the first rows\n",
"cat_data = pd.read_csv(temp_filepath)\n",
"cat_data.head()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
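{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
"    .dataframe tbody tr th:only-of-type {\n",
"        vertical-align: middle;\n",
"    }\n",
"\n",
"    .dataframe tbody tr th {\n",
"        vertical-align: top;\n",
"    }\n",
"\n",
"    .dataframe thead th {\n",
"        text-align: right;\n",
"    }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>Name</th>\n",
"      <th>Physical_characteristics</th>\n",
"      <th>Behaviour</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>Ronald</td>\n",
"      <td>White and ginger</td>\n",
"      <td>Lazy and greedy but undoubtedly cutest and best</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>Kaspie</td>\n",
"      <td>Small calico</td>\n",
"      <td>Sweet and very shy but adventurous</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>Hennimore</td>\n",
"      <td>Pale orange</td>\n",
"      <td>Unhinged and always in a state of panic</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>Thug cat</td>\n",
"      <td>Black and white - very large</td>\n",
"      <td>Local bully</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>Son of Stripey</td>\n",
"      <td>Grey tabby</td>\n",
"      <td>Very vocal</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],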
"text/plain": [
" Name Physical_characteristics \\\n",
"0 Ronald White and ginger \n",
"1 Kaspie Small calico \n",
"2 Hennimore Pale orange \n",
"3 Thug cat Black and white - very large \n",
"4 Son of Stripey Grey tabby \n",
"\n",
" Behaviour \n",
"0 Lazy and greedy but undoubtedly cutest and best \n",
"1 Sweet and very shy but adventurous \n",
"2 Unhinged and always in a state of panic \n",
"3 Local bully \n",
"4 Very vocal "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the blob straight into pandas - nothing is written to local disk\n",
"\n",
"# Fetch the blob's bytes, then wrap them in a file-like buffer that read_csv can consume\n",
"download_stream = blob_client.download_blob()\n",
"blob_bytes = download_stream.readall()\n",
"stream_object = io.BytesIO(blob_bytes)\n",
"\n",
"cat_data = pd.read_csv(stream_object)\n",
"cat_data"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"# !!! DESTRUCTIVE: deletes the uploaded blob from the Azure container !!!\n",
"blob_to_delete = 'newdir/cats.csv'\n",
"blob_client = container_client.get_blob_client(blob_to_delete)\n",
"blob_client.delete_blob()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"newdir/cats.parquet\n",
"newdir/ronald.jpeg\n"
]
}
],
"source": [
"# Confirm the delete: list the 'newdir' blobs once more - cats.csv should be gone.\n",
"# name_starts_with makes the service apply the prefix filter, so only matching\n",
"# blobs are returned instead of the whole container being listed.\n",
"blob_list = container_client.list_blobs(name_starts_with='newdir')\n",
"for blob in blob_list:\n",
"    print(blob['name'])"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "base",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.2"
}
},
"nbformat": 4,
"nbformat_minor": 2
}