{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import io\n", "import pandas as pd\n", "from dotenv import load_dotenv\n", "from azure.identity import DefaultAzureCredential\n", "from azure.storage.blob import ContainerClient" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load all environment variables\n", "load_dotenv()\n", "account_url = os.getenv('AZ_STORAGE_EP')\n", "container_name = os.getenv('AZ_STORAGE_CONTAINER')" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Authenticate\n", "default_credential = DefaultAzureCredential()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "For the first time, you might need to authenticate via the Azure CLI\n", "\n", "Download it from https://learn.microsoft.com/en-us/cli/azure/install-azure-cli-windows?tabs=azure-cli\n", "\n", "Install then run `az login` in your terminal. Once you have logged in with your browser try the `DefaultAzureCredential()` again!" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# Connect to container\n", "container_client = ContainerClient(account_url, container_name, default_credential)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "newdir/cats.parquet\n", "newdir/ronald.jpeg\n" ] } ], "source": [ "# List files in container - should be empty\n", "blob_list = container_client.list_blob_names()\n", "for blob in blob_list:\n", " if blob.startswith('newdir'):\n", " print(blob)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Upload file to container\n", "with open(file='data/cats.csv', mode=\"rb\") as data:\n", " blob_client = container_client.upload_blob(name='newdir/cats.csv', \n", " data=data, \n", " overwrite=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "newdir/cats.csv\n", "newdir/cats.parquet\n", "newdir/ronald.jpeg\n" ] } ], "source": [ "# # Check files have uploaded - List files in container again\n", "blob_list = container_client.list_blobs()\n", "for blob in blob_list:\n", " if blob['name'].startswith('newdir'):\n", " print(blob['name'])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NamePhysical_characteristicsBehaviour
0RonaldWhite and gingerLazy and greedy but undoubtedly cutest and best
1KaspieSmall calicoSweet and very shy but adventurous
2HennimorePale orangeUnhinged and always in a state of panic
3Thug catBlack and white - very largeLocal bully
4Son of StripeyGrey tabbyVery vocal
\n", "
" ], "text/plain": [ " Name Physical_characteristics \\\n", "0 Ronald White and ginger \n", "1 Kaspie Small calico \n", "2 Hennimore Pale orange \n", "3 Thug cat Black and white - very large \n", "4 Son of Stripey Grey tabby \n", "\n", " Behaviour \n", "0 Lazy and greedy but undoubtedly cutest and best \n", "1 Sweet and very shy but adventurous \n", "2 Unhinged and always in a state of panic \n", "3 Local bully \n", "4 Very vocal " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Download file from Azure container to temporary filepath\n", "\n", "# Connect to blob\n", "blob_client = container_client.get_blob_client('newdir/cats.csv')\n", "\n", "# Write to local file from blob\n", "temp_filepath = os.path.join('temp_data', 'cats.csv')\n", "with open(file=temp_filepath, mode=\"wb\") as sample_blob:\n", " download_stream = blob_client.download_blob()\n", " sample_blob.write(download_stream.readall())\n", "cat_data = pd.read_csv(temp_filepath)\n", "cat_data.head()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NamePhysical_characteristicsBehaviour
0RonaldWhite and gingerLazy and greedy but undoubtedly cutest and best
1KaspieSmall calicoSweet and very shy but adventurous
2HennimorePale orangeUnhinged and always in a state of panic
3Thug catBlack and white - very largeLocal bully
4Son of StripeyGrey tabbyVery vocal
\n", "
" ], "text/plain": [ " Name Physical_characteristics \\\n", "0 Ronald White and ginger \n", "1 Kaspie Small calico \n", "2 Hennimore Pale orange \n", "3 Thug cat Black and white - very large \n", "4 Son of Stripey Grey tabby \n", "\n", " Behaviour \n", "0 Lazy and greedy but undoubtedly cutest and best \n", "1 Sweet and very shy but adventurous \n", "2 Unhinged and always in a state of panic \n", "3 Local bully \n", "4 Very vocal " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Load directly from Azure - no local copy\n", "\n", "download_stream = blob_client.download_blob()\n", "stream_object = io.BytesIO(download_stream.readall())\n", "cat_data = pd.read_csv(stream_object)\n", "cat_data" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# !!!!!!!!! Delete from Azure container !!!!!!!!!\n", "blob_client = container_client.get_blob_client('newdir/cats.csv')\n", "blob_client.delete_blob()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "newdir/cats.parquet\n", "newdir/ronald.jpeg\n" ] } ], "source": [ "blob_list = container_client.list_blobs()\n", "for blob in blob_list:\n", " if blob['name'].startswith('newdir'):\n", " print(blob['name'])" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.2" } }, "nbformat": 4, "nbformat_minor": 2 }