Essential Python Snippets for Data Scientists
2 min read · Jul 25, 2024
In this article, I’d like to introduce the Python code snippets I use most frequently in Jupyter notebooks. These snippets can enhance your productivity when working with data and models. Let’s dive in!
Display full output in Jupyter notebook cell
# Display full output: with "all", IPython shows the value of every
# top-level expression in a cell, not only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"
Check the current working directory and change it to a new one
# check current directory
!pwd
# set up new working directory
import os
work_directory = "new_dir/new_subdir"
os.chdir(work_directory)
# get a ready-to-use path for reading a file
os.path.join(work_directory, "new_file.txt")
Check the memory usage and clear cache
# Check memory usage (system-wide memory statistics reported by psutil)
import psutil
psutil.virtual_memory()
# delete variables that are no longer needed
# (replace the placeholder below with the actual variable names)
del <variables_not_needed>
# run a garbage collection pass to free up the memory
import gc
gc.collect()
Check GPU memory usage
# Check GPU memory usage (requires an NVIDIA driver and the pynvml package)
# Import only the names used here instead of `from pynvml import *`, which
# would flood the notebook namespace with every NVML symbol.
from pynvml import nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlInit
nvmlInit()
# handle of the first GPU (index 0)
h = nvmlDeviceGetHandleByIndex(0)
info = nvmlDeviceGetMemoryInfo(h)
# Each line is labelled with the field it prints; previously all three
# lines were labelled "total".
print(f"total :{info.total}")
print(f"free :{info.free}")
print(f"used :{info.used}")
Check the number of GPUs available
import torch
# Number of CUDA GPUs available to PyTorch
torch.cuda.device_count()
Clear the PyTorch CUDA cache
# Import what this snippet uses so it also runs on its own, not only after
# the cells that happened to import gc and torch earlier.
import gc
import torch
# Drop the reference to the model so its tensors become unreachable
model = None
# Collect the now-unreferenced Python objects
gc.collect()
# Release cached GPU memory that PyTorch's allocator is no longer using
torch.cuda.empty_cache()
Check the package version
import torch
# Version string of the installed torch package
torch.__version__
Save and load models using pickle
import os
import pickle
model_directory = "new_directory"
model_file_name = "model.pkl"
# Build the path once so saving and loading are guaranteed to use the same file.
model_path = os.path.join(model_directory, model_file_name)
# save a model (`model` is the object to persist; the directory must already exist)
with open(model_path, "wb") as f:
    pickle.dump(model, f)
# load a model
# NOTE: only unpickle files you trust -- pickle.load can execute arbitrary
# code embedded in the file.
with open(model_path, "rb") as f:
    model = pickle.load(f)
Output the current time and calculate the time elapsed by a piece of code
# Get current time (timezone-aware, in UTC)
# The module is `datetime` (not `datatime`), and `timezone` must be imported
# too because it is used in every call below.
from datetime import datetime, timezone
datetime.now(timezone.utc)
# Measure elapsed time: take one timestamp before and one after the code
start_time = datetime.now(timezone.utc)
#
# ... code to be timed goes here ...
#
end_time = datetime.now(timezone.utc)
# Subtracting two datetimes yields a datetime.timedelta
time_elapsed = end_time - start_time
Set up logging
# Set up logging: INFO and above to the console, DEBUG and above to a
# size-rotated log file, both using the same line format.
import logging.handlers  # also binds the top-level `logging` package
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)
# getLogger() returns the same logger object on every run, so re-running this
# cell would attach another pair of handlers and emit each message several
# times. Detach and close handlers from a previous run first.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()
# handlers
consoleHandler = logging.StreamHandler()
# rotate once the file reaches about 1 MB, keeping one backup file
fileHandler = logging.handlers.RotatingFileHandler(filename="LogsProjectXyz.log", maxBytes=1000000, backupCount=1)
consoleHandler.setLevel(logging.INFO)
fileHandler.setLevel(logging.DEBUG)
# formatting
formatter = logging.Formatter(
    fmt='%(filename)s | %(lineno)d | %(funcName)s | %(asctime)s | %(levelname)s: %(message)s',
    datefmt='%d-%b-%y %H:%M:%S',
)
consoleHandler.setFormatter(formatter)
fileHandler.setFormatter(formatter)
# add handlers to the logger
logger.addHandler(consoleHandler)
logger.addHandler(fileHandler)