Essential Python Snippets for Data Scientists

lzhangstat
2 min readJul 25, 2024

--

In this article, I’d like to introduce the Python code snippets I use most frequently in Jupyter notebooks. These snippets can enhance your productivity when working with data and models. Let’s dive in!

Display full output in Jupyter notebook cell

# Display full output
# Setting ast_node_interactivity to "all" asks IPython to display the result of
# every expression statement in a cell, rather than only the last one.
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

Check the current working directory and change it to a new one

# check current directory
# (os.getcwd() is plain Python, so it also works outside Jupyter and on Windows)
import os
print(os.getcwd())
# set up new working directory
# Resolve to an absolute path BEFORE changing directory: a relative path would
# otherwise be re-resolved against the new working directory in the join below
# and point at new_dir/new_subdir/new_dir/new_subdir/...
work_directory = os.path.abspath("new_dir/new_subdir")
os.chdir(work_directory)
# get a ready-to-use path for reading a file
os.path.join(work_directory, "new_file.txt")

Check the memory usage and clear cache

# Check memory usage
# (psutil is a third-party package and must be installed separately)
import psutil
psutil.virtual_memory()
# delete variables that are no longer needed
# NOTE: <variables_not_needed> is a placeholder, not valid Python --
# replace it with the actual variable name(s), e.g. `del df, model`.
del <variables_not_needed>
# garbage collection to free up the memory
import gc
gc.collect()

Check GPU memory usage

# Check GPU memory usage
# Import only the names that are used instead of `from pynvml import *`,
# so the notebook namespace is not flooded with NVML symbols.
from pynvml import nvmlDeviceGetHandleByIndex, nvmlDeviceGetMemoryInfo, nvmlInit
nvmlInit()
# index 0 = the first GPU; change the index to inspect another device
h = nvmlDeviceGetHandleByIndex(0)
info = nvmlDeviceGetMemoryInfo(h)
# each value is printed with its own label (previously all three said "total")
print(f"total :{info.total}")
print(f"free :{info.free}")
print(f"used :{info.used}")

Check the number of GPUs available

import torch
# Number of GPUs available
# NOTE: this is a bare expression, so the value is only shown by the notebook;
# wrap it in print() when running as a script.
torch.cuda.device_count()

Clear the PyTorch CUDA cache

# Drop the reference held by `model` so the object can be reclaimed.
model = None
# NOTE(review): this snippet relies on `gc` and `torch` having been imported
# in earlier cells; add `import gc` / `import torch` if run on its own.
gc.collect()
# presumably releases cached GPU memory held by PyTorch -- confirm against the
# torch.cuda.empty_cache documentation
torch.cuda.empty_cache()

Check the package version

import torch
# Version of the imported package; a bare expression, so only the notebook
# displays it -- use print(torch.__version__) in a script.
torch.__version__

Load and save models using pickle

import pickle
model_directory = "new_directory"
model_file_name = "model.pkl"
# build the path once so saving and loading always use the same file
model_path = f"{model_directory}/{model_file_name}"

# save a model
# NOTE: `model` must already be defined, and model_directory must already
# exist on disk -- open() does not create missing directories.
with open(model_path, "wb") as f:
    pickle.dump(model, f)

# load a model
# SECURITY: only unpickle files you trust -- pickle.load can execute
# arbitrary code embedded in the file.
with open(model_path, "rb") as f:
    model = pickle.load(f)

Output current time and calculate the time elapsed in a function

# Get current time
# `timezone` must be imported alongside `datetime`; passing timezone.utc makes
# the timestamp timezone-aware (UTC) instead of naive local time.
from datetime import datetime, timezone
datetime.now(timezone.utc)

# Time a piece of code: take a timestamp before and after it
start_time = datetime.now(timezone.utc)
#
#xxxxx  <- put the code to be timed here
#
end_time = datetime.now(timezone.utc)
# subtracting two datetimes yields a datetime.timedelta
time_elapsed = end_time - start_time

Set up logging

# Set up logging
# Messages of level INFO and above go to the console; everything from DEBUG
# upward goes to a size-limited, rotating log file.

import logging.handlers
logger = logging.getLogger(__name__)
logger.setLevel(logging.DEBUG)

# Remove handlers left over from a previous run of this cell. getLogger()
# returns the same logger object every time, so without this step each re-run
# would attach another pair of handlers and every message would be duplicated.
for _stale_handler in list(logger.handlers):
    logger.removeHandler(_stale_handler)
    _stale_handler.close()

# handlers
consoleHandler = logging.StreamHandler()
# rotate once the file reaches maxBytes, keeping one backup file
fileHandler = logging.handlers.RotatingFileHandler(
    filename="LogsProjectXyz.log", maxBytes=1000000, backupCount=1
)
consoleHandler.setLevel(logging.INFO)
fileHandler.setLevel(logging.DEBUG)

# formatting
formatter = logging.Formatter(
    fmt='%(filename)s | %(lineno)d | %(funcName)s | %(asctime)s | %(levelname)s: %(message)s',
    datefmt='%d-%b-%y %H:%M:%S',
)
consoleHandler.setFormatter(formatter)
fileHandler.setFormatter(formatter)

# add handlers to the logger
logger.addHandler(consoleHandler)
logger.addHandler(fileHandler)

--

--

lzhangstat
lzhangstat

Written by lzhangstat

Stat, math, machine learning and much more!

No responses yet