Lab: Introduction to the Polars GPU Engine¶
Table of Contents¶
This Lab Notebook briefly introduces DataFrame manipulations using the Polars library and its GPU engine. This notebook covers the following sections:
Introduction to the Polars Library¶
Polars is a data analysis and manipulation library designed for processing large datasets (roughly 10-100 GB) on a single machine, and it is known for its speed and memory efficiency. While Pandas uses eager execution, Polars additionally supports lazy execution through a built-in query optimizer and makes use of zero-copy optimization techniques. Thanks to these design choices, Polars delivers significant acceleration out of the box compared to other CPU-only data manipulation tools.
cuDF is part of the NVIDIA RAPIDS suite of CUDA-X libraries. It’s a GPU-accelerated DataFrame library that harnesses the massive parallelism of GPUs to significantly enhance data processing performance.
Bringing the speed of cuDF on NVIDIA GPUs to the efficiency of Polars adds an additional performance boost, allowing you to maintain an interactive experience as your datasets grow to hundreds of millions or even billions of rows.
Creating a DataFrame¶
Let's first start by installing Polars. Installing the polars[gpu] extra also installs the GPU engine.
pip install polars[gpu] --extra-index-url=https://pypi.nvidia.com
Next, let's download the dataset that we'll be using for today. Please note, by downloading this dataset, you agree to the UK National Archives licensing terms.
# Download and unzip files if they do not exist
!if [ ! -f "./uk_pop.zip" ]; then curl "https://data.rapids.ai/teaching-kit/uk_pop.zip" -o ./uk_pop.zip; else echo "Population dataset already downloaded"; fi
!if [ ! -f "./uk_pop.csv" ]; then unzip -d ./ ./uk_pop.zip ; else echo "Population dataset found and ready"; fi
Now let's see how the syntax looks! We will create a dataframe to use within Polars.
import polars as pl
import time
start_time = time.time()
polars_df = pl.read_csv('./uk_pop.csv')  # read_csv already returns a pl.DataFrame
polars_time = time.time() - start_time
print(f"Time Taken: {polars_time:.4f} seconds")
polars_df.head()
Running Basic Operations¶
That was simple! Now let's try running a few operations on the dataset.
start_time = time.time()
# Load the data
polars_df = pl.read_csv('./uk_pop.csv')
# Filter for ages above 0
filtered_df = polars_df.filter(pl.col('age') > 0.0)
# Sort by name (descending)
sorted_df = filtered_df.sort('name', descending=True)
print(sorted_df.head())
polars_time = time.time() - start_time
print(f"Time Taken: {polars_time:.4f} seconds")
Basic Polars Operations¶
Please refer to the following API reference guide to complete the exercises below.
- Load data
- Calculate average age of population
- Group By and Aggregation
- Gender Distribution
Polars Eager Execution API Reference¶
DataFrame
The main data structure for eager execution in Polars.
- pl.DataFrame(data): Create a DataFrame from data
- pl.read_csv(file): Read a CSV file into a DataFrame
- pl.read_parquet(file): Read a Parquet file into a DataFrame
Key Methods
- filter(mask): Filter rows based on a boolean mask
- select(columns): Select specific columns
- with_columns(expressions): Add or modify columns
- group_by(columns): Group by specified columns
- agg(aggregations): Perform aggregations on grouped data
- sort(columns): Sort the data by specified columns
- join(other, on): Join with another DataFrame
Expressions
Used to define operations on columns:
- pl.col("column"): Reference a column
- pl.lit(value): Create a literal value
- pl.when(predicate).then(value).otherwise(other): Conditional expression
Series Operations
- series.sum(): Calculate sum of series
- series.mean(): Calculate mean of series
- series.max(): Find maximum value in series
- series.min(): Find minimum value in series
- series.sort(): Sort series values
Data Types
- pl.Int64: 64-bit integer
- pl.Float64: 64-bit float
- pl.Utf8: String
- pl.Boolean: Boolean
- pl.Date: Date
Utilities
- pl.concat([df1, df2]): Concatenate DataFrames
- df.describe(): Generate summary statistics
- df.write_csv(file): Write DataFrame to CSV
- df.write_parquet(file): Write DataFrame to Parquet
The eager API executes operations immediately, providing direct access to results. It's suitable for interactive data exploration and smaller datasets.
Exercise #1 - Load Data¶
Load the CSV file into a DataFrame using Polars.
Exercise #2 - Calculate Average Age of Population¶
Now, calculate the average age of the population. Then, filter for individuals aged 65 and above and sort by ascending age.
Exercise #3 - Group By and Aggregation¶
Next, group by county and calculate the total population and average age.
Exercise #4 - Gender Distribution¶
Lastly, let's calculate the percentage of males to females in the sample data.
Lazy Execution¶
Polars utilizes a technique called lazy execution to perform operations. Unlike eager execution, where operations are performed immediately, Polars defines and stores operations in a computational graph that isn't executed until explicitly required. This allows Polars to optimize the sequence of operations to minimize computation overhead and apply optimization techniques such as: applying filters early (predicate pushdown), selecting only necessary columns (projection pushdown), and executing operations in parallel. To make use of lazy execution in Polars, a "LazyFrame" data structure is used.
Now, let's run the same operations with lazy execution and visualize the graph!
Polars Lazy Execution API Reference¶
LazyFrame
The main entry point for lazy execution in Polars. Created from a DataFrame or data source.
- pl.LazyFrame(data): Create a LazyFrame from data
- df.lazy(): Convert a DataFrame to a LazyFrame
Key Methods
- filter(predicate): Filter rows based on a condition
- select(columns): Select specific columns
- with_columns(expressions): Add or modify columns
- group_by(columns): Group by specified columns
- agg(aggregations): Perform aggregations on grouped data
- sort(columns): Sort the data by specified columns
- join(other, on): Join with another LazyFrame
- collect(): Execute the lazy query and return a DataFrame
Expressions
Used to define operations on columns:
- pl.col("column"): Reference a column
- pl.lit(value): Create a literal value
- pl.when(predicate).then(value).otherwise(other): Define a conditional expression
Execution
- collect(): Execute and return a DataFrame
- fetch(n): Execute and return the first n rows
- describe_plan(): Show the query plan for optimization insights
- explain(): Explain the query execution process
Optimization
- cache(): Cache intermediate results for faster access
The lazy API allows building complex queries that are optimized before execution, enabling better performance for large datasets.
import polars as pl
import time
start_time = time.time()
# Create a lazy DataFrame
lazy_df = pl.scan_csv('./uk_pop.csv')
# Define the lazy operations
lazy_result = (
lazy_df
.filter(pl.col('age') > 0.0)
.sort('name', descending=True)
)
# Execute the lazy query and collect the results
result = lazy_result.collect()
print(result.head())
polars_time = time.time() - start_time
print(f"Time Taken: {polars_time:.4f} seconds")
Execution Graph¶
Let's see how the unoptimized execution graph looks.
# Show unoptimized Graph
lazy_result.show_graph(optimized=False)
# Show optimized Graph
lazy_result.show_graph(optimized=True)
As we can see, during execution Polars pushed the age filter down into the CSV scan, so rows are filtered while the file is being read. These types of optimizations are part of the reason why Polars is such a powerful data science tool.
Exercise #5 - Creating a Lazy Dataframe¶
First, let's load the CSV as a LazyFrame.
Exercise #6 - Query Creation¶
Now, let's create a query to find the 5 most common names for individuals under 30.
Polars GPU engine¶
The Polars GPU engine is built directly into the Polars Lazy API. The only requirement is to pass engine="gpu" to the collect operation. Polars also allows defining an instance of the GPU engine for greater customization!
df = pl.scan_csv('./uk_pop.csv').collect(engine="gpu")  # collect returns an eager DataFrame
Now let's try defining our own engine object!
import polars as pl
import time
gpu_engine = pl.GPUEngine(
device=0, # This is the default
raise_on_fail=True, # Fail loudly if we can't run on the GPU.
)
df = pl.scan_csv('./uk_pop.csv').collect(engine=gpu_engine)
Now that the GPU is warmed up, let's try accelerating the same code as before! Notice that we added an engine parameter to the collect call.
Accelerate Previous Code¶
start_time = time.time()
# Create a lazy DataFrame
lazy_df = pl.scan_csv('./uk_pop.csv')
# Define the lazy operations
lazy_result = (
lazy_df
.filter(pl.col('age') > 0.0)
.sort('name', descending=True)
)
# Switch to gpu_engine
result = lazy_result.collect(engine=gpu_engine)
print(result.head())
polars_time = time.time() - start_time
print(f"Time Taken: {polars_time:.4f} seconds")
Verify Results Across Engines¶
How do we know the results are the same with both the CPU and GPU engine? Luckily with Polars, we can execute the same query on both and compare the results using the built-in testing module!
from polars.testing import assert_frame_equal
# Run on the CPU
result_cpu = lazy_result.collect()
# Run on the GPU
result_gpu = lazy_result.collect(engine="gpu")
# assert_frame_equal raises an AssertionError if the frames differ,
# and returns None when they are equal
assert_frame_equal(result_gpu, result_cpu)
print("The test frames are equal")
Fallback¶
What happens when an operation isn't supported?
result = (
lazy_df
.with_columns(pl.col('age').rolling_mean(window_size=7).alias('age_rolling_mean'))
.filter(pl.col('age') > 0.0)
.collect(engine=gpu_engine)
)
print(result[::7])
We initially constructed the GPU engine with raise_on_fail=True to ensure all operations run on the GPU. But as we can see, the rolling mean operation is not currently supported, so the query fails to execute. To enable fallback to the CPU, we can simply change the raise_on_fail parameter to False.
gpu_engine_with_fallback = pl.GPUEngine(
device=0, # This is the default
raise_on_fail=False, # Fallback to CPU if we can't run on the GPU (this is the default)
)
Now let's try this query again.
result = (
lazy_df
.with_columns(pl.col('age').rolling_mean(window_size=7).alias('age_rolling_mean'))
.filter(pl.col('age') > 0.0)
.collect(engine=gpu_engine_with_fallback)
)
print(result[::7])
Exercise #7 - Enable GPU Engine¶
The code below calculates the average latitude and longitude for each county. Let's try enabling the GPU engine for this query!
# Create the lazy query with column pruning
lazy_query = (
lazy_df
.select(["county", "lat", "long"]) # Column pruning: select only necessary columns
.group_by("county")
.agg([
pl.col("lat").mean().alias("avg_latitude"),
pl.col("long").mean().alias("avg_longitude")
])
.sort("county")
)
# Execute the query
result = lazy_query.collect()
print("\nAverage latitude and longitude for each county:")
print(result.head()) # Display first few rows
# Shut down the kernel to release resources
import IPython
app = IPython.Application.instance()
app.kernel.do_shutdown(True)
Conclusion¶
Well done! In this lab, you learned the basics of the Polars library, using both CPU and GPU execution, to:
- Create a DataFrame
- Run basic Polars operations
- Understand the execution graph
- Turn on GPU acceleration in Polars
Continue your Polars journey by visiting the Polars user guide, and continue your GPU-accelerated data science journey at https://github.com/rapidsai-community/showcase/tree/main/accelerated_data_processing_examples