Examples¶
This section provides real-world examples demonstrating how to use pyBDL for common data analysis tasks.
Basic Data Retrieval¶
Getting Started¶
from pybdl import BDL, BDLConfig
# Initialize client
bdl = BDL()
# List available administrative levels
levels = bdl.levels.list_levels()
print("Administrative levels:")
print(levels[['id', 'name']])
# List available years
years = bdl.years.list_years()
print(f"\nAvailable years: {years['id'].min()} - {years['id'].max()}")
Fetching levels: 1 pages [00:00, 15.30 pages/s, items=8]
Administrative levels:
id name
0 0 Poziom Polski
1 1 Poziom Makroregionów
2 2 Poziom Województw
3 3 Poziom Regionów
4 4 Poziom Podregionów
5 5 Poziom Powiatów
6 6 Poziom Gmin
7 7 Poziom miejscowości statystycznej
Fetching years: 1 pages [00:00, 12.90 pages/s, items=31]
Available years: 1995 - 2025
Finding Variables¶
# Search for population-related variables
population_vars = bdl.variables.search_variables(name="population")
print(f"Found {len(population_vars)} population-related variables")
print(population_vars[['id', 'n1']].head())
# Get details for a specific variable
var_details = bdl.variables.get_variable("3643")
print(f"\nVariable details:")
print(var_details[['id', 'n1', 'n2']])
Fetching search: 18 pages [00:42, 2.37s/ pages, items=1756]
Found 1756 population-related variables
id n1
0 9179 concerning self-taxation of the population
1 1365239 total net migration per 1000 population
2 498816 net migration in internal movement per 1000 po...
3 745534 net migration abroad per 1000 population
4 453193 net migration per 1000 population
Variable details:
id n1 n2
0 3643 total 30-39
Retrieving Data¶
# Get data for a variable at voivodeship level (level 2)
data = bdl.data.get_data_by_variable(
variable_id="3643",
years=[2021],
unit_level=2 # Voivodeship level
)
print(f"Retrieved {len(data)} data points")
print(data[['unit_name', 'year', 'val']].head())
Fetching 3643: 1 pages [00:00, 13.40 pages/s, items=16]
Retrieved 16 data points
unit_name year val
0 MAŁOPOLSKIE 2021 2
1 ŚLĄSKIE 2021 6
2 LUBUSKIE 2021 0
3 WIELKOPOLSKIE 2021 2
4 ZACHODNIOPOMORSKIE 2021 5
data_aggr = bdl.aggregates.list_aggregates()
print(data_aggr)
Fetching aggregates: 1 pages [00:00, 14.59 pages/s, items=8]
id name level \
0 1 TOTAL 7
1 2 URBAN GMINAS 5
2 3 URBAN-RURAL GMINAS 5
3 4 RURAL GMINAS 5
4 7 URBAN AREAS 5
5 8 RURAL AREAS 5
6 91 NP- Górnośląsko-Zagłębiowska Metropolia 3
7 92 NP- Metropolia Krakowska 3
description
0 Aggregates for items collected on the level: <...
1 It is a sum of data for urban gminas (unit typ...
2 It is a sum of data for urban-rural gminas (un...
3 It is a sum of these data for rural gminas (un...
4 It is a sum of data for urban areas according...
5 It is a sum of data for rural areas, i.e. rura...
6 NaN
7 NaN
Population Analysis by Region¶
Finding Population Variables¶
# Search for population variables
pop_vars = bdl.variables.search_variables(name="population")
# Filter for total population (usually contains "total" or "ogółem")
total_pop = pop_vars[
pop_vars['n2'].str.contains('total', case=False, na=False)
]
print(f"Found {len(total_pop)} total population variables")
Fetching search: 18 pages [00:00, 516.38 pages/s, items=1756]
Found 321 total population variables
Getting Regional Population Data¶
# Get population data for all voivodeships in 2021
pop_data = bdl.data.get_data_by_variable(
variable_id="3643", # Example: total population variable
years=[2021],
unit_level=2 # Voivodeship level
)
# Sort by population
pop_sorted = pop_data.sort_values('val', ascending=False)
print("Top 5 voivodeships by population:")
print(pop_sorted[['unit_name', 'val']].head())
Fetching 3643: 1 pages [00:00, 216.45 pages/s, items=16]
Top 5 voivodeships by population:
unit_name val
15 MAZOWIECKIE 12
1 ŚLĄSKIE 6
4 ZACHODNIOPOMORSKIE 5
8 POMORSKIE 4
10 ŁÓDZKIE 4
Economic Indicator Comparison¶
Finding Economic Variables¶
# Search for unemployment variables
unemployment_vars = bdl.variables.search_variables(name="unemployment")
print(f"Found {len(unemployment_vars)} unemployment variables")
# Search for GDP-related variables
gdp_vars = bdl.variables.search_variables(name="GDP")
print(f"Found {len(gdp_vars)} GDP-related variables")
Fetching search: 5 pages [00:00, 424.99 pages/s, items=458]
Found 458 unemployment variables
Fetching search: 1 pages [00:03, 3.34s/ pages, items=11]
Found 11 GDP-related variables
Comparing Voivodeships¶
# Get unemployment data for all voivodeships
unemployment_data = bdl.data.get_data_by_variable(
variable_id="1234", # Example unemployment variable ID
years=[2021],
unit_level=2
)
# Sort and display
sorted_unemployment = unemployment_data.sort_values('val', ascending=False)
print("Unemployment by voivodeship (2021):")
print(sorted_unemployment[['unit_name', 'val']].head(10))
Fetching 1234: 1 pages [00:00, 3.31 pages/s, items=16]
Unemployment by voivodeship (2021):
unit_name val
15 MAZOWIECKIE 766241
1 ŚLĄSKIE 594875
0 MAŁOPOLSKIE 544790
3 WIELKOPOLSKIE 411015
5 DOLNOŚLĄSKIE 334400
10 ŁÓDZKIE 315051
8 POMORSKIE 297680
12 LUBELSKIE 286248
13 PODKARPACKIE 268623
7 KUJAWSKO-POMORSKIE 199304
Time Series Analysis¶
Preparing Data for Visualization¶
import pandas as pd
# Get time series data for a specific variable and region
time_series = bdl.data.get_data_by_variable(
variable_id="3643",
unit_level=2,
unit_parent_id="020000000000" # Example: Mazovian Voivodeship
)
# Filter to recent years
recent_years = time_series[time_series['year'] >= 2015]
# Sort by year
recent_years = recent_years.sort_values('year')
# Prepare for plotting
plot_data = recent_years[['year', 'val']].set_index('year')
print(plot_data.head())
Fetching 3643: 1 pages [00:00, 17.97 pages/s, items=3]
val
year
2015 2
2015 3
2015 3
2016 1
2016 2
Error Handling¶
Handling Missing Data¶
# Check if data exists before processing
data = bdl.data.get_data_by_variable("3643", years=[2021], unit_level=2)
if data.empty:
print("No data available for this variable/year/level combination")
else:
# Process data
print(f"Found {len(data)} records")
print(data.head())
Fetching 3643: 1 pages [00:00, 217.25 pages/s, items=16]
Found 16 records
year val unit_id unit_name attr_id
0 2021 2 11200000000 MAŁOPOLSKIE 1
1 2021 6 12400000000 ŚLĄSKIE 1
2 2021 0 20800000000 LUBUSKIE 0
3 2021 2 23000000000 WIELKOPOLSKIE 1
4 2021 5 23200000000 ZACHODNIOPOMORSKIE 1
Best Practices¶
- Use the access layer: Prefer `bdl.data` over `bdl.api.data` for DataFrame output
- Enable caching: Use `use_cache=True` for repeated queries
- Handle pagination: Use `max_pages=None` to get complete datasets
- Use enrichment: Let the library automatically add human-readable names
- Async for bulk operations: Use async methods when fetching multiple datasets
- Error handling: Always handle `RateLimitError` and check for empty results
See also
Access Layer for access layer documentation
Rate Limiting for rate limiting details
Configuration for configuration options