Examples

This section provides real-world examples demonstrating how to use pyBDL for common data analysis tasks.

Basic Data Retrieval

Getting Started

from pybdl import BDL, BDLConfig

# Create the client (no explicit configuration is passed)
bdl = BDL()

# Fetch the administrative levels and show each level's id and name
levels = bdl.levels.list_levels()
print("Administrative levels:")
level_columns = ["id", "name"]
print(levels[level_columns])

# Fetch the years and report the smallest and largest id
years = bdl.years.list_years()
year_ids = years["id"]
print(f"\nAvailable years: {year_ids.min()} - {year_ids.max()}")
Fetching levels: 1 pages [00:00, 15.30 pages/s, items=8]
Administrative levels:
   id                               name
0   0                      Poziom Polski
1   1               Poziom Makroregionów
2   2                  Poziom Województw
3   3                    Poziom Regionów
4   4                 Poziom Podregionów
5   5                    Poziom Powiatów
6   6                        Poziom Gmin
7   7  Poziom miejscowości statystycznej
Fetching years: 1 pages [00:00, 12.90 pages/s, items=31]
Available years: 1995 - 2025

Finding Variables

# Search for variables whose name matches "population"
population_vars = bdl.variables.search_variables(name="population")
print(f"Found {len(population_vars)} population-related variables")
# Preview the first rows: variable id and its n1 label
print(population_vars[['id', 'n1']].head())

# Get details for a specific variable
var_details = bdl.variables.get_variable("3643")
# Plain string literal: nothing is interpolated here, so no f-prefix is needed
print("\nVariable details:")
print(var_details[['id', 'n1', 'n2']])
Fetching search: 18 pages [00:42,  2.37s/ pages, items=1756]
Found 1756 population-related variables
        id                                                 n1
0     9179         concerning self-taxation of the population
1  1365239            total net migration per 1000 population
2   498816  net migration in internal movement per 1000 po...
3   745534           net migration abroad per 1000 population
4   453193                  net migration per 1000 population

Variable details:
     id     n1     n2
0  3643  total  30-39

Retrieving Data

# Fetch variable 3643 for the year 2021 at unit level 2
data = bdl.data.get_data_by_variable(
    variable_id="3643",
    unit_level=2,  # Voivodeship level
    years=[2021],
)

record_count = len(data)
print(f"Retrieved {record_count} data points")
preview = data[["unit_name", "year", "val"]]
print(preview.head())
Fetching 3643: 1 pages [00:00, 13.40 pages/s, items=16]
Retrieved 16 data points
            unit_name  year  val
0         MAŁOPOLSKIE  2021    2
1             ŚLĄSKIE  2021    6
2            LUBUSKIE  2021    0
3       WIELKOPOLSKIE  2021    2
4  ZACHODNIOPOMORSKIE  2021    5

# List the available aggregates and print the resulting table
data_aggr = bdl.aggregates.list_aggregates()
print(data_aggr)
Fetching aggregates: 1 pages [00:00, 14.59 pages/s, items=8]
   id                                     name  level  \
0   1                                    TOTAL      7   
1   2                             URBAN GMINAS      5   
2   3                       URBAN-RURAL GMINAS      5   
3   4                             RURAL GMINAS      5   
4   7                              URBAN AREAS      5   
5   8                              RURAL AREAS      5   
6  91  NP- Górnośląsko-Zagłębiowska Metropolia      3   
7  92                 NP- Metropolia Krakowska      3   

                                         description  
0  Aggregates for items collected on the level: <...  
1  It is a sum of data for urban gminas (unit typ...  
2  It is a sum of data for urban-rural gminas (un...  
3  It is a sum of these data for rural gminas (un...  
4  It is a sum of  data for urban areas according...  
5  It is a sum of data for rural areas, i.e. rura...  
6                                                NaN  
7                                                NaN  

Population Analysis by Region

Finding Population Variables

# Search for variables matching "population"
pop_vars = bdl.variables.search_variables(name="population")

# Keep the rows whose n2 label contains "total" (case-insensitive);
# rows with a missing n2 are treated as non-matches
is_total = pop_vars["n2"].str.contains("total", case=False, na=False)
total_pop = pop_vars[is_total]
total_count = len(total_pop)
print(f"Found {total_count} total population variables")
Fetching search: 18 pages [00:00, 516.38 pages/s, items=1756]
Found 321 total population variables

Getting Regional Population Data

# Fetch 2021 values for the level-2 (voivodeship) units
pop_data = bdl.data.get_data_by_variable(
    variable_id="3643",  # Example ID; substitute the population variable you need
    unit_level=2,  # Voivodeship level
    years=[2021],
)

# Order the rows from the largest value to the smallest
pop_sorted = pop_data.sort_values(by="val", ascending=False)
print("Top 5 voivodeships by population:")
top_rows = pop_sorted[["unit_name", "val"]].head()
print(top_rows)
Fetching 3643: 1 pages [00:00, 216.45 pages/s, items=16]
Top 5 voivodeships by population:
             unit_name  val
15         MAZOWIECKIE   12
1              ŚLĄSKIE    6
4   ZACHODNIOPOMORSKIE    5
8            POMORSKIE    4
10             ŁÓDZKIE    4

Economic Indicator Comparison

Finding Economic Variables

# Look up variables matching "unemployment"
unemployment_vars = bdl.variables.search_variables(name="unemployment")
unemployment_count = len(unemployment_vars)
print(f"Found {unemployment_count} unemployment variables")

# Look up variables matching "GDP"
gdp_vars = bdl.variables.search_variables(name="GDP")
gdp_count = len(gdp_vars)
print(f"Found {gdp_count} GDP-related variables")
Fetching search: 5 pages [00:00, 424.99 pages/s, items=458]
Found 458 unemployment variables
Fetching search: 1 pages [00:03,  3.34s/ pages, items=11]
Found 11 GDP-related variables

Comparing Voivodeships

# Fetch 2021 values at unit level 2 (voivodeships)
unemployment_data = bdl.data.get_data_by_variable(
    variable_id="1234",  # Example unemployment variable ID
    unit_level=2,
    years=[2021],
)

# Rank from the highest value to the lowest and show the first ten rows
sorted_unemployment = unemployment_data.sort_values(by="val", ascending=False)
print("Unemployment by voivodeship (2021):")
top_ten = sorted_unemployment[["unit_name", "val"]].head(10)
print(top_ten)
Fetching 1234: 1 pages [00:00,  3.31 pages/s, items=16]
Unemployment by voivodeship (2021):
             unit_name     val
15         MAZOWIECKIE  766241
1              ŚLĄSKIE  594875
0          MAŁOPOLSKIE  544790
3        WIELKOPOLSKIE  411015
5         DOLNOŚLĄSKIE  334400
10             ŁÓDZKIE  315051
8            POMORSKIE  297680
12           LUBELSKIE  286248
13        PODKARPACKIE  268623
7   KUJAWSKO-POMORSKIE  199304

Time Series Analysis

Preparing Data for Visualization

import pandas as pd

# Get data for one variable, limited to the level-2 units under one parent
# unit (no year filter is passed to the request).
# NOTE(review): this parent ID was labelled "Mazovian Voivodeship", but the
# sample output has several rows per year and the 02... prefix matches the
# unit_id values of LUBUSKIE, WIELKOPOLSKIE and ZACHODNIOPOMORSKIE - confirm
# which parent unit is intended.
time_series = bdl.data.get_data_by_variable(
    variable_id="3643",
    unit_parent_id="020000000000",  # Example parent unit ID
    unit_level=2,
)

# Keep 2015 onwards, ordered by year
from_2015 = time_series["year"] >= 2015
recent_years = time_series[from_2015].sort_values("year")

# Index the values by year so they are ready for plotting
plot_data = recent_years[["year", "val"]].set_index("year")
print(plot_data.head())
Fetching 3643: 1 pages [00:00, 17.97 pages/s, items=3]
      val
year     
2015    2
2015    3
2015    3
2016    1
2016    2

Error Handling

Handling Missing Data

# Fetch first, then branch on whether any rows came back
data = bdl.data.get_data_by_variable("3643", years=[2021], unit_level=2)

if not data.empty:
    # There are rows to work with: report the count and preview them
    print(f"Found {len(data)} records")
    print(data.head())
else:
    print("No data available for this variable/year/level combination")
Fetching 3643: 1 pages [00:00, 217.25 pages/s, items=16]
Found 16 records
   year  val      unit_id           unit_name  attr_id
0  2021    2  11200000000         MAŁOPOLSKIE        1
1  2021    6  12400000000             ŚLĄSKIE        1
2  2021    0  20800000000            LUBUSKIE        0
3  2021    2  23000000000       WIELKOPOLSKIE        1
4  2021    5  23200000000  ZACHODNIOPOMORSKIE        1

Best Practices

  1. Use the access layer: Prefer bdl.data over bdl.api.data for DataFrame output

  2. Enable caching: Use use_cache=True for repeated queries

  3. Handle pagination: Use max_pages=None to get complete datasets

  4. Use enrichment: Let the library automatically add human-readable names

  5. Async for bulk operations: Use async methods when fetching multiple datasets

  6. Error handling: Always handle RateLimitError and check for empty results

See also