openpyxl을 활용한 동적 Excel 보고서 생성

개요

Python의 openpyxl 라이브러리를 활용하여 계층적 구조의 데이터를 동적으로 Excel 파일로 변환하는 방법을 살펴봅니다. 특히 1차 필드와 2차 필드가 혼재된 복잡한 데이터 구조를 자동으로 처리하는 솔루션을 구현합니다.

핵심 요구사항 분석

필드 삽입 위치를 하드코딩하지 않고 데이터 기반으로 자동 배치
1차 필드와 2차 필드 간의 계층 관계 유지 (셀 병합 및 열 폭 조정)
순번 필드는 데이터가 차지하는 행 수에 따라 자동 병합
전체 셀 가운데 정렬 적용

구현 코드

핵심 유틸리티 모듈

from openpyxl import Workbook, load_workbook
from openpyxl.styles import Alignment
from loguru import logger


def create_workbook():
    """신규 워크북 생성"""
    return Workbook()


def add_sheet(workbook, sheet_title):
    """워크북에 시트 추가"""
    return workbook.create_sheet(title=sheet_title)


def open_existing_workbook(file_path):
    """기존 파일 로드"""
    return load_workbook(file_path)


def fetch_sheet_by_name(workbook, sheet_title):
    """시트명으로 특정 시트 조회"""
    return workbook[sheet_title]


def persist_workbook(workbook, output_path):
    """파일로 저장"""
    workbook.save(output_path)


def combine_cell_range(worksheet, top_row, left_col, bottom_row, right_col):
    """지정 범위 셀 병합"""
    worksheet.merge_cells(
        start_row=top_row,
        start_column=left_col,
        end_row=bottom_row,
        end_column=right_col
    )


def obtain_row_boundary(worksheet):
    """데이터가 존재하는 마지막 행 번호 반환"""
    return worksheet.max_row


def obtain_column_boundary(worksheet):
    """데이터가 존재하는 마지막 열 번호 반환"""
    return worksheet.max_column


def insert_row_data(worksheet, record):
    """리스트 데이터를 행 단위로 추가"""
    worksheet.append(record)


def apply_center_alignment(worksheet):
    """전체 데이터 범위 가운데 정렬 적용"""
    last_row = obtain_row_boundary(worksheet)
    last_col = obtain_column_boundary(worksheet)
    
    logger.info(f"처리 범위: {last_row}행 × {last_col}열")
    
    center_style = Alignment(horizontal='center', vertical='center')
    
    for row_cells in worksheet.iter_rows(
        min_row=1, max_row=last_row,
        min_col=1, max_col=last_col
    ):
        for cell in row_cells:
            cell.alignment = center_style

계층 구조 분석 및 필드 추출

def analyze_hierarchical_structure(source_record):
    """
    중첩된 딕셔너리에서 1차/2차 필드 분리 추출
    
    Args:
        source_record: {'상품명': '값', '상세': [{'속성1': '값1'}, ...]}
    
    Returns:
        {
            'primary_keys': ['상품명', '상세', ...],
            'nested_keys': {'상세': ['속성1', '속성2'], ...}
        }
    """
    result = {
        'primary_keys': [],
        'nested_keys': {}
    }
    
    if not source_record:
        return result
    
    for field_name, field_value in source_record.items():
        if isinstance(field_value, list) and field_value:
            # 2차 필드 존재: 첫 항목의 키들을 하위 필드로 등록
            result['nested_keys'][field_name] = list(field_value[0].keys())
        
        result['primary_keys'].append(field_name)
    
    return result

헤더 자동 생성 및 병합

def render_header_section(worksheet, primary_fields, sub_fields_map, offset_col=1):
    """
    1차/2차 필드를 2단계 헤더로 구성
    
    Args:
        primary_fields: 1차 필드명 목록
        sub_fields_map: {1차필드명: [2차필드명 목록]} 형태
        offset_col: 삽입 시작 열 (기본값 1)
    """
    current_col = offset_col
    
    for field in primary_fields:
        # 1차 필드명을 1행에 기록
        worksheet.cell(row=1, column=current_col, value=field)
        
        if field not in sub_fields_map:
            # 단순 필드: 1열 차지
            current_col += 1
        else:
            # 복합 필드: 2차 필드 수만큼 열 확장 및 병합
            sub_items = sub_fields_map[field]
            if not sub_items:
                current_col += 1
                continue
            
            span_count = len(sub_items)
            # 1차 필드명이 2차 필드 전체를 덮도록 병합
            combine_cell_range(
                worksheet,
                top_row=1, left_col=current_col,
                bottom_row=1, right_col=current_col + span_count - 1
            )
            
            # 2차 필드명을 2행에 순차 기록
            sub_col = current_col
            for sub_name in sub_items:
                worksheet.cell(row=2, column=sub_col, value=sub_name)
                sub_col += 1
            
            current_col += span_count

데이터 본문 자동 기록

def populate_data_rows(worksheet, record_data, start_row, start_col=1):
    """
    단일 레코드를 Excel에 기록 (2차 필드는 여러 행으로 확장)
    
    Args:
        record_data: {'필드명': 단순값 또는 [{...}, {...}]}
        start_row: 기록 시작 행 번호
        start_col: 기록 시작 열 번호
    """
    write_col = start_col
    
    for field_key, field_val in record_data.items():
        
        if not isinstance(field_val, list):
            # 단순 값: 현재 행에 직접 기록
            worksheet.cell(row=start_row, column=write_col, value=field_val)
            write_col += 1
            
        else:
            # 목록 값: 각 항목을 별도 행으로 분리 기록
            nested_row = start_row
            
            for item in field_val:
                nested_col = write_col
                cell_values = list(item.values())
                
                for cell_val in cell_values:
                    worksheet.cell(row=nested_row, column=nested_col, value=cell_val)
                    nested_col += 1
                
                nested_row += 1
            
            # 다음 필드를 위한 열 위치 갱신
            write_col = nested_col
    
    # 순번 필드(1열) 병합: 데이터가 차지한 전체 행 범위
    final_row = obtain_row_boundary(worksheet)
    if final_row > start_row:
        combine_cell_range(
            worksheet,
            top_row=start_row, left_col=1,
            bottom_row=final_row, right_col=1
        )

실행 예시

def execute_report_generation(extra_columns):
    """
    전체 프로세스 실행
    
    Args:
        extra_columns: 데이터 외 추가 헤더 (예: ['순번'])
    """
    extra_count = len(extra_columns)
    
    # 샘플 데이터: 상품 정보와 중첩된 상세 속성
    inventory_data = [
        {
            '상품명': '유기농 사과',
            '식별코드': 'P001',
            '입고이력': [
                {'창고위치': 'A구역', '입고일': '2024-01-15'},
                {'창고위치': 'B구역', '입고일': '2024-01-20'}
            ],
            '품질정보': [
                {'등급': '특상', '중량': '500g', '단가': 3500},
                {'등급': '상', '중량': '450g', '단가': 2800},
                {'등급': '보통', '중량': '400g', '단가': 2200}
            ]
        },
        {
            '상품명': '무농약 배',
            '식별코드': 'P002',
            '입고이력': [
                {'창고위치': 'C구역', '입고일': '2024-02-01'}
            ],
            '품질정보': [
                {'등급': '특상', '중량': '600g', '단가': 4200},
                {'등급': '상', '중량': '550g', '단가': 3600}
            ]
        }
    ]
    
    # 워크북 초기화
    wb = create_workbook()
    ws = add_sheet(wb, '재고현황')
    
    # 사용자 정의 필드 헤더 기록
    insert_row_data(ws, extra_columns)
    
    # 데이터 구조 분석 및 헤더 생성
    structure = analyze_hierarchical_structure(inventory_data[0])
    primary = structure['primary_keys']
    nested = structure['nested_keys']
    
    logger.info(f"1차 필드: {primary}")
    logger.info(f"2차 필드 맵: {nested}")
    
    render_header_section(ws, primary, nested, offset_col=extra_count + 1)
    
    # 데이터 본문 기록
    for seq, entry in enumerate(inventory_data, start=1):
        next_row = obtain_row_boundary(ws) + 1
        
        # 순번 값 삽입
        ws.cell(row=next_row, column=1, value=seq)
        
        populate_data_rows(ws, entry, next_row, start_col=extra_count + 1)
    
    # 서식 적용 및 저장
    apply_center_alignment(ws)
    persist_workbook(wb, 'inventory_report.xlsx')


# 실행
execute_report_generation(['순번'])

동작 원리 요약

위 구현은 데이터의 형태를 먼저 분석하여 동적으로 열 구조를 결정합니다. 2차 필드가 존재하는 경우 자동으로 병합 범위를 계산하고, 실제 데이터 행 수에 따라 순번 셀의 병합 영역을 조정합니다. 이를 통해 데이터 스키마가 변경되어도 코드 수정 없이 유연하게 대응할 수 있습니다.

태그: openpyxl Excel python 자동화 데이터처리

6월 8일 18:50에 게시됨

괴물 클럽