Sync all local changes for remote dev server migration
This commit is contained in:
84
backend/scripts/dump_xlsx_schema.py
Normal file
84
backend/scripts/dump_xlsx_schema.py
Normal file
@@ -0,0 +1,84 @@
|
||||
#!/usr/bin/env python3
|
||||
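"""
Dump the structural schema of a compliance xlsx file as JSON.

Usage: python3 dump_xlsx_schema.py <path_to_xlsx>

Output (printed to stdout):
{
  "sheets": [
    {
      "name": "SheetName",
      "columns": ["Col A", "Col B", ...],
      "row_count": 150,
      "metric_values": ["2.3.4i", "5.2.4", ...]
    },
    ...
  ]
}

Field notes:
  - "columns" lists the non-empty cells of row 1 of each sheet.
  - "row_count" counts the rows after row 1.
  - "metric_values" is emitted only for the sheet named "Summary", and only
    when row 4 of that sheet has a "Metric" header with at least one value
    below it. The values are de-duplicated and sorted.

If no path is given or the file cannot be opened, a JSON object of the form
{"error": "..."} is printed instead and the exit status is 1.

Dependencies: openpyxl (already in requirements.txt)
"""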
|
||||
import sys
|
||||
import json
|
||||
from openpyxl import load_workbook
|
||||
|
||||
|
||||
# Location of the metric table inside the workbook.
_SUMMARY_SHEET = 'Summary'
_SUMMARY_HEADER_ROW = 4  # 1-indexed; metric data starts on the following row
_METRIC_HEADER = 'Metric'


def _header_columns(ws):
    """Return the stripped text of every non-empty cell in row 1 of *ws*."""
    first_row = next(iter(ws.iter_rows(max_row=1, values_only=True)), None)
    if first_row is None:
        return []
    return [str(cell).strip() for cell in first_row if cell is not None]


def _count_data_rows(ws):
    """Count the rows after row 1 without keeping them in memory."""
    return sum(1 for _ in ws.iter_rows(min_row=2, values_only=True))


def _summary_metric_values(ws):
    """Return the sorted, de-duplicated values of the 'Metric' column of *ws*.

    The header is looked up in row ``_SUMMARY_HEADER_ROW``; values are read
    from the rows below it. An empty list is returned when that row is
    missing or has no 'Metric' header.
    """
    header = next(
        iter(ws.iter_rows(min_row=_SUMMARY_HEADER_ROW,
                          max_row=_SUMMARY_HEADER_ROW,
                          values_only=True)),
        None,
    )
    if header is None:
        return []

    labels = [str(cell).strip() if cell else '' for cell in header]
    try:
        metric_idx = labels.index(_METRIC_HEADER)
    except ValueError:
        return []

    found = set()
    for row in ws.iter_rows(min_row=_SUMMARY_HEADER_ROW + 1, values_only=True):
        # A row may carry fewer cells than the header row; treat the missing
        # cell as empty instead of failing with IndexError.
        if metric_idx >= len(row):
            continue
        cell = row[metric_idx]
        if cell is None:
            continue
        text = str(cell).strip()
        # Skip blanks and any repeated header line inside the data area.
        if text and text != _METRIC_HEADER:
            found.add(text)
    return sorted(found)


def _describe_sheet(wb, sheet_name):
    """Build the schema entry (name, columns, row_count[, metric_values])."""
    ws = wb[sheet_name]
    entry = {
        'name': sheet_name,
        'columns': _header_columns(ws),
        'row_count': _count_data_rows(ws),
    }
    if sheet_name == _SUMMARY_SHEET:
        metric_values = _summary_metric_values(ws)
        # The key is omitted entirely when there is nothing to report.
        if metric_values:
            entry['metric_values'] = metric_values
    return entry


def main():
    """Print the schema of the xlsx file named in ``sys.argv[1]`` as JSON.

    On success ``{"sheets": [...]}`` is written to stdout. On any failure a
    ``{"error": "..."}`` object is written to stdout and the process exits
    with status 1, so a caller can always parse stdout as JSON.
    """
    if len(sys.argv) < 2:
        print(json.dumps({'error': 'No file path provided'}))
        sys.exit(1)

    filepath = sys.argv[1]

    try:
        wb = load_workbook(filepath, read_only=True, data_only=True)
    except Exception as e:
        print(json.dumps({'error': f'Cannot open file: {str(e)}'}))
        sys.exit(1)

    # Read-only workbooks parse sheets lazily, so reading can still fail
    # after a successful open; report that as JSON too, and always release
    # the underlying file handle.
    try:
        sheets = [_describe_sheet(wb, name) for name in wb.sheetnames]
    except Exception as e:
        print(json.dumps({'error': f'Cannot read file: {str(e)}'}))
        sys.exit(1)
    finally:
        wb.close()

    print(json.dumps({'sheets': sheets}, indent=2))
|
||||
|
||||
|
||||
# Run the dump only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()
|
||||
Reference in New Issue
Block a user