92 lines
2.7 KiB
Python
92 lines
2.7 KiB
Python
|
|
#!/usr/bin/env python3
|
||
|
|
"""
|
||
|
|
Extract the structural schema of a compliance xlsx file as JSON.
|
||
|
|
Usage: python3 extract_xlsx_schema.py <path_to_xlsx>
|
||
|
|
|
||
|
|
Output:
|
||
|
|
{
|
||
|
|
"sheets": [
|
||
|
|
{
|
||
|
|
"name": "Summary",
|
||
|
|
"columns": ["Metric", "Non-Compliant", "..."],
|
||
|
|
"metric_values": ["2.3.4i", "5.2.4", "..."]
|
||
|
|
},
|
||
|
|
{
|
||
|
|
"name": "2.3.4i",
|
||
|
|
"columns": ["Preferred - Hostname", "GRANITE - IPv4_Address", "..."]
|
||
|
|
}
|
||
|
|
]
|
||
|
|
}
|
||
|
|
|
||
|
|
- Uses openpyxl in read-only mode.
|
||
|
|
- Extracts sheet names, first-row column headers per sheet, and unique metric
|
||
|
|
values from the Summary sheet (header at row 4, data from row 5 onward).
|
||
|
|
- On error, prints { "error": "..." } to stdout and exits with a non-zero code.
|
||
|
|
|
||
|
|
Dependencies: openpyxl (already in requirements.txt)
|
||
|
|
"""
|
||
|
|
import sys
|
||
|
|
import json
|
||
|
|
from openpyxl import load_workbook
|
||
|
|
|
||
|
|
|
||
|
|
def _first_row_columns(ws):
    """Return the non-empty cells of the sheet's first row as stripped strings."""
    first_row = next(iter(ws.iter_rows(max_row=1, values_only=True)), None)
    if first_row is None:
        # Sheet yielded no rows at all.
        return []
    return [str(cell).strip() for cell in first_row if cell is not None]


def _summary_metric_values(ws, header_row=4, metric_header="Metric"):
    """Return the sorted unique values found under the metric column.

    The header is read from ``header_row``; data is read from the row after it
    onward. Returns an empty list when the header row is absent or has no cell
    equal to ``metric_header``.
    """
    header = next(
        iter(ws.iter_rows(min_row=header_row, max_row=header_row, values_only=True)),
        None,
    )
    if header is None:
        return []

    labels = [str(cell).strip() if cell else "" for cell in header]
    try:
        metric_idx = labels.index(metric_header)
    except ValueError:
        return []

    found = set()
    for row in ws.iter_rows(min_row=header_row + 1, values_only=True):
        # Rows may be shorter than the header row (e.g. when the file carries
        # no reliable dimension information), so guard the index access.
        if metric_idx >= len(row):
            continue
        cell = row[metric_idx]
        if cell is None:
            continue
        value = str(cell).strip()
        # Skip blanks and any repeated header line inside the data area.
        if value and value != metric_header:
            found.add(value)
    return sorted(found)


def main():
    """Print the structural schema of the xlsx file named in ``sys.argv[1]``.

    On success, prints ``{"sheets": [...]}`` as JSON to stdout, where every
    sheet entry has ``name`` and ``columns`` (non-empty first-row cells) and
    the sheet named "Summary" additionally has ``metric_values``.

    On any failure (missing argument, unreadable file, workbook without
    sheets, error while reading a sheet) prints ``{"error": "..."}`` as JSON
    to stdout and exits with status 1. The workbook is always closed once it
    has been opened.

    NOTE(review): "columns" for the Summary sheet is also taken from row 1,
    even though its header is read from row 4 for metric extraction --
    confirm this is intended.
    """
    if len(sys.argv) < 2:
        print(json.dumps({"error": "No file path provided"}))
        sys.exit(1)

    filepath = sys.argv[1]

    try:
        wb = load_workbook(filepath, read_only=True, data_only=True)
    except Exception as e:
        print(json.dumps({"error": f"Cannot open file: {e}"}))
        sys.exit(1)

    try:
        if not wb.sheetnames:
            print(json.dumps({"error": "Workbook contains no sheets"}))
            sys.exit(1)

        sheets = []
        for sheet_name in wb.sheetnames:
            ws = wb[sheet_name]
            entry = {
                "name": sheet_name,
                "columns": _first_row_columns(ws),
            }
            # Summary has its header at row 4 and data from row 5 onward.
            if sheet_name == "Summary":
                entry["metric_values"] = _summary_metric_values(ws)
            sheets.append(entry)
    except Exception as e:
        # SystemExit is not an Exception subclass, so the sys.exit above
        # passes through untouched; only genuine read failures land here.
        print(json.dumps({"error": f"Cannot read workbook: {e}"}))
        sys.exit(1)
    finally:
        # Release the file handle on every path, including errors.
        wb.close()

    print(json.dumps({"sheets": sheets}))
|
||
|
|
|
||
|
|
|
||
|
|
# Run the extraction only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()
|