Categories We Write About

Build a CSV validator tool

Here’s a complete Python-based CSV validator tool that checks the structure and content of CSV files. It validates:

  • File existence and extension (.csv)

  • Header consistency

  • Row length uniformity

  • Optional: Value type checks (e.g., integers, emails)

  • Custom rules (e.g., required columns)

python
import csv
import os
import re
from typing import Dict, List, Optional, Sequence

# Optional sign followed by decimal digits only (no underscores, no blanks).
_INT_PATTERN = re.compile(r"[+-]?\d+")

# Minimal "local@domain.tld" shape check: exactly one "@", a literal dot in the
# domain part, and no whitespace anywhere. Not a full RFC 5322 validator.
_EMAIL_PATTERN = re.compile(r"[^@\s]+@[^@\s]+\.[^@\s]+")


class CSVValidator:
    """Validate the structure and basic content of a CSV file.

    Checks performed by :meth:`validate`:

    * the path exists and has a ``.csv`` extension,
    * every name in ``required_columns`` appears in the header row,
    * every data row has as many values as the header row,
    * values in columns listed in ``column_types`` match the declared type
      (``"int"`` or ``"email"``; any other type name is ignored).

    Problems are collected as human-readable strings in ``self.errors``.
    """

    def __init__(
        self,
        filepath: str,
        required_columns: Optional[Sequence[str]] = None,
        column_types: Optional[Dict[str, str]] = None,
    ) -> None:
        """Store the validation configuration.

        Args:
            filepath: Path of the CSV file to validate.
            required_columns: Header names that must be present.
            column_types: Mapping of header name to ``"int"`` or ``"email"``.
        """
        self.filepath = filepath
        self.required_columns = required_columns or []
        self.column_types = column_types or {}
        self.errors: List[str] = []

    def validate(self) -> bool:
        """Run all checks and return ``True`` when no error was recorded.

        Errors from a previous call are discarded first, so calling this
        method repeatedly does not duplicate messages.
        """
        # Rebind rather than clear, so a list a caller obtained from an
        # earlier run is left untouched.
        self.errors = []

        path = os.fspath(self.filepath)
        if not os.path.isfile(path):
            self.errors.append(f"File not found: {self.filepath}")
            return False

        if not path.lower().endswith('.csv'):
            self.errors.append("Invalid file format. Only CSV files are allowed.")
            return False

        try:
            # 'utf-8-sig' reads plain UTF-8 too, but also drops a leading BOM
            # so it does not end up inside the first header name.
            with open(path, newline='', encoding='utf-8-sig') as csvfile:
                reader = csv.reader(csvfile)
                headers = next(reader, None)
                if headers is None:
                    self.errors.append("CSV file is empty.")
                    return False

                self._validate_headers(headers)

                # Data rows are numbered from 2 because the header is row 1.
                for line_num, row in enumerate(reader, start=2):
                    self._validate_row_length(headers, row, line_num)
                    self._validate_column_types(headers, row, line_num)
        except (OSError, UnicodeDecodeError, csv.Error) as e:
            self.errors.append(f"Error reading CSV file: {e}")
            return False

        return not self.errors

    def _validate_headers(self, headers: Sequence[str]) -> None:
        """Record an error listing every required column missing from *headers*."""
        missing = [col for col in self.required_columns if col not in headers]
        if missing:
            self.errors.append(f"Missing required columns: {', '.join(missing)}")

    def _validate_row_length(
        self, headers: Sequence[str], row: Sequence[str], line_num: int
    ) -> None:
        """Record an error when *row* has a different length than *headers*."""
        if len(row) != len(headers):
            self.errors.append(
                f"Line {line_num}: Expected {len(headers)} values, found {len(row)}."
            )

    def _validate_column_types(
        self, headers: Sequence[str], row: Sequence[str], line_num: int
    ) -> None:
        """Check each typed column's value in *row* against its declared type.

        Columns missing from a short row are skipped here; the length mismatch
        itself is reported by :meth:`_validate_row_length`.
        """
        # zip() stops at the shorter sequence, so a short row cannot raise
        # IndexError and abort validation of the remaining rows.
        for header, raw_value in zip(headers, row):
            expected_type = self.column_types.get(header)
            if not expected_type:
                continue

            value = raw_value.strip()

            if expected_type == "int":
                if not _INT_PATTERN.fullmatch(value):
                    self.errors.append(
                        f"Line {line_num}, column '{header}': Expected integer, got '{value}'."
                    )
            elif expected_type == "email":
                if not _EMAIL_PATTERN.fullmatch(value):
                    self.errors.append(
                        f"Line {line_num}, column '{header}': Invalid email '{value}'."
                    )

    def get_errors(self) -> List[str]:
        """Return the error messages collected by the last :meth:`validate` call."""
        return self.errors


# Example Usage
if __name__ == "__main__":
    filepath = "example.csv"
    required_columns = ["Name", "Email", "Age"]
    column_types = {
        "Email": "email",
        "Age": "int",
    }

    validator = CSVValidator(filepath, required_columns, column_types)
    if validator.validate():
        print("CSV is valid.")
    else:
        print("CSV validation failed with the following errors:")
        for error in validator.get_errors():
            print(" -", error)

Features to Extend Later:

  • Configurable delimiter

  • Date/time format validation

  • Configurable quoting and escaping rules

  • Web-based frontend (e.g., Flask or Streamlit)

Let me know if you want this turned into a GUI tool or web app.

Share This Page:

Enter your email below to join The Palos Publishing Company Email List

We respect your email privacy

Comments

Leave a Reply

Your email address will not be published. Required fields are marked *

Categories We Write About