CSV File Validator Tool

<#
.SYNOPSIS
CSV File Validator Tool

.DESCRIPTION
This script provides a tool to validate CSV files, check for structural issues,
analyze data types, and provide detailed information about the CSV content.

.NOTES
File Name      : CSVValidator.ps1
Author         : [Your Name]
Prerequisite   : PowerShell V5.1 or later
Version        : 1.0
Date           : [Current Date]

.EXAMPLE
.\CSVValidator.ps1
#>

function Show-Menu {
    Clear-Host
    Write-Host "=== CSV File Validator Tool ===" -ForegroundColor Cyan
    Write-Host "1. Validate CSV File"
    Write-Host "2. Analyze CSV Structure"
    Write-Host "3. Check for Data Inconsistencies"
    Write-Host "4. Generate CSV Summary"
    Write-Host "5. Compare Two CSV Files"
    Write-Host "6. Exit"
}

function Validate-CSVFile {
    $filePath = Read-Host "Enter the path to the CSV file"
    if (-not (Test-Path $filePath)) {
        Write-Host "File not found." -ForegroundColor Red
        return
    }

    try {
        $csv = Import-Csv $filePath
        $rowCount = $csv.Count
        $columnCount = ($csv | Get-Member -MemberType NoteProperty).Count

        Write-Host "CSV is valid." -ForegroundColor Green
        Write-Host "Number of rows: $rowCount"
        Write-Host "Number of columns: $columnCount"

        $headers = ($csv | Get-Member -MemberType NoteProperty).Name
        Write-Host "Headers: $($headers -join ', ')"

        $size = (Get-Item $filePath).Length
        Write-Host "File Size: $size bytes"
    }
    catch {
        Write-Host "Invalid CSV. Error details:" -ForegroundColor Red
        Write-Host $_.Exception.Message
    }
}

function Analyze-CSVStructure {
    $filePath = Read-Host "Enter the path to the CSV file"
    if (-not (Test-Path $filePath)) {
        Write-Host "File not found." -ForegroundColor Red
        return
    }

    try {
        $csv = Import-Csv $filePath
        $headers = ($csv | Get-Member -MemberType NoteProperty).Name

        Write-Host "CSV Structure Analysis:" -ForegroundColor Yellow
        foreach ($header in $headers) {
            $values = $csv.$header | Where-Object { $_ -ne "" }
            $dataType = Guess-DataType $values
            $uniqueValues = ($values | Select-Object -Unique).Count
            $nullCount = ($csv.$header | Where-Object { $_ -eq "" }).Count

            Write-Host "`nColumn: $header"
            Write-Host "  Data Type: $dataType"
            Write-Host "  Unique Values: $uniqueValues"
            Write-Host "  Null/Empty Values: $nullCount"
            Write-Host "  Sample Values: $($values | Select-Object -First 5 -Unique)"
        }
    }
    catch {
        Write-Host "Error analyzing CSV structure: $_" -ForegroundColor Red
    }
}

function Guess-DataType($values) {
    $sampleSize = [Math]::Min($values.Count, 100)
    $sample = $values | Select-Object -First $sampleSize

    $isNumeric = $sample | ForEach-Object { $_ -as [double] } | Where-Object { $_ -ne $null }
    $isDate = $sample | ForEach-Object { $_ -as [datetime] } | Where-Object { $_ -ne $null }

    if ($isNumeric.Count -eq $sampleSize) { return "Numeric" }
    elseif ($isDate.Count -eq $sampleSize) { return "Date" }
    else { return "String" }
}

function Check-DataInconsistencies {
    $filePath = Read-Host "Enter the path to the CSV file"
    if (-not (Test-Path $filePath)) {
        Write-Host "File not found." -ForegroundColor Red
        return
    }

    try {
        $csv = Import-Csv $filePath
        $headers = ($csv | Get-Member -MemberType NoteProperty).Name

        Write-Host "Checking for Data Inconsistencies:" -ForegroundColor Yellow
        foreach ($header in $headers) {
            $values = $csv.$header
            $dataType = Guess-DataType $values

            $inconsistencies = @()
            switch ($dataType) {
                "Numeric" {
                    $inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [double]) -eq $null }
                }
                "Date" {
                    $inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [datetime]) -eq $null }
                }
            }

            if ($inconsistencies.Count -gt 0) {
                Write-Host "`nInconsistencies found in column '$header' (Expected $dataType):"
                Write-Host ($inconsistencies | Select-Object -First 5) -ForegroundColor Red
                if ($inconsistencies.Count -gt 5) {
                    Write-Host "... and $($inconsistencies.Count - 5) more."
                }
            }
        }
    }
    catch {
        Write-Host "Error checking data inconsistencies: $_" -ForegroundColor Red
    }
}

function Generate-CSVSummary {
    $filePath = Read-Host "Enter the path to the CSV file"
    if (-not (Test-Path $filePath)) {
        Write-Host "File not found." -ForegroundColor Red
        return
    }

    try {
        $csv = Import-Csv $filePath
        $rowCount = $csv.Count
        $headers = ($csv | Get-Member -MemberType NoteProperty).Name

        Write-Host "CSV Summary:" -ForegroundColor Yellow
        Write-Host "File: $filePath"
        Write-Host "Total Rows: $rowCount"
        Write-Host "Total Columns: $($headers.Count)"
        Write-Host "`nColumn Summary:"

        foreach ($header in $headers) {
            $values = $csv.$header
            $nonEmptyValues = $values | Where-Object { $_ -ne "" }
            $uniqueValues = ($nonEmptyValues | Select-Object -Unique).Count
            $nullCount = ($values | Where-Object { $_ -eq "" }).Count
            $dataType = Guess-DataType $nonEmptyValues

            Write-Host "`n$header:"
            Write-Host "  Data Type: $dataType"
            Write-Host "  Unique Values: $uniqueValues"
            Write-Host "  Null/Empty Count: $nullCount"
            Write-Host "  Fill Rate: $([math]::Round(($rowCount - $nullCount) / $rowCount * 100, 2))%"

            if ($dataType -eq "Numeric") {
                $numericValues = $nonEmptyValues | ForEach-Object { $_ -as [double] }
                $min = ($numericValues | Measure-Object -Minimum).Minimum
                $max = ($numericValues | Measure-Object -Maximum).Maximum
                $avg = ($numericValues | Measure-Object -Average).Average
                Write-Host "  Min: $min"
                Write-Host "  Max: $max"
                Write-Host "  Average: $([math]::Round($avg, 2))"
            }
        }
    }
    catch {
        Write-Host "Error generating CSV summary: $_" -ForegroundColor Red
    }
}

function Compare-CSVFiles {
    $filePath1 = Read-Host "Enter the path to the first CSV file"
    $filePath2 = Read-Host "Enter the path to the second CSV file"

    if (-not (Test-Path $filePath1) -or -not (Test-Path $filePath2)) {
        Write-Host "One or both files not found." -ForegroundColor Red
        return
    }

    try {
        $csv1 = Import-Csv $filePath1
        $csv2 = Import-Csv $filePath2

        $headers1 = ($csv1 | Get-Member -MemberType NoteProperty).Name
        $headers2 = ($csv2 | Get-Member -MemberType NoteProperty).Name

        Write-Host "Comparing CSV Files:" -ForegroundColor Yellow
        Write-Host "File 1: $filePath1"
        Write-Host "File 2: $filePath2"

        Write-Host "`nColumn Comparison:"
        $commonHeaders = $headers1 | Where-Object { $headers2 -contains $_ }
        $uniqueHeaders1 = $headers1 | Where-Object { $headers2 -notcontains $_ }
        $uniqueHeaders2 = $headers2 | Where-Object { $headers1 -notcontains $_ }

        Write-Host "Common Columns: $($commonHeaders -join ', ')"
        Write-Host "Columns only in File 1: $($uniqueHeaders1 -join ', ')"
        Write-Host "Columns only in File 2: $($uniqueHeaders2 -join ', ')"

        Write-Host "`nRow Count Comparison:"
        Write-Host "File 1 Row Count: $($csv1.Count)"
        Write-Host "File 2 Row Count: $($csv2.Count)"

        Write-Host "`nData Comparison for Common Columns:"
        foreach ($header in $commonHeaders) {
            $diff = Compare-Object $csv1.$header $csv2.$header
            if ($diff) {
                Write-Host "Differences found in column '$header':"
                Write-Host "  Values only in File 1: $($diff | Where-Object { $_.SideIndicator -eq '<=' } | Select-Object -ExpandProperty InputObject)"
                Write-Host "  Values only in File 2: $($diff | Where-Object { $_.SideIndicator -eq '=>' } | Select-Object -ExpandProperty InputObject)"
            }
            else {
                Write-Host "No differences found in column '$header'"
            }
        }
    }
    catch {
        Write-Host "Error comparing CSV files: $_" -ForegroundColor Red
    }
}

# Main program loop
do {
    Show-Menu
    $choice = Read-Host "`nEnter your choice (1-6)"

    switch ($choice) {
        "1" { Validate-CSVFile }
        "2" { Analyze-CSVStructure }
        "3" { Check-DataInconsistencies }
        "4" { Generate-CSVSummary }
        "5" { Compare-CSVFiles }
        "6" { Write-Host "Exiting program..." -ForegroundColor Yellow; break }
        default { Write-Host "Invalid choice. Please try again." -ForegroundColor Red }
    }

    if ($choice -ne "6") {
        Read-Host "`nPress Enter to continue..."
    }
} while ($choice -ne "6")

This CSV File Validator Tool includes:

  1. A menu-driven interface for easy navigation.
  2. Functions to perform various CSV-related tasks:
    • Validate CSV files and provide basic statistics
    • Analyze and display the structure of CSV files
    • Check for data inconsistencies
    • Generate a comprehensive summary of the CSV content
    • Compare two CSV files

Key features:

  1. CSV Validation:
    • Checks if the CSV is valid and can be parsed
    • Provides row count, column count, headers, and file size
  2. CSV Structure Analysis:
    • Analyzes each column for data type, unique values, and null/empty values
    • Provides sample values for each column
  3. Data Inconsistency Check:
    • Identifies values that don’t match the expected data type for each column
  4. CSV Summary Generation:
    • Provides a comprehensive summary of the CSV, including data types, unique values, fill rates, and basic statistics for numeric columns
  5. CSV Comparison:
    • Compares two CSV files for structural differences and data discrepancies

This tool is particularly useful for:

  • Data analysts validating CSV datasets
  • QA engineers checking CSV file integrity
  • Developers working with CSV data imports/exports
  • Anyone needing to quickly analyze or compare CSV files

To use this script effectively:

  1. Run the script in PowerShell
  2. Use the menu options to select the desired function
  3. Provide the path to the CSV file(s) when prompted
  4. Review the output for validation results, structure analysis, inconsistencies, summaries, or file comparisons

This script provides a comprehensive set of tools for working with CSV files, making it easier to validate, understand, and compare CSV data without having to manually inspect the files or use multiple tools.

0 replies

Leave a Reply

Want to join the discussion?
Feel free to contribute!

Leave a Reply

Your email address will not be published. Required fields are marked *