CSV File Validator Tool
<# .SYNOPSIS CSV File Validator Tool .DESCRIPTION This script provides a tool to validate CSV files, check for structural issues, analyze data types, and provide detailed information about the CSV content. .NOTES File Name : CSVValidator.ps1 Author : [Your Name] Prerequisite : PowerShell V5.1 or later Version : 1.0 Date : [Current Date] .EXAMPLE .\CSVValidator.ps1 #> function Show-Menu { Clear-Host Write-Host "=== CSV File Validator Tool ===" -ForegroundColor Cyan Write-Host "1. Validate CSV File" Write-Host "2. Analyze CSV Structure" Write-Host "3. Check for Data Inconsistencies" Write-Host "4. Generate CSV Summary" Write-Host "5. Compare Two CSV Files" Write-Host "6. Exit" } function Validate-CSVFile { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $rowCount = $csv.Count $columnCount = ($csv | Get-Member -MemberType NoteProperty).Count Write-Host "CSV is valid." -ForegroundColor Green Write-Host "Number of rows: $rowCount" Write-Host "Number of columns: $columnCount" $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "Headers: $($headers -join ', ')" $size = (Get-Item $filePath).Length Write-Host "File Size: $size bytes" } catch { Write-Host "Invalid CSV. Error details:" -ForegroundColor Red Write-Host $_.Exception.Message } } function Analyze-CSVStructure { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "CSV Structure Analysis:" -ForegroundColor Yellow foreach ($header in $headers) { $values = $csv.$header | Where-Object { $_ -ne "" } $dataType = Guess-DataType $values $uniqueValues = ($values | Select-Object -Unique).Count $nullCount = ($csv.$header | Where-Object { $_ -eq "" }).Count Write-Host "`nColumn: $header" Write-Host " Data Type: $dataType" Write-Host " Unique Values: $uniqueValues" Write-Host " Null/Empty Values: $nullCount" Write-Host " Sample Values: $($values | Select-Object -First 5 -Unique)" } } catch { Write-Host "Error analyzing CSV structure: $_" -ForegroundColor Red } } function Guess-DataType($values) { $sampleSize = [Math]::Min($values.Count, 100) $sample = $values | Select-Object -First $sampleSize $isNumeric = $sample | ForEach-Object { $_ -as [double] } | Where-Object { $_ -ne $null } $isDate = $sample | ForEach-Object { $_ -as [datetime] } | Where-Object { $_ -ne $null } if ($isNumeric.Count -eq $sampleSize) { return "Numeric" } elseif ($isDate.Count -eq $sampleSize) { return "Date" } else { return "String" } } function Check-DataInconsistencies { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "Checking for Data Inconsistencies:" -ForegroundColor Yellow foreach ($header in $headers) { $values = $csv.$header $dataType = Guess-DataType $values $inconsistencies = @() switch ($dataType) { "Numeric" { $inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [double]) -eq $null } } "Date" { $inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [datetime]) -eq $null } } } if ($inconsistencies.Count -gt 0) { Write-Host "`nInconsistencies found in column '$header' (Expected $dataType):" Write-Host ($inconsistencies | Select-Object -First 5) -ForegroundColor Red if ($inconsistencies.Count -gt 5) { Write-Host "... and $($inconsistencies.Count - 5) more." } } } } catch { Write-Host "Error checking data inconsistencies: $_" -ForegroundColor Red } } function Generate-CSVSummary { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $rowCount = $csv.Count $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "CSV Summary:" -ForegroundColor Yellow Write-Host "File: $filePath" Write-Host "Total Rows: $rowCount" Write-Host "Total Columns: $($headers.Count)" Write-Host "`nColumn Summary:" foreach ($header in $headers) { $values = $csv.$header $nonEmptyValues = $values | Where-Object { $_ -ne "" } $uniqueValues = ($nonEmptyValues | Select-Object -Unique).Count $nullCount = ($values | Where-Object { $_ -eq "" }).Count $dataType = Guess-DataType $nonEmptyValues Write-Host "`n$header:" Write-Host " Data Type: $dataType" Write-Host " Unique Values: $uniqueValues" Write-Host " Null/Empty Count: $nullCount" Write-Host " Fill Rate: $([math]::Round(($rowCount - $nullCount) / $rowCount * 100, 2))%" if ($dataType -eq "Numeric") { $numericValues = $nonEmptyValues | ForEach-Object { $_ -as [double] } $min = ($numericValues | Measure-Object -Minimum).Minimum $max = ($numericValues | Measure-Object -Maximum).Maximum $avg = ($numericValues | Measure-Object -Average).Average Write-Host " Min: $min" Write-Host " Max: $max" Write-Host " Average: $([math]::Round($avg, 2))" } } } catch { Write-Host "Error generating CSV summary: $_" -ForegroundColor Red } } function Compare-CSVFiles { $filePath1 = Read-Host "Enter the path to the first CSV file" $filePath2 = Read-Host "Enter the path to the second CSV file" if (-not (Test-Path $filePath1) -or -not (Test-Path $filePath2)) { Write-Host "One or both files not found." -ForegroundColor Red return } try { $csv1 = Import-Csv $filePath1 $csv2 = Import-Csv $filePath2 $headers1 = ($csv1 | Get-Member -MemberType NoteProperty).Name $headers2 = ($csv2 | Get-Member -MemberType NoteProperty).Name Write-Host "Comparing CSV Files:" -ForegroundColor Yellow Write-Host "File 1: $filePath1" Write-Host "File 2: $filePath2" Write-Host "`nColumn Comparison:" $commonHeaders = $headers1 | Where-Object { $headers2 -contains $_ } $uniqueHeaders1 = $headers1 | Where-Object { $headers2 -notcontains $_ } $uniqueHeaders2 = $headers2 | Where-Object { $headers1 -notcontains $_ } Write-Host "Common Columns: $($commonHeaders -join ', ')" Write-Host "Columns only in File 1: $($uniqueHeaders1 -join ', ')" Write-Host "Columns only in File 2: $($uniqueHeaders2 -join ', ')" Write-Host "`nRow Count Comparison:" Write-Host "File 1 Row Count: $($csv1.Count)" Write-Host "File 2 Row Count: $($csv2.Count)" Write-Host "`nData Comparison for Common Columns:" foreach ($header in $commonHeaders) { $diff = Compare-Object $csv1.$header $csv2.$header if ($diff) { Write-Host "Differences found in column '$header':" Write-Host " Values only in File 1: $($diff | Where-Object { $_.SideIndicator -eq '<=' } | Select-Object -ExpandProperty InputObject)" Write-Host " Values only in File 2: $($diff | Where-Object { $_.SideIndicator -eq '=>' } | Select-Object -ExpandProperty InputObject)" } else { Write-Host "No differences found in column '$header'" } } } catch { Write-Host "Error comparing CSV files: $_" -ForegroundColor Red } } # Main program loop do { Show-Menu $choice = Read-Host "`nEnter your choice (1-6)" switch ($choice) { "1" { Validate-CSVFile } "2" { Analyze-CSVStructure } "3" { Check-DataInconsistencies } "4" { Generate-CSVSummary } "5" { Compare-CSVFiles } "6" { Write-Host "Exiting program..." -ForegroundColor Yellow; break } default { Write-Host "Invalid choice. Please try again." -ForegroundColor Red } } if ($choice -ne "6") { Read-Host "`nPress Enter to continue..." } } while ($choice -ne "6")
This CSV File Validator Tool includes:
- A menu-driven interface for easy navigation.
- Functions to perform various CSV-related tasks:
- Validate CSV files and provide basic statistics
- Analyze and display the structure of CSV files
- Check for data inconsistencies
- Generate a comprehensive summary of the CSV content
- Compare two CSV files
Key features:
- CSV Validation:
- Checks if the CSV is valid and can be parsed
- Provides row count, column count, headers, and file size
- CSV Structure Analysis:
- Analyzes each column for data type, unique values, and null/empty values
- Provides sample values for each column
- Data Inconsistency Check:
- Identifies values that don’t match the expected data type for each column
- CSV Summary Generation:
- Provides a comprehensive summary of the CSV, including data types, unique values, fill rates, and basic statistics for numeric columns
- CSV Comparison:
- Compares two CSV files for structural differences and data discrepancies
This tool is particularly useful for:
- Data analysts validating CSV datasets
- QA engineers checking CSV file integrity
- Developers working with CSV data imports/exports
- Anyone needing to quickly analyze or compare CSV files
To use this script effectively:
- Run the script in PowerShell
- Use the menu options to select the desired function
- Provide the path to the CSV file(s) when prompted
- Review the output for validation results, structure analysis, inconsistencies, summaries, or file comparisons
This script provides a comprehensive set of tools for working with CSV files, making it easier to validate, understand, and compare CSV data without having to manually inspect the files or use multiple tools.