CSV File Validator Tool
<# .SYNOPSIS CSV File Validator Tool .DESCRIPTION This script provides a tool to validate CSV files, check for structural issues, analyze data types, and provide detailed information about the CSV content. .NOTES File Name : CSVValidator.ps1 Author : [Your Name] Prerequisite : PowerShell V5.1 or later Version : 1.0 Date : [Current Date] .EXAMPLE .\CSVValidator.ps1 #> function Show-Menu { Clear-Host Write-Host "=== CSV File Validator Tool ===" -ForegroundColor Cyan Write-Host "1. Validate CSV File" Write-Host "2. Analyze CSV Structure" Write-Host "3. Check for Data Inconsistencies" Write-Host "4. Generate CSV Summary" Write-Host "5. Compare Two CSV Files" Write-Host "6. Exit" } function Validate-CSVFile { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $rowCount = $csv.Count $columnCount = ($csv | Get-Member -MemberType NoteProperty).Count Write-Host "CSV is valid." -ForegroundColor Green Write-Host "Number of rows: $rowCount" Write-Host "Number of columns: $columnCount" $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "Headers: $($headers -join ', ')" $size = (Get-Item $filePath).Length Write-Host "File Size: $size bytes" } catch { Write-Host "Invalid CSV. Error details:" -ForegroundColor Red Write-Host $_.Exception.Message } } function Analyze-CSVStructure { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "CSV Structure Analysis:" -ForegroundColor Yellow foreach ($header in $headers) { $values = $csv.$header | Where-Object { $_ -ne "" } $dataType = Guess-DataType $values $uniqueValues = ($values | Select-Object -Unique).Count $nullCount = ($csv.$header | Where-Object { $_ -eq "" }).Count Write-Host "`nColumn: $header" Write-Host " Data Type: $dataType" Write-Host " Unique Values: $uniqueValues" Write-Host " Null/Empty Values: $nullCount" Write-Host " Sample Values: $($values | Select-Object -First 5 -Unique)" } } catch { Write-Host "Error analyzing CSV structure: $_" -ForegroundColor Red } } function Guess-DataType($values) { $sampleSize = [Math]::Min($values.Count, 100) $sample = $values | Select-Object -First $sampleSize $isNumeric = $sample | ForEach-Object { $_ -as [double] } | Where-Object { $_ -ne $null } $isDate = $sample | ForEach-Object { $_ -as [datetime] } | Where-Object { $_ -ne $null } if ($isNumeric.Count -eq $sampleSize) { return "Numeric" } elseif ($isDate.Count -eq $sampleSize) { return "Date" } else { return "String" } } function Check-DataInconsistencies { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "Checking for Data Inconsistencies:" -ForegroundColor Yellow foreach ($header in $headers) { $values = $csv.$header $dataType = Guess-DataType $values $inconsistencies = @() switch ($dataType) { "Numeric" { $inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [double]) -eq $null } } "Date" { $inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [datetime]) -eq $null } } } if ($inconsistencies.Count -gt 0) { Write-Host "`nInconsistencies found in column '$header' (Expected $dataType):" Write-Host ($inconsistencies | Select-Object -First 5) -ForegroundColor Red if ($inconsistencies.Count -gt 5) { Write-Host "... and $($inconsistencies.Count - 5) more." } } } } catch { Write-Host "Error checking data inconsistencies: $_" -ForegroundColor Red } } function Generate-CSVSummary { $filePath = Read-Host "Enter the path to the CSV file" if (-not (Test-Path $filePath)) { Write-Host "File not found." -ForegroundColor Red return } try { $csv = Import-Csv $filePath $rowCount = $csv.Count $headers = ($csv | Get-Member -MemberType NoteProperty).Name Write-Host "CSV Summary:" -ForegroundColor Yellow Write-Host "File: $filePath" Write-Host "Total Rows: $rowCount" Write-Host "Total Columns: $($headers.Count)" Write-Host "`nColumn Summary:" foreach ($header in $headers) { $values = $csv.$header $nonEmptyValues = $values | Where-Object { $_ -ne "" } $uniqueValues = ($nonEmptyValues | Select-Object -Unique).Count $nullCount = ($values | Where-Object { $_ -eq "" }).Count $dataType = Guess-DataType $nonEmptyValues Write-Host "`n$header:" Write-Host " Data Type: $dataType" Write-Host " Unique Values: $uniqueValues" Write-Host " Null/Empty Count: $nullCount" Write-Host " Fill Rate: $([math]::Round(($rowCount - $nullCount) / $rowCount * 100, 2))%" if ($dataType -eq "Numeric") { $numericValues = $nonEmptyValues | ForEach-Object { $_ -as [double] } $min = ($numericValues | Measure-Object -Minimum).Minimum $max = ($numericValues | Measure-Object -Maximum).Maximum $avg = ($numericValues | Measure-Object -Average).Average Write-Host " Min: $min" Write-Host " Max: $max" Write-Host " Average: $([math]::Round($avg, 2))" } } } catch { Write-Host "Error generating CSV summary: $_" -ForegroundColor Red } } function Compare-CSVFiles { $filePath1 = Read-Host "Enter the path to the first CSV file" $filePath2 = Read-Host "Enter the path to the second CSV file" if (-not (Test-Path $filePath1) -or -not (Test-Path $filePath2)) { Write-Host "One or both files not found." -ForegroundColor Red return } try { $csv1 = Import-Csv $filePath1 $csv2 = Import-Csv $filePath2 $headers1 = ($csv1 | Get-Member -MemberType NoteProperty).Name $headers2 = ($csv2 | Get-Member -MemberType NoteProperty).Name Write-Host "Comparing CSV Files:" -ForegroundColor Yellow Write-Host "File 1: $filePath1" Write-Host "File 2: $filePath2" Write-Host "`nColumn Comparison:" $commonHeaders = $headers1 | Where-Object { $headers2 -contains $_ } $uniqueHeaders1 = $headers1 | Where-Object { $headers2 -notcontains $_ } $uniqueHeaders2 = $headers2 | Where-Object { $headers1 -notcontains $_ } Write-Host "Common Columns: $($commonHeaders -join ', ')" Write-Host "Columns only in File 1: $($uniqueHeaders1 -join ', ')" Write-Host "Columns only in File 2: $($uniqueHeaders2 -join ', ')" Write-Host "`nRow Count Comparison:" Write-Host "File 1 Row Count: $($csv1.Count)" Write-Host "File 2 Row Count: $($csv2.Count)" Write-Host "`nData Comparison for Common Columns:" foreach ($header in $commonHeaders) { $diff = Compare-Object $csv1.$header $csv2.$header if ($diff) { Write-Host "Differences found in column '$header':" Write-Host " Values only in File 1: $($diff | Where-Object { $_.SideIndicator -eq '<=' } | Select-Object -ExpandProperty InputObject)" Write-Host " Values only in File 2: $($diff | Where-Object { $_.SideIndicator -eq '=>' } | Select-Object -ExpandProperty InputObject)" } else { Write-Host "No differences found in column '$header'" } } } catch { Write-Host "Error comparing CSV files: $_" -ForegroundColor Red } } # Main program loop do { Show-Menu $choice = Read-Host "`nEnter your choice (1-6)" switch ($choice) { "1" { Validate-CSVFile } "2" { Analyze-CSVStructure } "3" { Check-DataInconsistencies } "4" { Generate-CSVSummary } "5" { Compare-CSVFiles } "6" { Write-Host "Exiting program..." -ForegroundColor Yellow; break } default { Write-Host "Invalid choice. Please try again." -ForegroundColor Red } } if ($choice -ne "6") { Read-Host "`nPress Enter to continue..." } } while ($choice -ne "6")
This CSV File Validator Tool includes:
- A menu-driven interface for easy navigation.
- Functions to perform various CSV-related tasks:
- Validate CSV files and provide basic statistics
- Analyze and display the structure of CSV files
- Check for data inconsistencies
- Generate a comprehensive summary of the CSV content
- Compare two CSV files
Key features:
- CSV Validation:
- Checks if the CSV is valid and can be parsed
- Provides row count, column count, headers, and file size
- CSV Structure Analysis:
- Analyzes each column for data type, unique values, and null/empty values
- Provides sample values for each column
- Data Inconsistency Check:
- Identifies values that don’t match the expected data type for each column
- CSV Summary Generation:
- Provides a comprehensive summary of the CSV, including data types, unique values, fill rates, and basic statistics for numeric columns
- CSV Comparison:
- Compares two CSV files for structural differences and data discrepancies
This tool is particularly useful for:
- Data analysts validating CSV datasets
- QA engineers checking CSV file integrity
- Developers working with CSV data imports/exports
- Anyone needing to quickly analyze or compare CSV files
To use this script effectively:
- Run the script in PowerShell
- Use the menu options to select the desired function
- Provide the path to the CSV file(s) when prompted
- Review the output for validation results, structure analysis, inconsistencies, summaries, or file comparisons
This script provides a comprehensive set of tools for working with CSV files, making it easier to validate, understand, and compare CSV data without having to manually inspect the files or use multiple tools.
Leave a Reply
Want to join the discussion?Feel free to contribute!