<#
.SYNOPSIS
CSV File Validator Tool
.DESCRIPTION
This script provides a tool to validate CSV files, check for structural issues,
analyze data types, and provide detailed information about the CSV content.
.NOTES
File Name : CSVValidator.ps1
Author : [Your Name]
Prerequisite : PowerShell V5.1 or later
Version : 1.0
Date : [Current Date]
.EXAMPLE
.\CSVValidator.ps1
#>
function Show-Menu {
Clear-Host
Write-Host "=== CSV File Validator Tool ===" -ForegroundColor Cyan
Write-Host "1. Validate CSV File"
Write-Host "2. Analyze CSV Structure"
Write-Host "3. Check for Data Inconsistencies"
Write-Host "4. Generate CSV Summary"
Write-Host "5. Compare Two CSV Files"
Write-Host "6. Exit"
}
function Validate-CSVFile {
$filePath = Read-Host "Enter the path to the CSV file"
if (-not (Test-Path $filePath)) {
Write-Host "File not found." -ForegroundColor Red
return
}
try {
$csv = Import-Csv $filePath
$rowCount = $csv.Count
$columnCount = ($csv | Get-Member -MemberType NoteProperty).Count
Write-Host "CSV is valid." -ForegroundColor Green
Write-Host "Number of rows: $rowCount"
Write-Host "Number of columns: $columnCount"
$headers = ($csv | Get-Member -MemberType NoteProperty).Name
Write-Host "Headers: $($headers -join ', ')"
$size = (Get-Item $filePath).Length
Write-Host "File Size: $size bytes"
}
catch {
Write-Host "Invalid CSV. Error details:" -ForegroundColor Red
Write-Host $_.Exception.Message
}
}
function Analyze-CSVStructure {
$filePath = Read-Host "Enter the path to the CSV file"
if (-not (Test-Path $filePath)) {
Write-Host "File not found." -ForegroundColor Red
return
}
try {
$csv = Import-Csv $filePath
$headers = ($csv | Get-Member -MemberType NoteProperty).Name
Write-Host "CSV Structure Analysis:" -ForegroundColor Yellow
foreach ($header in $headers) {
$values = $csv.$header | Where-Object { $_ -ne "" }
$dataType = Guess-DataType $values
$uniqueValues = ($values | Select-Object -Unique).Count
$nullCount = ($csv.$header | Where-Object { $_ -eq "" }).Count
Write-Host "`nColumn: $header"
Write-Host " Data Type: $dataType"
Write-Host " Unique Values: $uniqueValues"
Write-Host " Null/Empty Values: $nullCount"
Write-Host " Sample Values: $($values | Select-Object -First 5 -Unique)"
}
}
catch {
Write-Host "Error analyzing CSV structure: $_" -ForegroundColor Red
}
}
function Guess-DataType($values) {
$sampleSize = [Math]::Min($values.Count, 100)
$sample = $values | Select-Object -First $sampleSize
$isNumeric = $sample | ForEach-Object { $_ -as [double] } | Where-Object { $_ -ne $null }
$isDate = $sample | ForEach-Object { $_ -as [datetime] } | Where-Object { $_ -ne $null }
if ($isNumeric.Count -eq $sampleSize) { return "Numeric" }
elseif ($isDate.Count -eq $sampleSize) { return "Date" }
else { return "String" }
}
function Check-DataInconsistencies {
$filePath = Read-Host "Enter the path to the CSV file"
if (-not (Test-Path $filePath)) {
Write-Host "File not found." -ForegroundColor Red
return
}
try {
$csv = Import-Csv $filePath
$headers = ($csv | Get-Member -MemberType NoteProperty).Name
Write-Host "Checking for Data Inconsistencies:" -ForegroundColor Yellow
foreach ($header in $headers) {
$values = $csv.$header
$dataType = Guess-DataType $values
$inconsistencies = @()
switch ($dataType) {
"Numeric" {
$inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [double]) -eq $null }
}
"Date" {
$inconsistencies = $values | Where-Object { $_ -ne "" -and ($_ -as [datetime]) -eq $null }
}
}
if ($inconsistencies.Count -gt 0) {
Write-Host "`nInconsistencies found in column '$header' (Expected $dataType):"
Write-Host ($inconsistencies | Select-Object -First 5) -ForegroundColor Red
if ($inconsistencies.Count -gt 5) {
Write-Host "... and $($inconsistencies.Count - 5) more."
}
}
}
}
catch {
Write-Host "Error checking data inconsistencies: $_" -ForegroundColor Red
}
}
function Generate-CSVSummary {
$filePath = Read-Host "Enter the path to the CSV file"
if (-not (Test-Path $filePath)) {
Write-Host "File not found." -ForegroundColor Red
return
}
try {
$csv = Import-Csv $filePath
$rowCount = $csv.Count
$headers = ($csv | Get-Member -MemberType NoteProperty).Name
Write-Host "CSV Summary:" -ForegroundColor Yellow
Write-Host "File: $filePath"
Write-Host "Total Rows: $rowCount"
Write-Host "Total Columns: $($headers.Count)"
Write-Host "`nColumn Summary:"
foreach ($header in $headers) {
$values = $csv.$header
$nonEmptyValues = $values | Where-Object { $_ -ne "" }
$uniqueValues = ($nonEmptyValues | Select-Object -Unique).Count
$nullCount = ($values | Where-Object { $_ -eq "" }).Count
$dataType = Guess-DataType $nonEmptyValues
Write-Host "`n$header:"
Write-Host " Data Type: $dataType"
Write-Host " Unique Values: $uniqueValues"
Write-Host " Null/Empty Count: $nullCount"
Write-Host " Fill Rate: $([math]::Round(($rowCount - $nullCount) / $rowCount * 100, 2))%"
if ($dataType -eq "Numeric") {
$numericValues = $nonEmptyValues | ForEach-Object { $_ -as [double] }
$min = ($numericValues | Measure-Object -Minimum).Minimum
$max = ($numericValues | Measure-Object -Maximum).Maximum
$avg = ($numericValues | Measure-Object -Average).Average
Write-Host " Min: $min"
Write-Host " Max: $max"
Write-Host " Average: $([math]::Round($avg, 2))"
}
}
}
catch {
Write-Host "Error generating CSV summary: $_" -ForegroundColor Red
}
}
function Compare-CSVFiles {
$filePath1 = Read-Host "Enter the path to the first CSV file"
$filePath2 = Read-Host "Enter the path to the second CSV file"
if (-not (Test-Path $filePath1) -or -not (Test-Path $filePath2)) {
Write-Host "One or both files not found." -ForegroundColor Red
return
}
try {
$csv1 = Import-Csv $filePath1
$csv2 = Import-Csv $filePath2
$headers1 = ($csv1 | Get-Member -MemberType NoteProperty).Name
$headers2 = ($csv2 | Get-Member -MemberType NoteProperty).Name
Write-Host "Comparing CSV Files:" -ForegroundColor Yellow
Write-Host "File 1: $filePath1"
Write-Host "File 2: $filePath2"
Write-Host "`nColumn Comparison:"
$commonHeaders = $headers1 | Where-Object { $headers2 -contains $_ }
$uniqueHeaders1 = $headers1 | Where-Object { $headers2 -notcontains $_ }
$uniqueHeaders2 = $headers2 | Where-Object { $headers1 -notcontains $_ }
Write-Host "Common Columns: $($commonHeaders -join ', ')"
Write-Host "Columns only in File 1: $($uniqueHeaders1 -join ', ')"
Write-Host "Columns only in File 2: $($uniqueHeaders2 -join ', ')"
Write-Host "`nRow Count Comparison:"
Write-Host "File 1 Row Count: $($csv1.Count)"
Write-Host "File 2 Row Count: $($csv2.Count)"
Write-Host "`nData Comparison for Common Columns:"
foreach ($header in $commonHeaders) {
$diff = Compare-Object $csv1.$header $csv2.$header
if ($diff) {
Write-Host "Differences found in column '$header':"
Write-Host " Values only in File 1: $($diff | Where-Object { $_.SideIndicator -eq '<=' } | Select-Object -ExpandProperty InputObject)"
Write-Host " Values only in File 2: $($diff | Where-Object { $_.SideIndicator -eq '=>' } | Select-Object -ExpandProperty InputObject)"
}
else {
Write-Host "No differences found in column '$header'"
}
}
}
catch {
Write-Host "Error comparing CSV files: $_" -ForegroundColor Red
}
}
# Main program loop
do {
Show-Menu
$choice = Read-Host "`nEnter your choice (1-6)"
switch ($choice) {
"1" { Validate-CSVFile }
"2" { Analyze-CSVStructure }
"3" { Check-DataInconsistencies }
"4" { Generate-CSVSummary }
"5" { Compare-CSVFiles }
"6" { Write-Host "Exiting program..." -ForegroundColor Yellow; break }
default { Write-Host "Invalid choice. Please try again." -ForegroundColor Red }
}
if ($choice -ne "6") {
Read-Host "`nPress Enter to continue..."
}
} while ($choice -ne "6")
This CSV File Validator Tool includes:
- A menu-driven interface for easy navigation.
- Functions to perform various CSV-related tasks:
- Validate CSV files and provide basic statistics
- Analyze and display the structure of CSV files
- Check for data inconsistencies
- Generate a comprehensive summary of the CSV content
- Compare two CSV files
Key features:
- CSV Validation:
- Checks if the CSV is valid and can be parsed
- Provides row count, column count, headers, and file size
- CSV Structure Analysis:
- Analyzes each column for data type, unique values, and null/empty values
- Provides sample values for each column
- Data Inconsistency Check:
- Identifies values that don’t match the expected data type for each column
- CSV Summary Generation:
- Provides a comprehensive summary of the CSV, including data types, unique values, fill rates, and basic statistics for numeric columns
- CSV Comparison:
- Compares two CSV files for structural differences and data discrepancies
This tool is particularly useful for:
- Data analysts validating CSV datasets
- QA engineers checking CSV file integrity
- Developers working with CSV data imports/exports
- Anyone needing to quickly analyze or compare CSV files
To use this script effectively:
- Run the script in PowerShell
- Use the menu options to select the desired function
- Provide the path to the CSV file(s) when prompted
- Review the output for validation results, structure analysis, inconsistencies, summaries, or file comparisons
This script provides a comprehensive set of tools for working with CSV files, making it easier to validate, understand, and compare CSV data without having to manually inspect the files or use multiple tools.