# ImportToSQLDatabase.psm1

using namespace System.Collections.Generic
using namespace Microsoft.VisualBasic.FileIO

. "$PSScriptRoot\Private.ps1"

function Import-ToSqlDatabase {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory=$true)]
        [string]$CsvFile,

        [Parameter(Mandatory=$true)]
        [string]$SqlServer,

        [Parameter(Mandatory=$true)]
        [string]$Database,

        [Parameter(Mandatory=$true)]
        [string]$Table,

        [Parameter(Mandatory=$false)]
        [string]$Delimiter = ",",

        [Parameter(Mandatory=$false)]
        [switch]$FirstRowColumns,
        
        [Parameter(Mandatory=$false)]
        [switch]$SkipHeaderRow,

        [Parameter(Mandatory=$false)]
        [switch]$Truncate,

        [Parameter(Mandatory=$false)]
        [System.Management.Automation.PSCredential]$SqlCredential,

        [Parameter(Mandatory=$false)]
        [int]$BatchSize = 0,

        [Parameter(Mandatory=$false)]
        [int]$Timeout = 600,

        [Parameter(Mandatory=$false)]
        [string]$LogFile,
        
        [Parameter(Mandatory=$false)]
        [switch]$ShowProgress,

        [Parameter(Mandatory=$false)]
        [switch]$SkipRowCount,
        
        [Parameter(Mandatory=$false)]
        [switch]$UseTableLock,

        [Parameter(Mandatory=$false)]
        [switch]$CheckConstraints,

        [Parameter(Mandatory=$false)]
        [switch]$KeepNulls,

        [Parameter(Mandatory=$false)]
        [switch]$KeepIdentity,

        [Parameter(Mandatory=$false)]
        [switch]$ManageIndexes,
        
        [Parameter(Mandatory=$false)]
        [switch]$ManageConstraints
    )

    Begin {
        # Validate the file exists before resolving; Resolve-Path throws on missing paths,
        # so checking afterwards would be dead code
        if (-not (Test-Path $CsvFile)) {
            throw "Input file not found: $CsvFile"
        }

        # Normalize the path to the CSV file
        $CsvFile = (Resolve-Path $CsvFile).Path

        # Initialize timing and tracking variables
        $startTime = Get-Date
        [int]$rowsProcessed = 0
        $totalRows = $null

        # Count rows if requested for progress reporting
        if ($ShowProgress -and -not $SkipRowCount) {
            Write-Verbose "Calculating total rows in CSV file..."
            $totalRows = (Get-Content $CsvFile | Measure-Object -Line).Lines
            if ($FirstRowColumns -or $SkipHeaderRow) { $totalRows-- }
            Write-Verbose "CSV contains $totalRows total rows to process"
        }

        # Shadow Write-Host so every status message is timestamped and, when
        # -LogFile is supplied, also appended to the log file. The module-qualified
        # call below invokes the real cmdlet and avoids recursion.
        function Write-Host {
            param([string]$Message)

            $timestamp = Get-Date -Format "yyyy-MM-dd HH:mm:ss"
            $logMessage = "[$timestamp] $Message"

            Microsoft.PowerShell.Utility\Write-Host $logMessage

            if ($LogFile) {
                $logMessage | Out-File -FilePath $LogFile -Append
            }
        }

        # Build connection string
        if ($SqlCredential) {
            $username = $SqlCredential.UserName
            $password = $SqlCredential.GetNetworkCredential().Password
            $connectionString = "Server=$SqlServer;Database=$Database;User Id=$username;Password=$password;"
        } else {
            $connectionString = "Server=$SqlServer;Database=$Database;Integrated Security=True;"
        }
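
        # Note: with -SqlCredential the password ends up in plain text inside the
        # connection string; if the server requires encrypted connections, extend
        # the string with the appropriate Encrypt/TrustServerCertificate settings.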

        # Create SQL connection
        try {
            $connection = New-Object System.Data.SqlClient.SqlConnection($connectionString)
            $connection.Open()
            Write-Host "Connection to SQL Server established successfully."
        }
        catch {
            throw "Failed to connect to SQL Server: $_"
        }

        # Validate the table exists before doing anything destructive
        try {
            $tableCheckCommand = New-Object System.Data.SqlClient.SqlCommand(
                "SELECT COUNT(*) FROM INFORMATION_SCHEMA.TABLES WHERE TABLE_NAME = '$Table'",
                $connection
            )
            $tableExists = [int]$tableCheckCommand.ExecuteScalar() -gt 0

            if (-not $tableExists) {
                throw "Table $Table does not exist in database $Database."
            }
        }
        catch {
            Write-Host "Error validating table existence: $_"
            throw
        }

        # Truncate table if requested
        if ($Truncate) {
            try {
                $truncateCommand = New-Object System.Data.SqlClient.SqlCommand("TRUNCATE TABLE $Table", $connection)
                [void]$truncateCommand.ExecuteNonQuery()
                Write-Host "Table truncated successfully."
            }
            catch {
                Write-Host "Error truncating table: $_"
                throw
            }
        }

        if ($ManageConstraints) {
            # Disable constraints
            try {
                Write-Host "Disabling foreign key constraints on $Table..."
                $disableConstraintsCommand = New-Object System.Data.SqlClient.SqlCommand(
                    "ALTER TABLE $Table NOCHECK CONSTRAINT ALL", 
                    $connection
                )
                [void](
                    $disableConstraintsCommand.ExecuteNonQuery() 
                )
                Write-Host "Foreign key constraints disabled."
            }
            catch {
                Write-Host "Error disabling constraints: $_"
                # Consider whether to throw or continue
            }
        }

        if ($ManageIndexes) {
            try {
                Write-Host "Disabling non-clustered indexes on $Table..."
                # Disable only non-clustered indexes; disabling the clustered index
                # (as ALTER INDEX ALL would) leaves the table inaccessible for the load
                $disableIndexesSql = @"
DECLARE @sql nvarchar(max) = N'';
SELECT @sql += N'ALTER INDEX ' + QUOTENAME(name) + N' ON $Table DISABLE;'
FROM sys.indexes
WHERE object_id = OBJECT_ID('$Table') AND type_desc = 'NONCLUSTERED';
EXEC sp_executesql @sql;
"@
                $disableIndexesCommand = New-Object System.Data.SqlClient.SqlCommand($disableIndexesSql, $connection)
                [void]$disableIndexesCommand.ExecuteNonQuery()
                Write-Host "Non-clustered indexes disabled."
            }
            catch {
                Write-Host "Error disabling indexes: $_"
                # Consider whether to throw or continue
            }
        }

    }

    Process {
        # Build BulkCopyOptions based on switches
        $bulkCopyOptions = [System.Data.SqlClient.SqlBulkCopyOptions]::Default
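        # SqlBulkCopyOptions is a [Flags] enum, so the selected options are combined with -bor below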

        if ($UseTableLock) {
            $bulkCopyOptions = $bulkCopyOptions -bor [System.Data.SqlClient.SqlBulkCopyOptions]::TableLock
        }

        if ($CheckConstraints) {
            $bulkCopyOptions = $bulkCopyOptions -bor [System.Data.SqlClient.SqlBulkCopyOptions]::CheckConstraints
        }

        if ($KeepNulls) {
            $bulkCopyOptions = $bulkCopyOptions -bor [System.Data.SqlClient.SqlBulkCopyOptions]::KeepNulls
        }

        if ($KeepIdentity) {
            $bulkCopyOptions = $bulkCopyOptions -bor [System.Data.SqlClient.SqlBulkCopyOptions]::KeepIdentity
        }

        # Create bulk copy
        $bulkCopy = New-Object System.Data.SqlClient.SqlBulkCopy($connection, $bulkCopyOptions, $null)
        $bulkCopy.DestinationTableName = $Table
        $bulkCopy.BatchSize = $BatchSize
        $bulkCopy.BulkCopyTimeout = $Timeout
        $bulkCopy.NotifyAfter = $BatchSize
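        # A NotifyAfter of 0 (the -BatchSize default) simply disables row-copied notifications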

        # Get table columns to ensure proper mapping
        $schemaCommand = New-Object System.Data.SqlClient.SqlCommand(
            "SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '$Table' ORDER BY ORDINAL_POSITION", 
            $connection
        )
        
        # Use an ordered dictionary so the keys preserve ORDINAL_POSITION;
        # the positional lookups below ($tableColumns[$i]) depend on this ordering
        $tableColumns = [ordered]@{}
        $reader = $schemaCommand.ExecuteReader()
        while ($reader.Read()) {
            $columnName = $reader["COLUMN_NAME"]
            $tableColumns[$columnName] = $columnName
        }
        $reader.Close()
        
        Write-Host "Detected $($tableColumns.Count) columns in SQL table."

        # Now that we have the columns from the SQL table, we can import and map the CSV data
        Write-Host "Reading data from $CsvFile..."
        if ($FirstRowColumns) {
            # Import the data normally, using the file's header row for column names
            $csvData = Import-Csv -Path $CsvFile -Delimiter $Delimiter
        } elseif ($SkipHeaderRow) {
            # Skip the file's header row and use the SQL table columns as headers instead
            [array]$Headers = $tableColumns.Keys
            $csvData = Import-Csv -Path $CsvFile -Delimiter $Delimiter -Header $Headers | Select-Object -Skip 1
        } else {
            # No header row in the file; use the SQL table columns as headers
            [array]$Headers = $tableColumns.Keys
            $csvData = Import-Csv -Path $CsvFile -Delimiter $Delimiter -Header $Headers
        }
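
        # Note: Import-Csv loads the entire file into memory. For very large files
        # a streaming parser (e.g. TextFieldParser from the Microsoft.VisualBasic.FileIO
        # namespace imported above) would scale better.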

        # Create DataTable
        $dataTable = New-Object System.Data.DataTable

        if (-not $csvData) {
            throw "No data rows found in $CsvFile."
        }

        $Columns = $csvData[0].PSObject.Properties.Name

        Write-Host "Mapping columns from CSV to SQL table"
        for ($i = 0; $i -lt $Columns.Count; $i++) {
            if ($FirstRowColumns) {
                # With a header row, the CSV columns must match the table columns by position
                if ($Columns[$i] -ne $tableColumns[$i]) {
                    throw "CSV column '$($Columns[$i])' at position $i does not match table column '$($tableColumns[$i])'"
                }
            }
            [void]$dataTable.Columns.Add($Columns[$i])
            [void]$bulkCopy.ColumnMappings.Add($i, $Columns[$i])
        }
        

        # Reset counter (already initialized in Begin block)
        $rowsProcessed = 0
        
        Write-Host "Processing CSV Data..."
        
        foreach ($csvRow in $csvData) {
            $row = $dataTable.NewRow()
            $csvRow.PSObject.Properties | ForEach-Object {
                $row[$_.Name] = $_.Value
            }
            $dataTable.Rows.Add($row)

            # When batching, flush the DataTable each time it reaches $BatchSize rows
            if ($BatchSize -gt 0 -and $dataTable.Rows.Count -ge $BatchSize) {
                try {
                    Write-Host "Writing batch of $($dataTable.Rows.Count) rows to SQL Server..."
                    [void]$bulkCopy.WriteToServer($dataTable)
                    $dataTable.Clear()
                }
                catch {
                    Write-Host "Error during bulk copy: $_"
                    throw
                }
            }

            $rowsProcessed++
            if ($ShowProgress) {
                $percentComplete = [int](($rowsProcessed / $csvData.Count) * 100)
                Write-Progress -Activity "Processing CSV data" -Status "Processed $rowsProcessed rows" -PercentComplete $percentComplete
            }
        }
        # Either write the entire table ($BatchSize = 0) or the remaining rows
        if ($dataTable.Rows.Count -gt 0) {
            Write-Host "Writing $($dataTable.Rows.Count) rows to SQL Server..."
            try {
                [void]$bulkCopy.WriteToServer($dataTable)
            }
            catch {
                Write-Host "Error during final bulk copy: $_"
                throw
            }
        }
    }
    End {
        
        # Final stats
        $totalTime = (Get-Date) - $startTime
        $rowsPerSecond = if ($totalTime.TotalSeconds -gt 0) {
            [math]::Round($rowsProcessed / $totalTime.TotalSeconds, 1)
        } else {
            0
        }
        Write-Host "Import completed. Total rows processed: $rowsProcessed in $($totalTime.ToString('hh\:mm\:ss')). Average speed: $rowsPerSecond rows/sec"

        if ($ManageIndexes) {
            try {
                Write-Host "Re-enabling non-clustered indexes on $Table..."
                $enableIndexesCommand = New-Object System.Data.SqlClient.SqlCommand(
                    "ALTER INDEX ALL ON $Table REBUILD",
                    $connection
                )
                [void](
                    $enableIndexesCommand.ExecuteNonQuery()
                )
                Write-Host "Non-clustered indexes re-enabled."
            }
            catch {
                Write-Host "Error re-enabling indexes: $_"
                # Consider whether to throw or continue
            }
        }

        if ($ManageConstraints) {
            try {
                Write-Host "Re-enabling foreign key constraints on $Table..."
                $enableConstraintsCommand = New-Object System.Data.SqlClient.SqlCommand(
                    "ALTER TABLE $Table CHECK CONSTRAINT ALL",
                    $connection
                )
                [void](
                    $enableConstraintsCommand.ExecuteNonQuery()
                )
                Write-Host "Foreign key constraints re-enabled."
            }
            catch {
                Write-Host "Error re-enabling constraints: $_"
                # Consider whether to throw or continue
            }
        }


        # Release the bulk copy and close the connection only after index and
        # constraint management is complete
        if ($bulkCopy) { $bulkCopy.Close() }
        $connection.Close()

        # Complete progress bar if it was shown
        if ($ShowProgress) {
            Write-Progress -Activity "Processing CSV data" -Completed
        }
        
        Write-Output "Successfully imported $rowsProcessed rows from $CsvFile to $Table in $($totalTime.ToString('hh\:mm\:ss')) ($rowsPerSecond rows/sec)"
    }
    <#
    .SYNOPSIS
    Imports data from a Delimited (csv, pipe, tab) file into a SQL Server table using SqlBulkCopy.
    .DESCRIPTION
    The Import-ToSqlDatabase function imports data from a delimited file into a SQL Server table using the SqlBulkCopy class.
    It supports CSV, pipe-delimited, and tab-delimited files, handles quoted fields, and provides options for managing the
    import, including truncating the table before import and disabling indexes and constraints for the duration of the load.
    .PARAMETER CsvFile
    The path to the delimited file to import.
    .PARAMETER SqlServer
    The name of the SQL Server instance to connect to.
    .PARAMETER Database
    The name of the database to import data into.
    .PARAMETER Table
    The name of the table to import data into.
    .PARAMETER Delimiter
    The delimiter used in the delimited file. The default is a comma (,).
    .PARAMETER FirstRowColumns
    Indicates that the first row of the file contains column headers. If this switch is used, the function will attempt to match
    the column headers in the file to the columns in the table.
    .PARAMETER SkipHeaderRow
    Indicates that the first row of the file contains headers that don't match table columns. The first row will be skipped,
    and columns will be mapped by position rather than by name.
    .PARAMETER Truncate
    Indicates that the table should be truncated before importing data.
    .PARAMETER SqlCredential
    A PSCredential object containing the username and password to use when connecting to SQL Server. If not provided,
    Windows authentication will be used.
    .PARAMETER BatchSize
    The number of rows to write to SQL Server per batch. The default is 0, which writes the entire file in a single batch.
    .PARAMETER Timeout
    The timeout in seconds for the bulk copy operation. The default is 600 seconds.
    .PARAMETER LogFile
    The path to a log file where import progress and errors will be written. If not provided, no log file will be created.
    .PARAMETER ShowProgress
    Indicates that a progress bar should be displayed during the import process. This can be useful for long-running imports.
    .PARAMETER SkipRowCount
    Indicates that the total row count should not be calculated for progress reporting. This can improve performance for large files.
    .PARAMETER UseTableLock
    Indicates that a table lock should be used during the import process. This can improve performance for large imports.
    .PARAMETER CheckConstraints
    Indicates that foreign key constraints should be checked during the import process.
    .PARAMETER KeepNulls
    Indicates that null values in the file should be preserved in the database.
    .PARAMETER KeepIdentity
    Indicates that identity values in the file should be preserved in the database.
    .PARAMETER ManageIndexes
    Indicates that non-clustered indexes on the table should be disabled during the import process and re-enabled after.
    .PARAMETER ManageConstraints
    Indicates that foreign key constraints on the table should be disabled during the import process and re-enabled after.
    .EXAMPLE
    Import-ToSqlDatabase -CsvFile 'C:\data\employees.csv' -SqlServer 'localhost' -Database 'HR' -Table 'Employees'
    Imports data from the 'employees.csv' file into the 'Employees' table in the 'HR' database on the 'localhost' SQL Server instance.
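    .EXAMPLE
    $cred = Get-Credential
    Import-ToSqlDatabase -CsvFile 'C:\data\employees.csv' -SqlServer 'localhost' -Database 'HR' -Table 'Employees' -SqlCredential $cred
    Imports the same file using SQL authentication instead of Windows authentication.
    .EXAMPLE
    Import-ToSqlDatabase -CsvFile 'C:\data\orders.csv' -SqlServer 'localhost' -Database 'Sales' -Table 'Orders' -BatchSize 5000 -ShowProgress
    Imports a file in batches of 5000 rows and displays a progress bar. (File, database, and table names are illustrative.)
    .EXAMPLE
    Import-ToSqlDatabase -CsvFile 'C:\data\orders.csv' -SqlServer 'localhost' -Database 'Sales' -Table 'Orders' -Truncate -ManageIndexes -ManageConstraints -UseTableLock
    Truncates the table, then imports with a table lock while non-clustered indexes and foreign key constraints are disabled for the duration of the load.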
    #>

}


function Import-BulkInsert {
    [CmdletBinding()]
    param(
        [Parameter(Mandatory=$true)]
        [string]$CsvFile,
        
        [Parameter(Mandatory=$true)]
        [string]$SqlServer,
        
        [Parameter(Mandatory=$true)]
        [string]$Database,
        
        [Parameter(Mandatory=$true)]
        [string]$Table,
        
        [Parameter(Mandatory=$false)]
        [string]$Delimiter = "|",
        
        [Parameter(Mandatory=$false)]
        [switch]$SkipHeaderRow,
        
        [Parameter(Mandatory=$false)]
        [switch]$Truncate,
        
        [Parameter(Mandatory=$false)]
        [System.Management.Automation.PSCredential]$SqlCredential,
        
        [Parameter(Mandatory=$false)]
        [string]$SharedPath,
        
        [Parameter(Mandatory=$false)]
        [System.Management.Automation.PSCredential]$Credentials,
        
        [Parameter(Mandatory=$false)]
        [switch]$HandleTrailingDelimiters,
        
        [Parameter(Mandatory=$false)]
        [int]$CommandTimeout = 600,

        [Parameter(Mandatory = $false)]
        [switch]$ShowProgress
    )
    
    
    # Determine a shared path location if not provided
    if (-not $SharedPath) {
        # Try to use the same directory as the input file
        $SharedPath = [System.IO.Path]::GetDirectoryName($CsvFile)
        Write-verbose "Using shared path: $SharedPath"
    }
    
    # Build connection string
    if ($SqlCredential) {
        $username = $SqlCredential.UserName
        $password = $SqlCredential.GetNetworkCredential().Password
        $connectionString = "Server=$SqlServer;Database=$Database;User Id=$username;Password=$password;"
    } else {
        $connectionString = "Server=$SqlServer;Database=$Database;Integrated Security=True;"
    }
    
    try {
        $connection = New-Object System.Data.SqlClient.SqlConnection($connectionString)
        $connection.Open()
        
        # Truncate if requested
        if ($Truncate) {
            $truncateCmd = New-Object System.Data.SqlClient.SqlCommand("TRUNCATE TABLE $Table", $connection)
            $truncateCmd.ExecuteNonQuery() | Out-Null
            Write-verbose "Table truncated."
        }
        
        # Get column info
        $columnsCmd = New-Object System.Data.SqlClient.SqlCommand(
            "SELECT COLUMN_NAME, DATA_TYPE, CHARACTER_MAXIMUM_LENGTH FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '$Table' ORDER BY ORDINAL_POSITION", 
            $connection
        )
        $columnsAdapter = New-Object System.Data.SqlClient.SqlDataAdapter($columnsCmd)
        $columnsTable = New-Object System.Data.DataTable
        $columnsAdapter.Fill($columnsTable) | Out-Null
        $columnCount = $columnsTable.Rows.Count
        
        Write-verbose "Found $columnCount columns in table $Table."
        
        # Process CSV file to shared path
        Write-Verbose "Using delimiter: '$Delimiter' for CSV processing"
        $ProcessCsvParams = @{
            CsvFile = $CsvFile
            SharedPath = $SharedPath
            SkipHeaderRow = $SkipHeaderRow
            HandleTrailingDelimiters = $HandleTrailingDelimiters
            Delimiter = $Delimiter
            ColumnCount = $columnCount
        }
        if ($ShowProgress) {
            $ProcessCsvParams["ShowProgress"] = $true
        }
        $processedCsvPath = Process_CsvToSharedPath @ProcessCsvParams
        
        if (-not $processedCsvPath) {
            throw "Failed to process CSV file to shared path."
        }
        
        # Create format file
        $formatFileParams = @{
            SharedPath = $SharedPath 
            ColumnCount = $columnCount 
            ColumnsTable = $columnsTable 
            Delimiter = $Delimiter
        }
        $formatFilePath = Create_BcpFormatFile @formatFileParams
        
        if (-not $formatFilePath) {
            throw "Failed to create BCP format file."
        }
        
        # Build BULK INSERT command
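        # FIRSTROW = 1 assumes Process_CsvToSharedPath has already stripped any
        # header row; MAXERRORS = 0 makes the load fail on the first bad row.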
        $bulkInsertSql = @"
BULK INSERT $Table
FROM '$processedCsvPath'
WITH (
    FORMATFILE = '$formatFilePath',
    FIRSTROW = 1,
    TABLOCK,
    MAXERRORS = 0,
    KEEPNULLS
)
"@

        
        Write-Verbose "Executing SQL Command: $bulkInsertSql"
        $bulkCmd = New-Object System.Data.SqlClient.SqlCommand($bulkInsertSql, $connection)
        $bulkCmd.CommandTimeout = $CommandTimeout
        
        $bulkCmd.ExecuteNonQuery() | Out-Null
        Write-Verbose "BULK INSERT completed successfully."
        
        # Return success
        $result = @{
            Success = $true
        }
    }
    catch {
        Write-Warning "Error during operation: $($_.Exception.Message)"
        if ($_.Exception.InnerException) {
            Write-Warning "Inner exception: $($_.Exception.InnerException.Message)"
        }
        $result = @{
            Success = $false
            Exception = $_.Exception
        }
    }
    finally {
        # Close connection
        if ($connection -and $connection.State -ne 'Closed') {
            $connection.Close()
            Write-Verbose "Database connection closed."
        }
        # Remove temporary files; they may not exist if processing failed early
        if ($processedCsvPath -and (Test-Path $processedCsvPath)) {
            Remove-Item -Path $processedCsvPath
        }
        if ($formatFilePath -and (Test-Path $formatFilePath)) {
            Remove-Item -Path $formatFilePath
        }
        # Rethrow the captured exception after cleanup
        if (-not $result['Success']) {
            throw $result['Exception']
        }
    }

    
    Write-Host "Import operation completed."
    <#
    .SYNOPSIS
    Imports data from a delimited file into a SQL Server table using BULK INSERT.
    .DESCRIPTION
    The Import-BulkInsert function imports data from a delimited file into a SQL Server table using the BULK INSERT command.
    The function supports importing CSV, pipe-delimited, and tab-delimited files. The function can handle quoted fields and
    supports options for skipping header rows, truncating the table before import, and handling trailing delimiters.
    .PARAMETER CsvFile
    The path to the delimited file to import.
    .PARAMETER SqlServer
    The name of the SQL Server instance to connect to.
    .PARAMETER Database
    The name of the database to import data into.
    .PARAMETER Table
    The name of the table to import data into.
    .PARAMETER Delimiter
    The delimiter used in the delimited file. The default is a pipe (|).
    .PARAMETER SkipHeaderRow
    Indicates that the first row of the file contains headers and should be skipped during import.
    .PARAMETER Truncate
    Indicates that the table should be truncated before importing data.
    .PARAMETER SqlCredential
    A PSCredential object containing the username and password to use when connecting to SQL Server. If not provided,
    Windows authentication will be used.
    .PARAMETER SharedPath
    A shared path accessible to both PowerShell and SQL Server where temporary files will be stored. If not provided, the
    directory of the input file will be used.
    .PARAMETER Credentials
    A PSCredential object to connect to the shared path.
    If not provided integrated credentials will be utilized.
    .PARAMETER HandleTrailingDelimiters
    Indicates that the function should handle cases where the input file has trailing delimiters that don't match the number
    of columns in the table. The function will add or remove delimiters as needed to match the column count.
    .EXAMPLE
    Import-BulkInsert -CsvFile 'C:\data\employees.csv' -SqlServer 'localhost' -Database 'HR' -Table 'Employees'
    Imports data from the 'employees.csv' file into the 'Employees' table in the 'HR' database on the 'localhost' SQL Server instance.
    .EXAMPLE
    Import-BulkInsert -CsvFile 'C:\data\employees.csv' -SqlServer 'localhost' -Database 'HR' -Table 'Employees' -SkipHeaderRow
    Imports data from the 'employees.csv' file into the 'Employees' table in the 'HR' database, skipping the header row in the file.
    .EXAMPLE
    Import-BulkInsert -CsvFile 'C:\data\employees.csv' -SqlServer 'localhost' -Database 'HR' -Table 'Employees' -Truncate
    Imports data from the 'employees.csv' file into the 'Employees' table in the 'HR' database, truncating the table before import.
    .EXAMPLE
    Import-BulkInsert -CsvFile 'C:\data\employees.csv' -SqlServer 'localhost' -Database 'HR' -Table 'Employees' -HandleTrailingDelimiters
    Imports data from the 'employees.csv' file into the 'Employees' table in the 'HR' database, handling trailing delimiters in the file.
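    .EXAMPLE
    Import-BulkInsert -CsvFile '\\fileserver\import\employees.csv' -SqlServer 'SQL01' -Database 'HR' -Table 'Employees' -SharedPath '\\fileserver\import' -Truncate
    Imports from a UNC path reachable by both the client and the SQL Server service account, truncating the table first. (Server and share names are illustrative.)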
    .NOTES
    This function is significantly faster than the Import-ToSqlDatabase function, but it has fewer options and may not handle all edge cases.
    If the SQL server is on a different machine, the shared path must be accessible to both the local machine and the SQL server.
    The SQL Server Service needs to be running under an account that has access to the shared path and the account
    needs "Trust this user for delegation to any service (Kerberos only)" enabled for network paths.
    The shared path CANNOT be hosted on an AWS Storage Gateway File Gateway, but it can be a share on an Amazon FSx for Windows File Server.
    If you are importing to a hosted SQL Server service, you will most likely need to use the Import-ToSqlDatabase function instead.
    #>

}
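
# Export only the public functions; helpers dot-sourced from Private.ps1
# (such as Process_CsvToSharedPath and Create_BcpFormatFile) stay module-private.
Export-ModuleMember -Function Import-ToSqlDatabase, Import-BulkInsert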