Public/Import-OSAllBulkDocument.ps1
function Import-OSAllBulkDocument {
    <#
    .SYNOPSIS
        Takes an array of hashtables or PSCustomObjects, and bulk imports to an OpenSearch instance. _id generated by OpenSearch. Duplicates may occur.

    .DESCRIPTION
        Takes an array of hashtables or PSCustomObjects. _id will be generated by OpenSearch, and cannot be specified. See Insert-UniqueBulkData for alternative.

        Uses the bulk import API to improve import speed of large quantities of data.
        Documents are sent in batches of at most UploadLimit documents per request. Every batch is
        attempted even if an earlier batch reported errors; the collected errors are thrown at the end.

    .PARAMETER Index
        Index you would like the data to be added to. The name is lower-cased before use.

    .PARAMETER Documents
        Array of Hashtables or PSCustomObjects which will be indexed into OpenSearch.

    .PARAMETER UploadLimit
        Break up upload to this many files per attempt. Max 4999 (larger values are clamped to 4999).
        Must be at least 1. Lowering it is sometimes necessary if individual documents are large.

    .PARAMETER OpType
        Operation to perform on the API. This will default to index, and usually that is fine. Data streams need 'create'

    .PARAMETER Credential
        PSCredential for basic authentication to OpenSearch.

    .PARAMETER Certificate
        User certificate for certificate authentication to OpenSearch.

    .PARAMETER OpenSearchURL
        URL(s) to OpenSearch instance. Do not include any path or api endpoint.

    .OUTPUTS
        [bool] $True when no bulk errors were reported. Otherwise whatever Find-OSBulkError
        reported is thrown.
    #>
    [OutputType([bool])]
    [CmdletBinding()]
    param(
        [Parameter(Mandatory=$true)]
        [string]$Index,

        [Parameter(Mandatory=$true)]
        $Documents,

        [Int64]$UploadLimit=4999,

        [string]$OpType,

        [System.Management.Automation.Credential()]
        [PSCredential]$Credential=[PSCredential]::Empty,

        [System.Security.Cryptography.X509Certificates.X509Certificate2]$Certificate,

        $OpenSearchURL
    )

    # Index names must be lowercase. Use the invariant culture so the result does not depend on
    # the session's culture (e.g. 'I' must become 'i', never a dotless 'i').
    $Index = $Index.ToLowerInvariant()

    # A limit below 1 would make the chunking loop never advance.
    if ($UploadLimit -lt 1){
        throw [System.ArgumentOutOfRangeException]::new('UploadLimit', $UploadLimit, 'UploadLimit must be between 1 and 4999.')
    }

    # OpenSearch defined limit for uploads is 4999 documents per request.
    if ($UploadLimit -gt 4999){
        $UploadLimit = 4999
    }

    # Every document occupies two lines in the request body (action line + document line).
    # Keeping this value even guarantees an action is never separated from its document.
    [int]$LinesPerRequest = [int]$UploadLimit * 2

    # Default $OpType to 'index'
    if ([string]::IsNullOrEmpty($OpType)){
        $OpType = 'index'
    }

    # The _bulk api uses newline-delimited JSON: every other line is an action, followed by the
    # document to perform that action on.
    # See examples: https://opensearch.org/docs/latest/api-reference/document-apis/bulk/
    # The action line is identical for every document, so build it once.
    $ActionLine = "{ `"$OpType`": { `"_index`": `"$Index`" } }"

    $BulkLines = [System.Collections.Generic.List[string]]::new()
    foreach ($Document in $Documents){
        $BulkLines.Add($ActionLine)
        $BulkLines.Add(($Document | ConvertTo-Json -Depth 100 -Compress))
    }

    $Request = '/_bulk'

    # OpenSearch has a limit on how many records _bulk can handle at a time.
    # Split apart bulk requests to smaller chunks.
    if ($BulkLines.Count -gt $LinesPerRequest){
        # Keep track of bulk errors, throw if any are found, but complete the actions for the rest
        $BulkErrors = [System.Collections.Generic.List[PSObject]]::new()

        # Strictly less-than: when the line count is an exact multiple of the chunk size there is
        # no trailing partial chunk, and an empty body must not be posted.
        for ($Start = 0; $Start -lt $BulkLines.Count; $Start += $LinesPerRequest){
            # Grab the next $LinesPerRequest lines (or fewer for the final chunk)
            $Length = [Math]::Min($LinesPerRequest, $BulkLines.Count - $Start)

            # Lines are newline separated and the body must end with a newline.
            # -join is used instead of changing $ofs so no preference variable is touched.
            $RequestBody = ($BulkLines.GetRange($Start, $Length) -join "`n") + "`n"

            # Perform bulk request
            $Response = Invoke-OSCustomWebRequest -OpenSearchUrls $OpenSearchURL -Request $Request -Method "POST" -Credential $Credential -Certificate $Certificate -Body $RequestBody

            # Pass to bulk error handling function
            $TempErrors = Find-OSBulkError $Response
            if ($null -ne $TempErrors){
                $BulkErrors.Add($TempErrors)
            }
        }

        $Errors = $BulkErrors.ToArray()
    }
    # Current request body is sufficiently sized
    else {
        $RequestBody = ($BulkLines -join "`n") + "`n"

        # Perform bulk request
        $Response = Invoke-OSCustomWebRequest -OpenSearchUrls $OpenSearchURL -Request $Request -Method "POST" -Credential $Credential -Certificate $Certificate -Body $RequestBody

        # Pass to bulk error handling function
        $Errors = Find-OSBulkError $Response
    }

    # @() makes the count well-defined when $Errors is $null or a single object.
    if (@($Errors).Count -eq 0){
        return $True
    }

    throw $Errors
}

Export-ModuleMember -Function Import-OSAllBulkDocument