<#
Created by Garry Ohanian @moderna
Automates ESXi iSCSI socket buffer tuning (BDP-based) with:
- vmkping RTT (jumbo) primary, esxcli fallback
- Version/key gating (7.0 U3d+ or 8.x)
- MTU warning, canary rollout, artifacts + rollback
- Post-apply read-back verification
#>
# Script parameters. Only -VCenter and -User are mandatory; the password is
# prompted for interactively later in the script.
param(
# vCenter server passed to Connect-VIServer -Server.
[Parameter(Mandatory=$true)] [string]$VCenter,
# Account used to log in to vCenter.
[Parameter(Mandatory=$true)] [string]$User,
# Optional cluster name; when empty, every host returned by Get-VMHost is in scope.
[string]$ClusterName,
# VMkernel interface used for the MTU check and as the ping source interface.
[string]$Vmk = "vmk1",
# Link speed in Gbit/s fed into the bandwidth-delay-product calculation.
[int]$LinkGbps = 25,
# Multiplier applied on top of the raw BDP.
[double]$Headroom = 1.15,
# Recommendations are rounded up to a multiple of this many KB.
[int]$RoundKB = 64,
# Ping payload size in bytes (sent with don't-fragment set).
# Presumably 9000-byte MTU minus 28 bytes of IP/ICMP headers - confirm.
[int]$PingSize = 8972,
# Number of pings per target.
[int]$PingCount = 10,
# Percentage of computed hosts to include when -ForceFull is not given
# (count is rounded up).
[int]$RolloutPercent = 25, # default canary
# Write the settings; without it the script only reports (dry run).
[switch]$Apply,
# Skip the canary subset and target every computed host.
[switch]$ForceFull, # override canary
# Skip the per-host vmk MTU warning check.
[switch]$SkipJumboCheck,
# Parent directory under which the per-run artifacts folder is created.
[string]$ArtifactsDir = ".",
# Free-text reference stored in manifest.json; not used for anything else in this script.
[string]$ChangeTicket # optional: change tracking
)
# ---- helpers ----
# Guard helper: does nothing when $ok is truthy, otherwise throws $msg as a
# terminating error.
function Must($ok,$msg){
  if($ok){ return }
  throw $msg
}
# Conversion factor: KiB per second carried by 1 Gbit/s
# (10^9 bits / 8 bits-per-byte / 1024 bytes-per-KiB = 122070.3125).
function KiBpsPerGbps(){
  return 1e9 / 8 / 1024
}
# Rounds $v up to the next multiple of $m and returns it as an int.
#   $v - value to round (KB)
#   $m - rounding multiple (default 64)
# A value of zero or less yields $m itself, i.e. one full multiple.
function Round-UpKB([double]$v,[int]$m=64){
  if($v -le 0){ return $m }
  $multiples = [math]::Ceiling($v / $m)
  return [int]($multiples * $m)
}
# Bandwidth-delay product in KiB, scaled by a headroom factor.
#   $gbps  - link speed in Gbit/s
#   $rttMs - round-trip time in milliseconds
#   $head  - multiplier applied to the raw BDP
# Returns: (KiB/s per Gbps) * $gbps * ($rttMs / 1000) * $head as a double.
function BDP-KiB([int]$gbps,[double]$rttMs,[double]$head){
  # PowerShell functions are invoked without "()": writing KiBpsPerGbps() is
  # a parser error, so the helper call is wrapped in a grouping expression.
  $kibPerSecond = (KiBpsPerGbps) * $gbps
  $kibPerSecond * ($rttMs / 1000.0) * $head
}
# Sizing bounds in KB: later in this script computed values are capped at
# $MAXKB and raised to at least $DEF_SND (send) / $DEF_RCV (receive).
# NOTE(review): presumably the ESXi maximum and default values - confirm.
$MAXKB=6144; $DEF_SND=600; $DEF_RCV=256
# Turn every cmdlet error into a terminating error so failures are not skipped.
$ErrorActionPreference="Stop"
if(-not (Get-Module -ListAvailable -Name VMware.PowerCLI)){ throw "Install-Module VMware.PowerCLI" }
$pass = Read-Host -AsSecureString "Password for $User"
# Read-Host -AsSecureString yields a SecureString; hand it over inside a
# PSCredential rather than through -Password, so the secret is never coerced
# to (or mistaken for) a plain string.
$cred = New-Object System.Management.Automation.PSCredential($User, $pass)
Connect-VIServer -Server $VCenter -Credential $cred | Out-Null
# ---- scope ----
# Hosts of the named cluster when -ClusterName is set, otherwise every host
# Get-VMHost returns; sorted by name so later processing order is stable.
if($ClusterName){
  $hosts = Get-Cluster -Name $ClusterName | Get-VMHost
}else{
  $hosts = Get-VMHost
}
$hosts = $hosts | Sort-Object Name
Must ($hosts) "No ESXi hosts in scope."
# ---- gating: require keys to exist (implicitly ensures 7.0 U3d+ / 8.x) ----
# Throws when either iSCSI socket-buffer advanced setting is missing on the host.
#   $h - ESXi host object to inspect
# The parameter uses the fully qualified PowerCLI interface name: a bare
# [VMHost] is not resolvable without a "using namespace" statement and makes
# every call fail with "Unable to find type".
function Assert-KeysSupported([VMware.VimAutomation.ViCore.Types.V1.Inventory.VMHost]$h){
  foreach($k in 'ISCSI.SocketSndBufLenKB','ISCSI.SocketRcvBufLenKB'){
    # SilentlyContinue: a missing key must surface as an empty result here,
    # not as a terminating error from the cmdlet.
    $s = Get-AdvancedSetting -Entity $h -Name $k -ErrorAction SilentlyContinue
    if(-not $s){ throw "$($h.Name): advanced key $k not found (needs vSphere 7.0 U3d+)." }
  }
}
# ---- MTU check ----
# Warns when the named VMkernel interface has an MTU below 9000.
#   $h   - ESXi host object to inspect
#   $vmk - VMkernel interface name (e.g. vmk1)
# Best effort only: emits warnings and never throws. An interface that is not
# found produces no output at all.
# The parameter uses the fully qualified PowerCLI interface name because a
# bare [VMHost] is not resolvable without a "using namespace" statement.
function Check-MTU([VMware.VimAutomation.ViCore.Types.V1.Inventory.VMHost]$h,[string]$vmk){
  try{
    $esx = Get-EsxCli -VMHost $h -V2
    $row = $esx.network.ip.interface.list.Invoke() |
      Where-Object { $_.Name -eq $vmk } |
      Select-Object -First 1
    if($row -and [int]$row.Mtu -lt 9000){
      Write-Warning "$($h.Name): $vmk MTU=$($row.Mtu) (<9000). Jumbo RTT may fail."
    }
  }catch{ Write-Warning "$($h.Name): MTU check failed: $_" }
}
# ---- RTT: vmkping primary (SSH), esxcli fallback ----
# Measures the average round-trip time from a host's VMkernel interface to a target.
#   $VMHost   - ESXi host to ping from (first positional argument). The old
#               name -Host is kept as an alias; the variable itself had to be
#               renamed because $Host is a read-only automatic variable and a
#               parameter with that name fails at every call.
#   $Vmk      - source VMkernel interface
#   $TargetIP - address to ping
#   $Size     - payload size in bytes, sent with don't-fragment set
#   $Count    - number of pings
# Returns the average RTT in milliseconds as [double], or $null (after a
# warning) when both methods fail.
function Get-RTT(
  [Alias('Host')][VMware.VimAutomation.ViCore.Types.V1.Inventory.VMHost]$VMHost,
  [string]$Vmk,
  [string]$TargetIP,
  [int]$Size=8972,
  [int]$Count=10
){
  # primary
  # NOTE(review): Invoke-VMHostSSH is not defined in this file; if no loaded
  # module provides it, this path always falls through to the esxcli fallback.
  try{
    $cmd = "vmkping -I $Vmk -s $Size -d -c $Count $TargetIP"
    $out = Invoke-VMHostSSH -VMHost $VMHost -ScriptText $cmd -ErrorAction Stop
    $txt = ($out.Output | Out-String)
    $m = [regex]::Match($txt,'min/avg/max\s*=\s*[\d\.]+/([\d\.]+)/[\d\.]+\s*ms')
    if($m.Success){ return [double]$m.Groups[1].Value }
    throw "vmkping parse failed. Raw: $txt"
  }catch{
    # fallback
    try{
      $esx = Get-EsxCli -VMHost $VMHost -V2
      # The V2 interface takes the long esxcli option names as keys. Hash
      # literal keys are case-insensitive, so single-letter keys such as
      # S and s cannot coexist (duplicate-key parser error).
      $pingArgs = @{ interface=$Vmk; count=$Count; host=$TargetIP; df=$true; size=$Size }
      $out = $esx.network.diag.ping.Invoke($pingArgs)

      # V2 returns a structured object rather than ping text.
      # NOTE(review): property names Summary.RoundtripAvgMS / PacketLost are
      # assumed from esxcli's "network diag ping" output - confirm on the
      # target ESXi build. RoundtripAvgMS is expected to be whole milliseconds.
      $summary = $out.Summary
      if($summary -and $summary.PSObject.Properties['RoundtripAvgMS']){
        $lost = $summary.PSObject.Properties['PacketLost']
        # With every packet lost the average is meaningless; treat as failure
        # instead of reporting an RTT of 0.
        if($lost -and [double]$lost.Value -ge 100){ throw "esxcli ping: 100% packet loss" }
        return [double]$summary.RoundtripAvgMS
      }

      # Last resort: look for a classic "rtt min/avg/max" line in the rendered output.
      $txt = ($out | Out-String)
      $m = [regex]::Match($txt,'rtt.*=\s*[\d\.]+/([\d\.]+)/[\d\.]+\s*ms')
      if($m.Success){ return [double]$m.Groups[1].Value }
      throw "esxcli ping parse failed. Raw: $txt"
    }catch{
      Write-Warning "$($VMHost.Name): RTT failed to $TargetIP ($_)"
      return $null
    }
  }
}
# ---- iSCSI target discovery ----
# Returns the unique IPv4 addresses of the iSCSI targets a host currently has
# connections to.
#   $h - ESXi host object to query
# Best effort: on any failure a warning is written and whatever was collected
# so far (possibly nothing) is returned.
# The parameter uses the fully qualified PowerCLI interface name because a
# bare [VMHost] is not resolvable without a "using namespace" statement.
function Get-IScsiTargetIPs([VMware.VimAutomation.ViCore.Types.V1.Inventory.VMHost]$h){
  $ips=@()
  try{
    $esxcli = Get-EsxCli -VMHost $h -V2
    # Keep the Software/Dependent filter when the adapter rows expose a Type
    # field; when they do not, keep every adapter instead of silently
    # discarding all of them.
    # NOTE(review): "esxcli iscsi adapter list" is assumed to report
    # Adapter/Driver/State/UID/Description without a Type column - confirm.
    $adps = $esxcli.iscsi.adapter.list.Invoke() | Where-Object {
      (-not $_.PSObject.Properties['Type']) -or ($_.Type -match 'Software|Dependent')
    }
    foreach($a in $adps){
      # RemoteAddress is a field of the per-connection listing, not of
      # "iscsi session list" (NOTE(review): confirm on the target build).
      $conns = $esxcli.iscsi.session.connection.list.Invoke(@{adapter=$a.Adapter})
      foreach($c in $conns){
        # Accept only a full dotted quad so fragments (for example the first
        # group of an IPv6 address) are never used as ping targets.
        if($c.RemoteAddress -match '^(\d{1,3}(?:\.\d{1,3}){3})'){ $ips += $matches[1] }
      }
    }
  }catch{ Write-Warning "iSCSI discovery failed on $($h.Name): $_" }
  $ips | Select-Object -Unique
}
# ---- artifacts ----
# Creates a timestamped run directory under $ArtifactsDir and writes
# manifest.json describing the parameters of this run.
$runId=(Get-Date).ToString('yyyyMMdd-HHmmss')
$runDir = Join-Path $ArtifactsDir ("iscsi-sockbuf-run-"+$runId)
New-Item -ItemType Directory -Force -Path $runDir | Out-Null
# Recorded rollout percentage: 100 when -ForceFull is given. Written as
# if/else because the ternary operator only parses on PowerShell 7+ and would
# stop the whole script from loading on Windows PowerShell 5.1.
$effectivePercent = if($ForceFull.IsPresent){ 100 } else { $RolloutPercent }
$manifest = @{
  time=(Get-Date); vcenter=$VCenter; cluster=$ClusterName; vmk=$Vmk; linkGbps=$LinkGbps
  headroom=$Headroom; roundKB=$RoundKB; pingSize=$PingSize; pingCount=$PingCount
  rolloutPercent=$effectivePercent; apply=$Apply.IsPresent
  changeTicket=$ChangeTicket
}
$manifest | ConvertTo-Json -Depth 4 | Out-File (Join-Path $runDir "manifest.json")
# ---- gather + compute ----
# $rows    : one record per (host, target) with the measured RTT and derived sizes
# $perHost : one record per host carrying the largest send/receive size over its targets
$rows=@(); $perHost=@()
foreach($h in $hosts){
  # Hosts lacking the advanced keys are reported and skipped.
  try{ Assert-KeysSupported $h }catch{ Write-Warning $_; continue }
  if(-not $SkipJumboCheck){ Check-MTU $h $Vmk }

  $portalIPs = Get-IScsiTargetIPs $h
  if(-not $portalIPs){ Write-Warning "$($h.Name): no iSCSI sessions; skipping"; continue }

  $sizes=@()
  foreach($ip in $portalIPs){
    $avgMs = Get-RTT $h $Vmk $ip $PingSize $PingCount
    # A target whose RTT could not be measured contributes nothing.
    if($null -ne $avgMs){
      $bdpKiB    = BDP-KiB $LinkGbps $avgMs $Headroom
      $roundedKB = Round-UpKB $bdpKiB $RoundKB
      # Raise to the per-direction floor first, then cap at $MAXKB.
      $sndFloor = [math]::Max($roundedKB,$DEF_SND)
      $rcvFloor = [math]::Max($roundedKB,$DEF_RCV)
      $sndKB = [int][math]::Min($sndFloor,$MAXKB)
      $rcvKB = [int][math]::Min($rcvFloor,$MAXKB)
      $rows += [pscustomobject]@{
        Host=$h.Name
        TargetIP=$ip
        RTT_ms=[math]::Round($avgMs,3)
        LinkGbps=$LinkGbps
        Headroom=$Headroom
        BDP_KiB=[math]::Round($bdpKiB,0)
        RecommendKB=$roundedKB
        Apply_SndKB=$sndKB
        Apply_RcvKB=$rcvKB
      }
      $sizes += @{ snd=$sndKB; rcv=$rcvKB }
    }
  }

  # Hosts without a single successful measurement get no per-host record.
  if($sizes.Count -le 0){ continue }
  $topSnd = ($sizes.snd | Measure-Object -Maximum).Maximum
  $topRcv = ($sizes.rcv | Measure-Object -Maximum).Maximum
  $perHost += [pscustomobject]@{ Host=$h.Name; SndKB=$topSnd; RcvKB=$topRcv }
}
# Persist the per-target recommendations and the per-host aggregate.
$recCsv = Join-Path $runDir "recommendations.csv"
$rows | Export-Csv -NoTypeInformation -Path $recCsv
$aggCsv = Join-Path $runDir "per-host.csv"
$perHost | Export-Csv -NoTypeInformation -Path $aggCsv
# snapshot before (and prepare rollback file)
# Both files receive the same content: the current value of each socket-buffer
# setting on every in-scope host (empty when the key does not exist).
$beforeCsv = Join-Path $runDir "advsettings-before.csv"
$rollbackCsv = Join-Path $runDir "rollback.csv"
$names = @('ISCSI.SocketSndBufLenKB','ISCSI.SocketRcvBufLenKB')
$cur=@()
foreach($h in $hosts){
  foreach($n in $names){
    $s=Get-AdvancedSetting -Entity $h -Name $n -ErrorAction SilentlyContinue
    # "?" is a legal variable-name character, so "$s?.Value" reads a variable
    # called "s?" and always produced an empty value. Use an explicit null
    # check so the snapshot really contains the current settings.
    $value = if($null -ne $s){ $s.Value } else { $null }
    $cur += [pscustomobject]@{ Host=$h.Name; Setting=$n; Value=$value }
  }
}
$cur | Export-Csv -NoTypeInformation -Path $beforeCsv
$cur | Export-Csv -NoTypeInformation -Path $rollbackCsv
# ---- rollout selection ----
# Chooses which computed hosts are targeted: all of them with -ForceFull,
# otherwise the first $RolloutPercent percent (rounded up) in name order.
# @(...) forces an array even for a single host: on Windows PowerShell a lone
# [pscustomobject] has no Count property, which made a one-host canary
# resolve to 0 hosts.
$applySet = @($perHost | Sort-Object Host)
if(-not $ForceFull){
  $take=[int][math]::Ceiling(($applySet.Count * $RolloutPercent)/100.0)
  $applySet = @($applySet | Select-Object -First $take)
  Write-Host ("Canary rollout: {0}% -> {1} host(s)" -f $RolloutPercent,$take)
}else{
  Write-Host "Full rollout forced."
}
# ---- apply + verify ----
# Without -Apply nothing is written. With -Apply each selected host gets both
# settings written (only when missing or different), then read back; a
# mismatch throws and stops the run.
if(-not $Apply){
  Write-Host "Dry-run only. Use -Apply (and optionally -ForceFull) to enforce."
}else{
  foreach($row in $applySet){
    $esxHost = Get-VMHost -Name $row.Host
    # set
    $wanted = @(
      @{ k="ISCSI.SocketSndBufLenKB"; v=$row.SndKB },
      @{ k="ISCSI.SocketRcvBufLenKB"; v=$row.RcvKB }
    )
    foreach($pair in $wanted){
      $existing = Get-AdvancedSetting -Entity $esxHost -Name $pair.k -ErrorAction SilentlyContinue
      if($existing){
        # Leave the setting untouched when it already holds the desired value.
        if($existing.Value -ne $pair.v){
          Set-AdvancedSetting $existing -Value $pair.v -Confirm:$false | Out-Null
        }
      }else{
        New-AdvancedSetting -Entity $esxHost -Name $pair.k -Value $pair.v -Confirm:$false | Out-Null
      }
    }
    # verify
    $sndNow = (Get-AdvancedSetting -Entity $esxHost -Name "ISCSI.SocketSndBufLenKB").Value
    $rcvNow = (Get-AdvancedSetting -Entity $esxHost -Name "ISCSI.SocketRcvBufLenKB").Value
    $mismatch = ($sndNow -ne $row.SndKB) -or ($rcvNow -ne $row.RcvKB)
    if($mismatch){
      throw ("{0}: post-apply verification failed (Snd desired {1} got {2}; Rcv desired {3} got {4})" -f $esxHost.Name,$row.SndKB,$sndNow,$row.RcvKB,$rcvNow)
    }
    Write-Host ("Applied {0}: Snd={1}KB Rcv={2}KB (verified)" -f $row.Host,$row.SndKB,$row.RcvKB)
  }
}
# Always tell the operator where the run's files and the rollback data are.
Write-Host "Artifacts in: $runDir"
Write-Host "Rollback file: $rollbackCsv"