This document contains KQL (Kusto Query Language) queries for monitoring Azure OpenAI usage in Log Analytics.

Prerequisites:
- Azure OpenAI diagnostic settings enabled (logs sent to Log Analytics)
- Access to the Log Analytics workspace
// Hourly token usage per metric (prompt, completion, and total transactions).
AzureMetrics
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| where MetricName in ("ProcessedPromptTokens", "GeneratedCompletionTokens", "TokenTransaction")
| summarize TotalTokens = sum(Total) by MetricName, bin(TimeGenerated, 1h)
| order by TimeGenerated desc

AzureMetrics
// Total prompt vs. completion tokens over the selected time range.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| where MetricName in ("ProcessedPromptTokens", "GeneratedCompletionTokens")
| summarize TotalTokens = sum(Total) by MetricName
| order by TotalTokens desc

AzureDiagnostics
// Call count and request/response payload bytes per model deployment.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
// properties_s carries the request detail payload as a JSON string.
| extend props = parse_json(properties_s)
| summarize
    TotalCalls = count(),
    TotalRequestBytes = sum(tolong(props.requestLength)),
    TotalResponseBytes = sum(tolong(props.responseLength))
    by Model = tostring(props.modelDeploymentName)
| order by TotalCalls desc

AzureDiagnostics
// Hourly call volume per model deployment.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| summarize CallCount = count() by tostring(props.modelDeploymentName), bin(TimeGenerated, 1h)
| order by TimeGenerated desc

AzureDiagnostics
// Hourly call volume split by streaming vs. non-streaming responses.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| summarize CallCount = count() by StreamType = tostring(props.streamType), bin(TimeGenerated, 1h)

AzureDiagnostics
// Latency statistics (avg/max/min/p95) per model deployment.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
// requestTime/responseTime are in ticks (100 ns); 10,000 ticks = 1 ms.
| extend latencyTicks = tolong(props.responseTime) - tolong(props.requestTime)
| extend latencyMs = latencyTicks / 10000
| summarize
    AvgLatencyMs = avg(latencyMs),
    MaxLatencyMs = max(latencyMs),
    MinLatencyMs = min(latencyMs),
    P95LatencyMs = percentile(latencyMs, 95)
    by Model = tostring(props.modelDeploymentName)

AzureDiagnostics
// Hourly average and max latency per model deployment.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
// Tick difference divided by 10,000 converts to milliseconds.
| extend latencyTicks = tolong(props.responseTime) - tolong(props.requestTime)
| extend latencyMs = latencyTicks / 10000
| summarize AvgLatencyMs = avg(latencyMs), MaxLatencyMs = max(latencyMs)
    by tostring(props.modelDeploymentName), bin(TimeGenerated, 1h)
| order by TimeGenerated desc

AzureDiagnostics
// Per-request latency and payload sizes, newest first.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| extend latencyTicks = tolong(props.responseTime) - tolong(props.requestTime)
| extend latencyMs = latencyTicks / 10000
| project
    TimeGenerated,
    Model = tostring(props.modelDeploymentName),
    LatencyMs = latencyMs,
    RequestBytes = tolong(props.requestLength),
    ResponseBytes = tolong(props.responseLength),
    StreamType = tostring(props.streamType)
| order by TimeGenerated desc

AzureDiagnostics
// Full per-request detail: model, version, API, stream type, sizes, latency, caller.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| extend latencyTicks = tolong(props.responseTime) - tolong(props.requestTime)
| extend latencyMs = latencyTicks / 10000
| project
    TimeGenerated,
    Model = tostring(props.modelDeploymentName),
    ModelVersion = tostring(props.modelVersion),
    // NOTE(review): props.apiName is surfaced as ApiVersion — confirm that mapping is intended.
    ApiVersion = tostring(props.apiName),
    StreamType = tostring(props.streamType),
    RequestBytes = tolong(props.requestLength),
    ResponseBytes = tolong(props.responseLength),
    LatencyMs = latencyMs,
    ObjectId = tostring(props.objectId)
| order by TimeGenerated desc

AzureDiagnostics
// Requests slower than 5 seconds, slowest first.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| extend latencyTicks = tolong(props.responseTime) - tolong(props.requestTime)
| extend latencyMs = latencyTicks / 10000
// 5000 ms threshold; adjust to taste.
| where latencyMs > 5000
| project
    TimeGenerated,
    Model = tostring(props.modelDeploymentName),
    LatencyMs = latencyMs,
    RequestBytes = tolong(props.requestLength),
    ResponseBytes = tolong(props.responseLength)
| order by LatencyMs desc

AzureDiagnostics
// Overall summary: request count, latency stats, and total traffic in MB.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| extend latencyTicks = tolong(props.responseTime) - tolong(props.requestTime)
| extend latencyMs = latencyTicks / 10000
| summarize
    TotalRequests = count(),
    AvgLatencyMs = avg(latencyMs),
    P95LatencyMs = percentile(latencyMs, 95),
    // 1048576 = 1024 * 1024 bytes per MB; .0 forces real division.
    TotalRequestMB = sum(tolong(props.requestLength)) / 1048576.0,
    TotalResponseMB = sum(tolong(props.responseLength)) / 1048576.0

AzureDiagnostics
// Daily call volume and total request payload (KB) per model deployment.
| where ResourceProvider == "MICROSOFT.COGNITIVESERVICES"
| extend props = parse_json(properties_s)
| summarize
DailyCalls = count(),
TotalRequestKB = sum(tolong(props.requestLength)) / 1024
by bin(TimeGenerated, 1d), Model = tostring(props.modelDeploymentName)
| order by TimeGenerated desc

Notes:
- Token metrics come from the `AzureMetrics` table
- Request details come from the `AzureDiagnostics` table with `properties_s` JSON parsing
- Latency is calculated from `responseTime - requestTime` (in ticks; divide by 10000 for milliseconds)
- Logs may take 5-15 minutes to appear after API calls