BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//pretalx//pretalx.devconf.info//devconf-us-2025//talk//CXPTQY
BEGIN:VTIMEZONE
TZID:EST
BEGIN:STANDARD
DTSTART:20001029T030000
RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=10;UNTIL=20061029T070000Z
TZNAME:EST
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
END:STANDARD
BEGIN:STANDARD
DTSTART:20071104T030000
RRULE:FREQ=YEARLY;BYDAY=1SU;BYMONTH=11
TZNAME:EST
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
END:STANDARD
BEGIN:DAYLIGHT
DTSTART:20000402T030000
RRULE:FREQ=YEARLY;BYDAY=1SU;BYMONTH=4;UNTIL=20060402T080000Z
TZNAME:EDT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
END:DAYLIGHT
BEGIN:DAYLIGHT
DTSTART:20070311T030000
RRULE:FREQ=YEARLY;BYDAY=2SU;BYMONTH=3
TZNAME:EDT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
END:DAYLIGHT
END:VTIMEZONE
BEGIN:VEVENT
UID:pretalx-devconf-us-2025-CXPTQY@pretalx.devconf.info
DTSTART;TZID=EST:20250919T112000
DTEND;TZID=EST:20250919T113000
DESCRIPTION:My auto-tuning project aims to find the best settings for runni
 ng large language models using vLLM. We want to maximize the number of out
 put tokens / second (throughput). At the same time\, we need to minimize t
 he latency. Specifically we will ensure that the p95 latency is lower tha
 n the set baseline (default parameters). This involves testing different p
 arameter configurations for supported models like Qwen3-32B-FP8 and Qwen3-
 30B-A3B-FP8.
DTSTAMP:20260315T080134Z
LOCATION:Hewitt Boardroom (Capacity 35)
SUMMARY:Auto-tuning vllm - Rehan Samaratunga
URL:https://pretalx.devconf.info/devconf-us-2025/talk/CXPTQY/
END:VEVENT
END:VCALENDAR
