BEGIN:VCALENDAR
VERSION:2.0
PRODID:-//pretalx//pretalx.devconf.info//devconf-us-2025//talk//CQQRKB
BEGIN:VTIMEZONE
TZID:EST
BEGIN:STANDARD
DTSTART:20001029T030000
RRULE:FREQ=YEARLY;BYDAY=-1SU;BYMONTH=10;UNTIL=20061029T070000Z
TZNAME:EST
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
END:STANDARD
BEGIN:STANDARD
DTSTART:20071104T030000
RRULE:FREQ=YEARLY;BYDAY=1SU;BYMONTH=11
TZNAME:EST
TZOFFSETFROM:-0400
TZOFFSETTO:-0500
END:STANDARD
BEGIN:DAYLIGHT
DTSTART:20000402T030000
RRULE:FREQ=YEARLY;BYDAY=1SU;BYMONTH=4;UNTIL=20060402T080000Z
TZNAME:EDT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
END:DAYLIGHT
BEGIN:DAYLIGHT
DTSTART:20070311T030000
RRULE:FREQ=YEARLY;BYDAY=2SU;BYMONTH=3
TZNAME:EDT
TZOFFSETFROM:-0500
TZOFFSETTO:-0400
END:DAYLIGHT
END:VTIMEZONE
BEGIN:VEVENT
UID:pretalx-devconf-us-2025-CQQRKB@pretalx.devconf.info
DTSTART;TZID=EST:20250920T135000
DTEND;TZID=EST:20250920T142500
DESCRIPTION:Join us for an overview of all the latest methods of language m
 odel post-training openly available today! We will begin with offline meth
 ods like standard Supervised Fine-Tuning (SFT)\, Parameter-Efficient Fine-
 Tuning (PEFT)\, Direct Preference Optimization (DPO)\, and continual learn
 ing techniques for further tuning existing instruct models. We will then m
 ove into online reinforcement learning options like Reinforcement Learning
  from Human Feedback (RLHF) and Group Relative Policy Optimization (GRPO).
  The talk will consist of a walkthrough of the use cases for each method\,
  as well as how to get started today via our very own Training Hub!
DTSTAMP:20260310T063008Z
LOCATION:Ladd Room (Capacity 170)
SUMMARY:Language Model Post-Training in 2025: an Overview of Customization 
 Options Today - Mustafa Eyceoz
URL:https://pretalx.devconf.info/devconf-us-2025/talk/CQQRKB/
END:VEVENT
END:VCALENDAR
