[{"data":1,"prerenderedAt":-1},["ShallowReactive",2],{"project-87366":3},{"id":4,"name":5,"fullName":6,"owner":7,"repo":5,"description":8,"homepage":9,"htmlUrl":10,"language":11,"languages":10,"totalLinesOfCode":10,"stars":12,"forks":13,"watchers":13,"openIssues":13,"contributorsCount":14,"subscribersCount":14,"size":14,"stars1d":14,"stars7d":14,"stars30d":14,"stars90d":14,"forks30d":14,"starsTrendScore":14,"compositeScore":15,"rankGlobal":10,"rankLanguage":10,"license":16,"archived":17,"fork":17,"defaultBranch":18,"hasWiki":17,"hasPages":17,"topics":19,"createdAt":10,"pushedAt":10,"updatedAt":27,"readmeContent":10,"aiSummary":10,"trendingCount":14,"starSnapshotCount":14,"syncStatus":13,"lastSyncTime":28,"discoverSource":29},87366,"RLCSD","THU-BPM\u002FRLCSD","THU-BPM","Source code of paper \"RLCSD: Reinforcement Learning with Contrastive On-Policy Self-Distillation\"","https:\u002F\u002Farxiv.org\u002Fabs\u002F2606.11709",null,"Python",51,2,0,37.43,"MIT License",false,"main",[20,21,22,23,24,25,26],"large-language-models","llm","on-policy-distillation","opd","opsd","reinforcement-learning","self-distillation","2026-06-25 04:01:47","2026-06-25 02:30:15","CREATED_QUERY"]