mSTEB Leaderboard

This leaderboard presents the evaluation results of models on the mSTEB benchmark.

Select Region
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 76.28,
      • 86.1,
      • 79.5,
      • 72.6,
      • 89.7,
      • 53.5
      ],
    • [
      • "GPT-4o-audio",
      • 67.56,
      • 81.2,
      • 76,
      • 63.4,
      • 69.9,
      • 47.3
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 57.38,
      • 65.4,
      • 62.9,
      • 55.4,
      • 70.8,
      • 32.4
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 24.32,
      • 15.8,
      • 40.4,
      • 33.3,
      • 11.6,
      • 20.5
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 62.42,
      • 68.5,
      • 68.5,
      • 55.5,
      • 82,
      • 37.6
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 45.4,
      • 51,
      • 45.2,
      • 37.7,
      • 68.5,
      • 24.6
      ],
    • [
      • "GPT-4o-audio",
      • 45.18,
      • 56.6,
      • 55.3,
      • 45.6,
      • 39.9,
      • 28.5
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 16.76,
      • 0,
      • 28.2,
      • 26.3,
      • 16.1,
      • 13.2
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "GPT-4o-audio",
      • 63.08,
      • 98.1,
      • 66.9,
      • 45.6,
      • 68.3,
      • 36.5
      ],
    • [
      • "Gemini 2.0 Flash",
      • 60,
      • 96.7,
      • 44.7,
      • 50.9,
      • 80.5,
      • 27.2
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 46.24,
      • 89.3,
      • 30.7,
      • 32.3,
      • 59,
      • 19.9
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 7.6,
      • 0.3,
      • 15.2,
      • 27.1,
      • -46.2,
      • 11.9
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 80.52,
      • 97.5,
      • 83,
      • 71.5,
      • 92.3,
      • 58.3
      ],
    • [
      • "GPT-4o-audio",
      • 69.2,
      • 91.8,
      • 79.8,
      • 62.2,
      • 64.9,
      • 47.3
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 57.32,
      • 71.7,
      • 67.9,
      • 57,
      • 61.7,
      • 28.3
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 13.98,
      • 4.5,
      • 32.8,
      • 30.3,
      • -13.7,
      • 16
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 79.92,
      • 91,
      • 82.3,
      • 75.8,
      • 92.7,
      • 57.8
      ],
    • [
      • "GPT-4o-audio",
      • 68.32,
      • 88.8,
      • 76,
      • 63.8,
      • 65,
      • 48
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 57.62,
      • 70,
      • 64.8,
      • 56.3,
      • 66.7,
      • 30.3
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 18.62,
      • 10.5,
      • 33,
      • 31.4,
      • 2.2,
      • 16
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 78.72,
      • 88,
      • 82.1,
      • 74.7,
      • 93.3,
      • 55.5
      ],
    • [
      • "GPT-4o-audio",
      • 68.84,
      • 80.2,
      • 79.1,
      • 64.6,
      • 72.8,
      • 47.5
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 61.16,
      • 74.2,
      • 66.2,
      • 56,
      • 74.2,
      • 35.2
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 16.82,
      • 21.7,
      • 28.4,
      • 30.1,
      • -11.7,
      • 15.6
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 79.56,
      • 98.1,
      • 83.5,
      • 77.3,
      • 86.1,
      • 52.8
      ],
    • [
      • "GPT-4o-audio",
      • 73,
      • 90.5,
      • 80.5,
      • 70,
      • 76.8,
      • 47.2
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 51.5,
      • 62.3,
      • 64.3,
      • 53.8,
      • 50.9,
      • 26.2
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 49.44,
      • 59.2,
      • 58.7,
      • 43.5,
      • 51.8,
      • 34
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 79.86,
      • 84.5,
      • 82.1,
      • 81.7,
      • 92.7,
      • 58.3
      ],
    • [
      • "GPT-4o-audio",
      • 77.78,
      • 86.1,
      • 84,
      • 74.3,
      • 87.6,
      • 56.9
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 63.04,
      • 64.3,
      • 68.1,
      • 66.5,
      • 77.2,
      • 39.1
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 38.34,
      • 28.6,
      • 54.5,
      • 41.6,
      • 37.4,
      • 29.6
      ]
    ],
  • "metadata": null
}
{
  • "headers": [
    • "Model",
    • "Average ⬆️",
    • "LID (ACC)",
    • "TC (ACC)",
    • "RC-QA (ACC)",
    • "ASR (100-CER)",
    • "S2TT (xx-en) (CHRF++)"
    ],
  • "data": [
    • [
      • "Gemini 2.0 Flash",
      • 82.68,
      • 94.1,
      • 84.9,
      • 82.1,
      • 90.1,
      • 62.2
      ],
    • [
      • "GPT-4o-audio",
      • 79.46,
      • 90.4,
      • 85.6,
      • 74,
      • 88,
      • 59.3
      ],
    • [
      • "Gemma-3n-E4B-it",
      • 65.84,
      • 71.3,
      • 72.9,
      • 66.1,
      • 80,
      • 38.9
      ],
    • [
      • "Qwen2-Audio-7B-Instruct",
      • 25.54,
      • 11.8,
      • 53.6,
      • 36.6,
      • 1.8,
      • 23.9
      ]
    ],
  • "metadata": null
}

🔍 Compare Regions (Average All Tasks)

Select regions to compare
Model
All
Africa
Asia (S)
Qwen2-Audio-7B-Instruct
76.28
62.42
80.52