The benchmark data can be accessed through the Transfer Learning in Dialogue Benchmarking Toolkit (TLiDB link) with just a few simple lines of code.
We highly recommend installing and using the benchmark through the TLiDB package, but the raw datasets can also be downloaded.
pip install tlidb
Then models can quickly be trained directly from the command line, for example:
tlidb --source_datasets Friends --source_tasks emory_emotion_recognition --target_datasets Friends --target_tasks reading_comprehension --do_train --do_finetune --do_eval --eval_best --model_config bert --few_shot_percent 0.1
pip install tlidb
git clone git@github.com:alon-albalak/TLiDB.git
cd TLiDB
pip install -e .
Then, follow these instructions on data loading to incorporate TLiDB dataloaders into your own script.
{
"metadata": {
"dataset_name": "Dataset Name",
"tasks": [ # list of task names
"task1",
"task2",
],
"task_metadata": { # metadata about tasks, for example: labels, metrics, or metric keyword arguments
"task_1": {
"labels": [
"label1",
"label2"
],
"metrics": [
"f1"
]
},
"task_2":{
"labels": [
"label1",
"label2",
"label3"
]
}
}
},
"data": [ # list of dicts
{
"dialogue_id": "dialogue-1",
"dialogue_metadata":{ # can be used to determine which tasks exist in this dialogue
"dialogue-level-classification-task1": null,
"dialogue-level-classification-task2": null,
"turn-level-classification-task1": null,
"turn-level-classification-task2": null,
},
"dialogue-level-classification-task1": {
"label": "ground truth label",
"instance_id": instance_id
},
"dialogue-level-classification-task2": {
"label": "ground truth label",
"instance_id": instance_id
},
"dialogue": [ # list of dicts
{
"turn_id": "1",
"speakers": ["speaker1"],
"utterance": "Example utterance",
"turn-level-classification-task1": {
"label": "ground truth label",
"instance_id": instance_id
},
"turn-level-classification-task2": {
"label": "ground truth label",
"instance_id": instance_id
}
},
{
"turn_id": "2",
"speakers": ["speaker2"],
"utterance": "Second example utterance",
"turn-level-classification-task1": {
"label": "ground truth label",
"instance_id": instance_id
},
"turn-level-classification-task2": {
"label": "ground truth label",
"instance_id": instance_id
}
}
]
}
]
}