% stray export artifact (original line read "font"); text outside @entries is ignored by BibTeX
Early, Joseph; Bewley, Tom; Evers, Christine; Ramchurn, Sarvapali
Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning Journal Article
In: arXiv, 2022, (20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)).
Abstract | Links | BibTeX | Tags: cs.AI, cs.LG
@article{soton458023,
  title     = {Non-{Markovian} reward modelling from trajectory labels via interpretable multiple instance learning},
  author    = {Early, Joseph and Bewley, Tom and Evers, Christine and Ramchurn, Sarvapali},
  url       = {https://eprints.soton.ac.uk/458023/},
  year      = {2022},
  date      = {2022-05-01},
  journal   = {arXiv},
  abstract  = {We generalise the problem of reward modelling (RM) for reinforcement learning (RL) to handle non-Markovian rewards. Existing work assumes that human evaluators observe each step in a trajectory independently when providing feedback on agent behaviour. In this work, we remove this assumption, extending RM to include hidden state information that captures temporal dependencies in human assessment of trajectories. We then show how RM can be approached as a multiple instance learning (MIL) problem, and develop new MIL models that are able to capture the time dependencies in labelled trajectories. We demonstrate on a range of RL tasks that our novel MIL models can reconstruct reward functions to a high level of accuracy, and that they provide interpretable learnt hidden information that can be used to train high-performing agent policies.},
  note      = {20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)},
  keywords  = {cs.AI, cs.LG},
  pubstate  = {published},
  tppubtype = {article}
}
Early, Joseph; Evers, Christine; Ramchurn, Sarvapali
Model agnostic interpretability for multiple instance learning Proceedings Article
In: International Conference on Learning Representations 2022 (25/04/22 - 29/04/22), 2022, (25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements).
Abstract | Links | BibTeX | Tags: cs.AI, cs.LG
@inproceedings{soton454952,
  author    = {Joseph Early and Christine Evers and Sarvapali Ramchurn},
  title     = {Model agnostic interpretability for multiple instance learning},
  booktitle = {International Conference on Learning Representations 2022 (25/04/22 - 29/04/22)},
  year      = {2022},
  date      = {2022-01-01},
  url       = {https://eprints.soton.ac.uk/454952/},
  abstract  = {In Multiple Instance Learning (MIL), models are trained using bags of instances, where only a single label is provided for each bag. A bag label is often only determined by a handful of key instances within a bag, making it difficult to interpret what information a classifier is using to make decisions. In this work, we establish the key requirements for interpreting MIL models. We then go on to develop several model-agnostic approaches that meet these requirements. Our methods are compared against existing inherently interpretable MIL models on several datasets, and achieve an increase in interpretability accuracy of up to 30%. We also examine the ability of the methods to identify interactions between instances and scale to larger datasets, improving their applicability to real-world problems.},
  note      = {25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements},
  keywords  = {cs.AI, cs.LG},
  pubstate  = {published},
  tppubtype = {inproceedings}
}
Early, Joseph; Bewley, Tom; Evers, Christine; Ramchurn, Sarvapali
Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning Journal Article
In: arXiv, 2022, (20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)).
% DUPLICATE ENTRY: key "soton458023" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
article{soton458023,
title = {Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning},
author = {Joseph Early and Tom Bewley and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/458023/},
year = {2022},
date = {2022-05-01},
journal = {arXiv},
abstract = {We generalise the problem of reward modelling (RM) for reinforcement learning (RL) to handle non-Markovian rewards. Existing work assumes that human evaluators observe each step in a trajectory independently when providing feedback on agent behaviour. In this work, we remove this assumption, extending RM to include hidden state information that captures temporal dependencies in human assessment of trajectories. We then show how RM can be approached as a multiple instance learning (MIL) problem, and develop new MIL models that are able to capture the time dependencies in labelled trajectories. We demonstrate on a range of RL tasks that our novel MIL models can reconstruct reward functions to a high level of accuracy, and that they provide interpretable learnt hidden information that can be used to train high-performing agent policies.},
note = {20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Early, Joseph; Evers, Christine; Ramchurn, Sarvapali
Model agnostic interpretability for multiple instance learning Proceedings Article
In: International Conference on Learning Representations 2022 (25/04/22 - 29/04/22), 2022, (25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements).
% DUPLICATE ENTRY: key "soton454952" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
inproceedings{soton454952,
title = {Model agnostic interpretability for multiple instance learning},
author = {Joseph Early and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/454952/},
year = {2022},
date = {2022-01-01},
booktitle = {International Conference on Learning Representations 2022 (25/04/22 - 29/04/22)},
abstract = {In Multiple Instance Learning (MIL), models are trained using bags of instances, where only a single label is provided for each bag. A bag label is often only determined by a handful of key instances within a bag, making it difficult to interpret what information a classifier is using to make decisions. In this work, we establish the key requirements for interpreting MIL models. We then go on to develop several model-agnostic approaches that meet these requirements. Our methods are compared against existing inherently interpretable MIL models on several datasets, and achieve an increase in interpretability accuracy of up to 30%. We also examine the ability of the methods to identify interactions between instances and scale to larger datasets, improving their applicability to real-world problems.},
note = {25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Early, Joseph; Bewley, Tom; Evers, Christine; Ramchurn, Sarvapali
Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning Journal Article
In: arXiv, 2022, (20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)).
Abstract | Links | BibTeX | Tags: cs.AI, cs.LG
% DUPLICATE ENTRY: key "soton458023" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
article{soton458023,
title = {Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning},
author = {Joseph Early and Tom Bewley and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/458023/},
year = {2022},
date = {2022-05-01},
journal = {arXiv},
abstract = {We generalise the problem of reward modelling (RM) for reinforcement learning (RL) to handle non-Markovian rewards. Existing work assumes that human evaluators observe each step in a trajectory independently when providing feedback on agent behaviour. In this work, we remove this assumption, extending RM to include hidden state information that captures temporal dependencies in human assessment of trajectories. We then show how RM can be approached as a multiple instance learning (MIL) problem, and develop new MIL models that are able to capture the time dependencies in labelled trajectories. We demonstrate on a range of RL tasks that our novel MIL models can reconstruct reward functions to a high level of accuracy, and that they provide interpretable learnt hidden information that can be used to train high-performing agent policies.},
note = {20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)},
keywords = {cs.AI, cs.LG},
pubstate = {published},
tppubtype = {article}
}
Early, Joseph; Evers, Christine; Ramchurn, Sarvapali
Model agnostic interpretability for multiple instance learning Proceedings Article
In: International Conference on Learning Representations 2022 (25/04/22 - 29/04/22), 2022, (25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements).
Abstract | Links | BibTeX | Tags: cs.AI, cs.LG
% DUPLICATE ENTRY: key "soton454952" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
inproceedings{soton454952,
title = {Model agnostic interpretability for multiple instance learning},
author = {Joseph Early and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/454952/},
year = {2022},
date = {2022-01-01},
booktitle = {International Conference on Learning Representations 2022 (25/04/22 - 29/04/22)},
abstract = {In Multiple Instance Learning (MIL), models are trained using bags of instances, where only a single label is provided for each bag. A bag label is often only determined by a handful of key instances within a bag, making it difficult to interpret what information a classifier is using to make decisions. In this work, we establish the key requirements for interpreting MIL models. We then go on to develop several model-agnostic approaches that meet these requirements. Our methods are compared against existing inherently interpretable MIL models on several datasets, and achieve an increase in interpretability accuracy of up to 30%. We also examine the ability of the methods to identify interactions between instances and scale to larger datasets, improving their applicability to real-world problems.},
note = {25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements},
keywords = {cs.AI, cs.LG},
pubstate = {published},
tppubtype = {inproceedings}
}
Early, Joseph; Bewley, Tom; Evers, Christine; Ramchurn, Sarvapali
Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning Journal Article
In: arXiv, 2022, (20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)).
% DUPLICATE ENTRY: key "soton458023" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
article{soton458023,
title = {Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning},
author = {Joseph Early and Tom Bewley and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/458023/},
year = {2022},
date = {2022-05-01},
journal = {arXiv},
abstract = {We generalise the problem of reward modelling (RM) for reinforcement learning (RL) to handle non-Markovian rewards. Existing work assumes that human evaluators observe each step in a trajectory independently when providing feedback on agent behaviour. In this work, we remove this assumption, extending RM to include hidden state information that captures temporal dependencies in human assessment of trajectories. We then show how RM can be approached as a multiple instance learning (MIL) problem, and develop new MIL models that are able to capture the time dependencies in labelled trajectories. We demonstrate on a range of RL tasks that our novel MIL models can reconstruct reward functions to a high level of accuracy, and that they provide interpretable learnt hidden information that can be used to train high-performing agent policies.},
note = {20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Early, Joseph; Evers, Christine; Ramchurn, Sarvapali
Model agnostic interpretability for multiple instance learning Proceedings Article
In: International Conference on Learning Representations 2022 (25/04/22 - 29/04/22), 2022, (25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements).
% DUPLICATE ENTRY: key "soton454952" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
inproceedings{soton454952,
title = {Model agnostic interpretability for multiple instance learning},
author = {Joseph Early and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/454952/},
year = {2022},
date = {2022-01-01},
booktitle = {International Conference on Learning Representations 2022 (25/04/22 - 29/04/22)},
abstract = {In Multiple Instance Learning (MIL), models are trained using bags of instances, where only a single label is provided for each bag. A bag label is often only determined by a handful of key instances within a bag, making it difficult to interpret what information a classifier is using to make decisions. In this work, we establish the key requirements for interpreting MIL models. We then go on to develop several model-agnostic approaches that meet these requirements. Our methods are compared against existing inherently interpretable MIL models on several datasets, and achieve an increase in interpretability accuracy of up to 30%. We also examine the ability of the methods to identify interactions between instances and scale to larger datasets, improving their applicability to real-world problems.},
note = {25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}
Multi-agent signal-less intersection management with dynamic platoon formation
AI Foundation Models: initial review, CMA Consultation, TAS Hub Response
The effect of data visualisation quality and task density on human-swarm interaction
Demonstrating performance benefits of human-swarm teaming
Early, Joseph; Bewley, Tom; Evers, Christine; Ramchurn, Sarvapali
Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning Journal Article
In: arXiv, 2022, (20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)).
% DUPLICATE ENTRY: key "soton458023" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
article{soton458023,
title = {Non-markovian reward modelling from trajectory labels via interpretable multiple instance learning},
author = {Joseph Early and Tom Bewley and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/458023/},
year = {2022},
date = {2022-05-01},
journal = {arXiv},
abstract = {We generalise the problem of reward modelling (RM) for reinforcement learning (RL) to handle non-Markovian rewards. Existing work assumes that human evaluators observe each step in a trajectory independently when providing feedback on agent behaviour. In this work, we remove this assumption, extending RM to include hidden state information that captures temporal dependencies in human assessment of trajectories. We then show how RM can be approached as a multiple instance learning (MIL) problem, and develop new MIL models that are able to capture the time dependencies in labelled trajectories. We demonstrate on a range of RL tasks that our novel MIL models can reconstruct reward functions to a high level of accuracy, and that they provide interpretable learnt hidden information that can be used to train high-performing agent policies.},
note = {20 pages (9 main content; 2 references; 9 appendix). 11 figures (8 main content; 3 appendix)},
keywords = {},
pubstate = {published},
tppubtype = {article}
}
Early, Joseph; Evers, Christine; Ramchurn, Sarvapali
Model agnostic interpretability for multiple instance learning Proceedings Article
In: International Conference on Learning Representations 2022 (25/04/22 - 29/04/22), 2022, (25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements).
% DUPLICATE ENTRY: key "soton454952" is already defined earlier in this file and
% BibTeX/Biber reject repeated keys. The leading "@" is removed below so the copy
% is ignored (text outside an @entry is skipped). Delete once confirmed redundant.
inproceedings{soton454952,
title = {Model agnostic interpretability for multiple instance learning},
author = {Joseph Early and Christine Evers and Sarvapali Ramchurn},
url = {https://eprints.soton.ac.uk/454952/},
year = {2022},
date = {2022-01-01},
booktitle = {International Conference on Learning Representations 2022 (25/04/22 - 29/04/22)},
abstract = {In Multiple Instance Learning (MIL), models are trained using bags of instances, where only a single label is provided for each bag. A bag label is often only determined by a handful of key instances within a bag, making it difficult to interpret what information a classifier is using to make decisions. In this work, we establish the key requirements for interpreting MIL models. We then go on to develop several model-agnostic approaches that meet these requirements. Our methods are compared against existing inherently interpretable MIL models on several datasets, and achieve an increase in interpretability accuracy of up to 30%. We also examine the ability of the methods to identify interactions between instances and scale to larger datasets, improving their applicability to real-world problems.},
note = {25 pages (9 content, 2 acknowledgement + references, 14 appendix). 16 figures (3 main content, 13 appendix). Submitted and accepted to ICLR 22, see http://openreview.net/forum?id=KSSfF5lMIAg . Revision: added additional acknowledgements},
keywords = {},
pubstate = {published},
tppubtype = {inproceedings}
}