% Workshop paper published in CEUR Workshop Proceedings, volume 3580.
% booktitle names the workshop proceedings (derived from the event given in
% the note field); series/volume carry the CEUR volume information.
% NOTE(review): confirm the exact proceedings title against the CEUR-WS volume page.
% The day field is not a standard BibTeX field; it is kept for tools that read it.
@inproceedings{ea803f560e78424ab0e4c2f320b31c7b,
  author    = {Asselborn, Thomas and Melzer, Sylvia and Aljoumani, Said and Bender, Magnus and Marwitz, Florian and Hirschler, Konrad and M{\"o}ller, Ralf},
  title     = {Fine-tuning {BERT} Models on Demand for Information Systems Explained Using Training Data from Pre-modern {Arabic}},
  booktitle = {Proceedings of the 3rd Workshop on Humanities-Centred Artificial Intelligence},
  series    = {CEUR Workshop Proceedings},
  volume    = {3580},
  pages     = {38--51},
  publisher = {CEUR-WS.org},
  year      = {2023},
  month     = dec,
  day       = {7},
  language  = {English},
  keywords  = {BERT, ChatHA, Fine-tuning on demand, manifest file, pre-modern Arabic},
  abstract  = {Humanities scholars can use Large Language Models (LLMs) to simplify text analysis and pattern recognition. Fine-tuning LLMs for specific humanities tasks can be challenging due to limited training data. However, in the humanities exists a growing number of information systems with research data which can be used for this purpose. This article outlines how to fine-tune Bidirectional Encoder Representations from Transformers (BERT) models using pre-modern Arabic data available in an information system. We also introduce the Humanities Aligned Chatbot (ChatHA) for user-friendly interaction with the fine-tuned model to break down the barriers to the application of LLMs in the humanities. The result we have achieved is that all archived research data can be used in a research data repository for fine-tuning models in a short time without requiring IT expertise. Additionally, users can chat with a ChatHA, which provides users with more precise answers. This success is also attributed to the availability of well-structured data in canonical form, enabling us to precisely define the mapping of entity types to labels. In addition, we use a manifest file which serves as the cornerstone for structuring and organizing training data to automate the Fine-tuning on Demand (FToD) process. The results we obtained show that the FToD process can be done in just a few minutes using a sample dataset and BERT. The FToD process identified names of people, places, or dates written in pre-modern Arabic that could not be recognised by the pre-trained model.},
  note      = {3rd Workshop on Humanities-Centred Artificial Intelligence: 46th German Conference on Artificial Intelligence ; Conference date: 26-09-2023 Through 26-09-2023},
}