@article{Buschjaeger/Morik/2023,author={Buschj{\"{a}}ger, Sebastian and Morik, Katharina},title={Joint leaf-refinement and ensemble pruning through L\({}_{\mbox{1}}\) regularization},number={3},pages={1230--1261},volume={37},bibsource={dblp computer science bibliography, https://dblp.org},biburl={https://dblp.org/rec/journals/datamine/BuschjagerM23.bib},year={2023},doi={10.1007/s10618-023-00921-z},journal={Data Min. Knowl. Discov.},timestamp={Sat, 13 May 2023 01:07:00 +0200},url={https://doi.org/10.1007/s10618-023-00921-z},}
@article{Koschel/etal/2023,author={Koschel, Simon and Buschj{\"{a}}ger, Sebastian and Lucchese, Claudio and Morik, Katharina},title={Fast Inference of Tree Ensembles on {ARM} Devices},volume={abs/2305.08579},bibsource={dblp computer science bibliography, https://dblp.org},biburl={https://dblp.org/rec/journals/corr/abs-2305-08579.bib},year={2023},doi={10.48550/arXiv.2305.08579},eprint={2305.08579},eprinttype={arXiv},journal={CoRR},timestamp={Wed, 17 May 2023 15:47:36 +0200},url={https://doi.org/10.48550/arXiv.2305.08579},}
2022
PhD Thesis
Ensemble learning with discrete classifiers on small devices
@phdthesis{Buschjaeger/2022,author={Buschj{\"{a}}ger, Sebastian},title={Ensemble learning with discrete classifiers on small devices},url={http://hdl.handle.net/2003/41132},bibsource={dblp computer science bibliography, https://dblp.org},biburl={https://dblp.org/rec/phd/dnb/Buschjager22.bib},school={Technical University of Dortmund, Germany},timestamp={Fri, 20 Jan 2023 21:27:23 +0100},urn={urn:nbn:de:101:1-2022111802423335923878},year={2022},}
@inproceedings{Buschjaeger/etal/2022,author={Buschjäger, Sebastian and Hess, Sibylle and Morik, Katharina},booktitle={Proceedings of the Thirty-Sixth {AAAI} Conference on Artificial Intelligence (AAAI-22)},title={Shrub Ensembles for Online Classification},year={2022},publisher={{AAAI} Press},}
@article{Yayla/etal/2022,author={Yayla, Mikail and Thomann, Simon and Buschj{\"{a}}ger, Sebastian and Morik, Katharina and Chen, Jian{-}Jia and Amrouch, Hussam},journal={{IEEE} Trans. Circuits Syst. {I} Regul. Pap.},title={Reliable Binarized Neural Networks on Unreliable Beyond Von-Neumann Architecture},year={2022},number={6},pages={2516--2528},volume={69},bibsource={dblp computer science bibliography, https://dblp.org},biburl={https://dblp.org/rec/journals/tcasI/YaylaTBMCA22.bib},doi={10.1109/TCSI.2022.3156165},timestamp={Mon, 28 Aug 2023 21:37:53 +0200},url={https://doi.org/10.1109/TCSI.2022.3156165}}
Book Chapter
Summary Extraction from Streams
Sebastian Buschjäger and Katharina Morik
In Machine Learning under Resource Constraints - Volume 1: Fundamentals, 2022
@incollection{Rhode/etal/2022,author={Rhode, Wolfgang and Ruhe, Tim and Linhoff, Maximilian and Bu{\ss}, Jens and Nickel, Lukas and Buschj{\"{a}}ger, Sebastian},booktitle={Machine Learning under Resource Constraints - Volume 2: Discovery in Physics},publisher={De Gruyter},title={Monitoring and Feature Extraction},year={2022},editor={Morik, Katharina and Rhode, Wolfgang},pages={163--191},series={De Gruyter {STEM}},bibsource={dblp computer science bibliography, https://dblp.org},biburl={https://dblp.org/rec/books/degruyter/22/RhodeRLBNB22.bib},doi={10.1515/9783110785968-007},timestamp={Fri, 28 Jul 2023 11:18:35 +0200},url={https://doi.org/10.1515/9783110785968-007}}
Book Chapter
Deep Learning Applications
Wolfgang Rhode, Mirco Hünnefeld, Bernhard Spaan, and 3 more authors
In Machine Learning under Resource Constraints - Volume 2: Discovery in Physics, 2022
@incollection{Rhode/etal/2022a,author={Rhode, Wolfgang and H{\"{u}}nnefeld, Mirco and Spaan, Bernhard and Jevtic, Vukan and Pfahler, Lukas and Buschj{\"{a}}ger, Sebastian},booktitle={Machine Learning under Resource Constraints - Volume 2: Discovery in Physics},publisher={De Gruyter},title={Deep Learning Applications},year={2022},editor={Morik, Katharina and Rhode, Wolfgang},pages={245--278},series={De Gruyter {STEM}},bibsource={dblp computer science bibliography, https://dblp.org},biburl={https://dblp.org/rec/books/degruyter/22/RhodeHSJPB22.bib},doi={10.1515/9783110785968-009},timestamp={Fri, 28 Jul 2023 11:18:35 +0200},url={https://doi.org/10.1515/9783110785968-009}}
Book Chapter
Machine Learning Based on Emerging Memories
Mikail Yayla, Sebastian Buschjäger, and Hussam Amrouch
In Machine Learning under Resource Constraints - Volume 1: Fundamentals, 2022
For timing-sensitive edge applications, the demand for efficient, lightweight machine learning solutions has increased recently. Tree ensembles are among the state of the art in many machine learning applications. While single decision trees are comparably small, an ensemble of trees can have a significant memory footprint, leading to cache-locality issues that are crucial for performance in terms of execution time. In this work, we analyze memory-locality issues of the two most common realizations of decision trees, i.e., native and if-else trees. We highlight that both realizations demand a more careful memory layout to improve caching behavior and maximize performance. We adopt a probabilistic model of decision tree inference to find the best memory layout for each tree at the application layer. Further, we present an efficient heuristic that takes architecture-dependent information into account, thereby optimizing the given ensemble for a target computer architecture. Our code-generation framework, which is freely available in an open-source repository, produces optimized code while preserving the structure and accuracy of the trees. With several real-world data sets, we evaluate the elapsed time of various tree realizations on server hardware as well as embedded systems for Intel and ARM processors. Our optimized memory layout achieves a reduction in execution time of up to 75% on server-class systems and up to 70% on embedded systems.
@article{Chen/etal/2021,author={Chen, Kuan-Hsun and Su, ChiaHui and Hakert, Christian and Buschj\"{a}ger, Sebastian and Lee, Chao-Lin and Lee, Jenq-Kuen and Morik, Katharina and Chen, Jian-Jia},title={Efficient Realization of Decision Trees for Real-Time Inference},doi={10.1145/3508019},issn={1539-9087},url={https://doi.org/10.1145/3508019},address={New York, NY, USA},journal={{ACM} Transactions on Embedded Computing Systems},keywords={Random Forest, Optimized Memory Layout, Real-Time Inference, Cache-Aware Optimization, Architecture-Aware Realization},month=dec,publisher={Association for Computing Machinery},year={2021},}
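To make the two realizations concrete, here is a minimal Python sketch of both schemes; it illustrates the general technique only, it is not the paper's code-generation framework, and all names are ours:

# Native tree: nodes stored in flat arrays and traversed by index.
feature   = [0, 1, -1, -1, -1]        # -1 marks a leaf
threshold = [0.5, 0.3, 0.0, 0.0, 0.0]
left      = [1, 3, -1, -1, -1]
right     = [2, 4, -1, -1, -1]
label     = [None, None, 0, 1, 0]

def predict_native(x):
    node = 0
    while feature[node] != -1:        # follow child indices until a leaf is reached
        node = left[node] if x[feature[node]] <= threshold[node] else right[node]
    return label[node]

# If-else tree: the same tree unrolled into nested branches; the memory layout
# is now determined by where the compiler places the generated code.
def predict_ifelse(x):
    if x[0] <= 0.5:
        return 1 if x[1] <= 0.3 else 0
    return 0

assert predict_native([0.4, 0.2]) == predict_ifelse([0.4, 0.2]) == 1

In the native scheme the array order determines cache behavior; in the if-else scheme the order of the emitted branches does, which is exactly what the layout optimization above operates on.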
@misc{Buschjaeger/morik/2021c,title={There is no Double-Descent in Random Forests},author={Buschjäger, Sebastian and Morik, Katharina},year={2021},eprint={2111.04409},archiveprefix={arXiv},primaryclass={cs.LG},}
@misc{Buschjaeger/Morik/2021b,title={Improving the Accuracy-Memory Trade-Off of Random Forests Via Leaf-Refinement},author={Buschjäger, Sebastian and Morik, Katharina},year={2021},eprint={2110.10075},archiveprefix={arXiv},primaryclass={cs.LG},}
ECML
Very Fast Streaming Submodular Function Maximization
Sebastian Buschjäger, Philipp-Jan Honysz, Lukas Pfahler, and 1 more author
In Joint European Conference on Machine Learning and Knowledge Discovery in Databases, Dec 2021
@inproceedings{Buschjaeger/etal/2021a,author={Buschj\"{a}ger, Sebastian and Honysz, Philipp-Jan and Pfahler, Lukas and Morik, Katharina},title={Very Fast Streaming Submodular Function Maximization},booktitle={Joint European Conference on Machine Learning and Knowledge Discovery in Databases},publisher={Springer},year={2021},}
@misc{Honysz/etal/2021b,title={Providing Meaningful Data Summarizations Using Exemplar-based Clustering in Industry 4.0},author={Honysz, Philipp-Jan and Schulze-Struchtrup, Alexander and Buschjäger, Sebastian and Morik, Katharina},year={2021},eprint={2105.12026},}
The optimization of submodular functions constitutes a viable way to perform clustering. Strong approximation guarantees and feasible optimization w.r.t. streaming data make this clustering approach favorable. Technically, submodular functions map subsets of data to real values, which indicate how "representative" a specific subset is. Optimal sets might then be used to partition the data space and to infer clusters. Exemplar-based clustering is one such submodular function, but it suffers from high computational complexity. For practical applications, however, the actual wall-clock run-time is decisive. In this work, we present a novel way to evaluate this particular function on GPUs, which takes the requirements of optimizers into account and reduces wall-clock run-time. To assess our GPU algorithm, we investigated both the impact of run-time-critical problem properties, such as data dimensionality and the number of data points in a subset, and the influence of the required floating-point precision. In reproducible experiments, our GPU algorithm achieved competitive speedups of up to 72x, depending on whether single- or multi-threaded CPU computation was used for comparison and on the required floating-point precision. Half-precision GPU computation led to large speedups of up to 452x compared to single-precision, single-threaded CPU computations.
@misc{Honysz/etal/2021a,title={GPU-Accelerated Optimizer-Aware Evaluation of Submodular Exemplar Clustering},author={Honysz, Philipp-Jan and Buschjäger, Sebastian and Morik, Katharina},year={2021},}
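As a reference point for the objective being evaluated, the following is a minimal NumPy sketch of the exemplar-based clustering function; the paper evaluates this function on GPUs, whereas this CPU version merely illustrates the computation, and the function names are ours:

import numpy as np

def exemplar_value(X, S, e0=None):
    # f(S) = L({e0}) - L(S + {e0}), where L(.) is the mean squared
    # distance of every point in X to its nearest exemplar and e0 is a
    # phantom exemplar (here: the origin, a common choice).
    if e0 is None:
        e0 = np.zeros(X.shape[1])
    def L(exemplars):
        d = ((X[:, None, :] - exemplars[None, :, :]) ** 2).sum(-1)
        return d.min(axis=1).mean()
    return L(e0[None, :]) - L(np.vstack([S, e0[None, :]]))

The pairwise distance matrix inside L is the run-time bottleneck that the experiments above probe via data dimensionality, subset size, and floating-point precision.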
@misc{Buschjaeger/etal/2021c,title={Bit Error Tolerance Metrics for Binarized Neural Networks},author={Buschjäger, Sebastian and Chen, Jian-Jia and Chen, Kuan-Hsun and Günzel, Mario and Morik, Katharina and Novkin, Rodion and Pfahler, Lukas and Yayla, Mikail},year={2021},}
TC
FeFET-based Binarized Neural Networks Under Temperature-dependent Bit Errors
Mikail Yayla, Sebastian Buschjäger, Aniket Gupta, and 5 more authors
@article{Yayla/etal/2021,author={Yayla, Mikail and Buschj{\"{a}}ger, Sebastian and Gupta, Aniket and Chen, Jian-Jia and Henkel, J{\"{o}}rg and Morik, Katharina and Chen, Kuan-Hsun and Amrouch, Hussam},journal={IEEE Transactions on Computers},title={FeFET-based Binarized Neural Networks Under Temperature-dependent Bit Errors},year={2021},pages={1-1},doi={10.1109/TC.2021.3104736},url={https://ieeexplore.ieee.org/document/9513530}}
@inproceedings{Buschjaeger/etal/2021b,author={Buschj\"{a}ger, Sebastian and Chen, Jian-Jia and Chen, Kuan-Hsun and G{\"u}nzel, Mario and Hakert, Christian and Morik, Katharina and Novkin, Rodion and Pfahler, Lukas and Yayla, Mikail},title={Margin-Maximization in Binarized Neural Networks for Optimizing Bit Error Tolerance},booktitle={Proceedings of DATE 2021},year={2021},}
@article{Buschjager/Honysz/2020c,author={Buschj{\"{a}}ger, Sebastian and Honysz, Philipp-jan and Morik, Katharina},doi={10.1007/s41060-020-00238-w},isbn={4106002000},issn={2364-4168},journal={International Journal of Data Science and Analytics},keywords={Density estimation,Ensemble,Isolation forest,Outlier detection,Tree,density estimation,ensemble,isolation forest,outlier detection,tree},publisher={Springer International Publishing},title={{Randomized outlier detection with trees}},url={https://doi.org/10.1007/s41060-020-00238-w},year={2020},video={https://www.youtube.com/watch?v=MOH6n8wiF7E},}
DSAA
Generalized Isolation Forest: Some Theory and More Applications – Extended Abstract
Sebastian Buschjäger, Philipp-Jan Honysz, and Katharina Morik
In Proceedings 2020 IEEE 7th International Conference on Data Science and Advanced Analytics (DSAA 2020), Dec 2020
Isolation Forest is a popular outlier detection algorithm that isolates outlier observations from regular observations by building multiple random decision trees. Multiple extensions enhance the original Isolation Forest algorithm, including the Extended Isolation Forest, which allows for non-rectangular splits, and the SCiForest, which improves the fitting of individual trees. All these approaches rate the outlierness of an observation by its average path length. However, we find a lack of theoretical explanation for why these isolation-based algorithms offer such good practical performance. In this paper, we present a theoretical framework that describes the effectiveness of isolation-based approaches from a distributional viewpoint. We show that these algorithms fit a mixture of distributions, where the average path length of an observation can be viewed as a (somewhat crude) approximation of the mixture coefficient. Using this framework, we derive the Generalized Isolation Forest (GIF), which also trains random trees but moves beyond the average path length when combining them. In an extensive evaluation of over 350,000 experiments, we show that GIF outperforms the other methods on a variety of datasets while having comparable runtime.
@inproceedings{Buschjaeger/Honysz/2020a,author={Buschj\"{a}ger, Sebastian and Honysz, Philipp-Jan and Morik, Katharina},title={Generalized Isolation Forest: Some Theory and More Applications -- Extended Abstract},booktitle={Proceedings 2020 IEEE 7th International Conference on Data Science and Advanced Analytics (DSAA 2020)},organization={IEEE},year={2020},video={https://www.youtube.com/watch?v=MOH6n8wiF7E},}
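For context, the classic isolation-based score that all these approaches build on aggregates path lengths as follows; this sketch shows only the standard baseline, while GIF replaces this path-length aggregation with the mixture view described above:

import numpy as np

EULER_GAMMA = 0.5772156649

def c(n):
    # Expected path length of an unsuccessful BST search over n points,
    # the normalizer used by the original Isolation Forest.
    return 2.0 * (np.log(n - 1) + EULER_GAMMA) - 2.0 * (n - 1) / n if n > 1 else 0.0

def anomaly_score(path_lengths, n):
    # s(x) = 2^(-E[h(x)] / c(n)); scores close to 1 indicate outliers.
    return 2.0 ** (-np.mean(path_lengths) / c(n))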
Data summarization has become a valuable tool for understanding even terabytes of data. Due to their compelling theoretical properties, submodular functions have been the focus of summarization algorithms. These algorithms offer worst-case approximation guarantees at the expense of higher computation and memory requirements. However, many practical applications do not fall under this worst case but are usually much more well-behaved. In this paper, we propose a new submodular function maximization algorithm called ThreeSieves, which ignores the worst case but delivers a good solution with high probability. It selects the most informative items from a data stream on the fly and maintains provable performance on a fixed memory budget. In an extensive evaluation of more than 7000 experiments, we show that our algorithm outperforms current state-of-the-art algorithms and, at the same time, uses fewer resources. Last, we highlight a real-world use case of our algorithm for data summarization in gamma-ray astronomy.
@misc{Buschjaeger/Honysz/2020b,author={Buschj\"{a}ger, Sebastian and Honysz, Philipp-Jan and Morik, Katharina},title={Very Fast Streaming Submodular Function Maximization},year={2020},}
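The family of streaming algorithms ThreeSieves belongs to can be sketched with a single fixed threshold; this simplified Python sketch shows only the shared thresholding skeleton, while ThreeSieves itself adapts the threshold with a probabilistic rule that yields its high-probability guarantee:

def threshold_stream(stream, f, K, tau):
    # Keep an item if its marginal gain f(S + {x}) - f(S) reaches tau;
    # stop once the summary S holds K items. f is a submodular set
    # function, here taking plain Python lists.
    S = []
    for x in stream:
        if len(S) >= K:
            break
        if f(S + [x]) - f(S) >= tau:
            S.append(x)
    return S

Because only S and one threshold are kept, the memory budget is fixed regardless of the stream length.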
Ensemble algorithms offer state-of-the-art performance in many machine learning applications. A common explanation for their excellent performance is the bias-variance decomposition of the mean squared error, which shows that an algorithm's error can be decomposed into its bias and variance. Both quantities often oppose each other, and ensembles offer an effective way to manage them, as they reduce the variance through a diverse set of base learners while keeping the bias low at the same time. Even though there have been numerous works on decomposing other loss functions, the exact mathematical connection is rarely exploited explicitly for ensembling, but merely used as a guiding principle. In this paper, we formulate a generalized bias-variance decomposition for arbitrary twice-differentiable loss functions and study it in the context of Deep Learning. We use this decomposition to derive a Generalized Negative Correlation Learning (GNCL) algorithm which offers explicit control over the ensemble's diversity and smoothly interpolates between the two extremes of independent training and the joint training of the ensemble. We show how GNCL encapsulates many previous works, and we discuss under which circumstances the training of an ensemble of Neural Networks might fail and which ensembling method should be favored depending on the choice of the individual networks.
@misc{Buschjaeger/etal/2020c,author={Buschj\"{a}ger, Sebastian and Pfahler, Lukas and Morik, Katharina},title={Generalized Negative Correlation Learning for Deep Ensembling},year={2020},}
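For the mean squared error, the decomposition referenced above takes its classical form; the paper generalizes this special case to arbitrary twice-differentiable losses:

\mathbb{E}_{D}\big[(\hat{f}(x) - y)^2\big]
  = \underbrace{\big(\mathbb{E}_{D}[\hat{f}(x)] - y\big)^2}_{\text{bias}^2}
  + \underbrace{\mathbb{E}_{D}\Big[\big(\hat{f}(x) - \mathbb{E}_{D}[\hat{f}(x)]\big)^2\Big]}_{\text{variance}}

where the expectation runs over the training samples D used to fit \hat{f}.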
Towards Explainable Bit Error Tolerance of Resistive RAM-Based Binarized Neural Networks
Sebastian Buschjäger, Jian-Jia Chen, Kuan-Hsun Chen, and 6 more authors
@misc{Buschjaeger/etal/2020b,author={Buschj\"{a}ger, Sebastian and Chen, Jian-Jia and Chen, Kuan-Hsun and G{\"u}nzel, Mario and Hakert, Christian and Morik, Katharina and Novkin, Rodion and Pfahler, Lukas and Yayla, Mikail},title={Towards Explainable Bit Error Tolerance of Resistive RAM-Based Binarized Neural Networks},year={2020},}
ECML
On-Site Gamma-Hadron Separation with Deep Learning on FPGAs
Sebastian Buschjäger, Lukas Pfahler, Jens Buss, and 2 more authors
In Joint European Conference on Machine Learning and Knowledge Discovery in Databases, Dec 2020
@inproceedings{Buschjaeger/etal/2020a,author={Buschj\"{a}ger, Sebastian and Pfahler, Lukas and Buss, Jens and Morik, Katharina and Rhode, Wolfgang},title={On-Site Gamma-Hadron Separation with Deep Learning on FPGAs},booktitle={Joint European Conference on Machine Learning and Knowledge Discovery in Databases},publisher={Springer},year={2020},}
@inproceedings{Hakert/Yayla/2019a,author={Hakert, Christian and Yayla, Mikail and Chen, Kuan-Hsun and Br\"uggen, Georg von der and Chen, Jian-Jia and Buschj\"ager, Sebastian and Morik, Katharina and Genssler, Paul R. and Bauer, Lars and Amrouch, Hussam and Henkel, J\"org},title={Stack Usage Analysis for Efficient Wear Leveling in Non-Volatile Main Memory Systems},booktitle={1st ACM/IEEE Workshop on Machine Learning for CAD (MLCAD)},year={2019},}
Traffic congestion is one of the most pressing issues for smart cities. Information on traffic flow can be used to reduce congestion by predicting vehicle counts at unmonitored locations, so that counter-measures can be applied before congestion appears. To do so, pricey sensors must be distributed sparsely in the city and at important roads in the city center to collect road and vehicle information throughout the city in real-time. Machine Learning models can then be applied to predict vehicle counts at unmonitored locations. To be fault-tolerant and to extend the coverage of the traffic predictions to the suburbs, rural regions, or even neighboring villages, these Machine Learning models should not operate in a central traffic control room but rather be distributed across the city. Gaussian Processes (GP) work well in the context of traffic count prediction but cannot capitalize on the vast amount of data available in an entire city. Furthermore, Gaussian Processes are a global and centralized model, which requires all measurements to be available at a central computation node. Product of Experts (PoE) models have been proposed as a scalable alternative to Gaussian Processes. A PoE model trains multiple independent GPs on different subsets of the data and weights the individual predictions based on each expert's uncertainty. These methods work well, but they assume that experts are independent even though they may share data points. Furthermore, PoE models require extensive communication bandwidth between the individual experts to form the final prediction. In this paper, we propose a hierarchical Product of Experts model, which consists of multiple layers of small, independent, and local GP experts. We view Gaussian Process induction as a regularized optimization procedure and utilize this view to derive an efficient algorithm that selects independent regions of the data. We then train local expert models on these regions, so that each expert is responsible for a given region. The resulting algorithm scales well for large amounts of data and outperforms flat PoE models in terms of communication cost, model size, and predictive performance. Last, we discuss how to deploy these local expert models onto small devices.
@inproceedings{Buschjaeger/etal/2019a,author={Buschj\"{a}ger, Sebastian and Liebig, Thomas and Morik, Katharina},title={Gaussian Model Trees for Traffic Imputation},booktitle={Proceedings of the International Conference on Pattern Recognition Applications and Methods (ICPRAM)},organization={SciTePress},pages={243 - 254},year={2019},url={https://www.scitepress.org/PublicationsDetail.aspx?ID=g+tVIY+KNts=\&t=1},}
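For reference, the flat PoE baseline that the hierarchical model is compared against combines the experts' Gaussian predictions by precision weighting; a minimal sketch, with names of our choosing:

import numpy as np

def poe_predict(means, variances):
    # Combine K independent GP experts: each expert contributes its
    # predictive mean weighted by its precision 1/variance, so uncertain
    # experts influence the final prediction less.
    means, variances = np.asarray(means), np.asarray(variances)
    precision = (1.0 / variances).sum(axis=0)
    mean = (means / variances).sum(axis=0) / precision
    return mean, 1.0 / precision

Forming this combination requires every expert's mean and variance at one node, which is the communication cost the hierarchical model reduces.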
The optimization of learning has always been of particular concern for big data analytics. However, the ongoing integration of machine learning models into everyday life also demands that their evaluation be extremely fast and in real-time. Moreover, in the Internet of Things, the computing facilities that run the learned model are restricted. Hence, the implementation of the model must take the characteristics of the executing platform into account. Although there exist some heuristics that optimize the code, principled approaches for fast execution of learned models are rare. In this paper, we introduce a method that optimizes the execution of Decision Trees (DT). Decision Trees form the basis of many ensemble methods, such as Random Forests (RF) or Extremely Randomized Trees (ET). For these methods to work best, trees should be as large as possible. This challenges the data and instruction caches of modern CPUs and thus demands a more careful memory layout. Based on a probabilistic view of decision tree execution, we optimize the two most common implementation schemes of decision trees. We discuss the advantages and disadvantages of both implementations and present a theoretically well-founded memory layout which maximizes locality during execution in both cases. The method is applied to three computer architectures, namely ARM (RISC), PPC (Extended RISC), and Intel (CISC), and is automatically adapted to the specific architecture by a code generator. We perform over 1800 experiments on several real-world data sets and report an average speed-up of 2 to 4 across all three architectures by using the proposed memory layout. Moreover, we find that our implementation outperforms sklearn, which was used to train the models, by a factor of 1500.
@inproceedings{Buschjaeger/2018a,author={Buschjaeger, Sebastian and Chen, Kuan-Hsun and Chen, Jian-Jia and Morik, Katharina},title={Realization of Random Forest for Real-Time Evaluation through Tree Framing},booktitle={The IEEE International Conference on Data Mining series (ICDM)},month=nov,year={2018},url={https://ieeexplore.ieee.org/document/8594826},}
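The probabilistic view can be illustrated by the simplest layout decision it enables: placing the more likely child of each node on the fall-through path. The sketch below is a hypothetical toy, not the paper's code generator, and the Node fields are ours:

from dataclasses import dataclass
from typing import Optional

@dataclass
class Node:
    prob: float                       # estimated probability of reaching this node
    left: Optional["Node"] = None
    right: Optional["Node"] = None

def order_by_probability(node):
    # Swap children so the hot path stays contiguous in the emitted code;
    # a real code generator must also negate the comparison after a swap.
    if node is None or node.left is None or node.right is None:
        return
    if node.right.prob > node.left.prob:
        node.left, node.right = node.right, node.left
    order_by_probability(node.left)
    order_by_probability(node.right)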
@article{Buschjaeger/Morik/2017b,author={Buschj\"{a}ger, Sebastian and Morik, Katharina},title={Decision Tree and Random Forest Implementations for Fast Filtering of Sensor Data},journal={IEEE Transactions on Circuits and Systems I: Regular Papers},month=jan,number={1},pages={209--222},volume={65-I},year={2018},url={https://doi.org/10.1109/TCSI.2017.2710627},}
2017
KI
Big Data Science
Katharina Morik, Christian Bockermann, and Sebastian Buschjäger
In ever more disciplines, science is driven by data, which leads to data analytics becoming a primary skill for researchers. This includes the complete process from data acquisition at sensors, over pre-processing and feature extraction, to the use and application of machine learning. Sensors often produce a plethora of data that needs to be dealt with in near-real-time, which requires a combined effort ranging from hardware-level implementations to the high-level design of data flows. In this paper, we outline two use cases covering this wide span of data analysis for science, drawn from a real-world example in astroparticle physics. We outline a high-level design approach which is capable of defining the complete data flow from sensor hardware to final analysis.
@article{Morik/etal/2017a,author={Morik, Katharina and Bockermann, Christian and Buschj\"{a}ger, Sebastian},title={Big Data Science},journal={German Journal on Artificial Intelligence (KI)},month=dec,number={1},pages={27--36},volume={32},year={2017},url={https://doi.org/10.1007/s13218-017-0522-8},}
IoTStreaming
Summary Extraction on Data Streams in Embedded Systems
Sebastian Buschjäger, Katharina Morik, and Maik Schmidt
In Proceedings of the ECML Workshop on IoT Large Scale Learning From Data Streams, Dec 2017
@inproceedings{Buschjaeger/Morik/2017a,author={Buschj\"{a}ger, Sebastian and Morik, Katharina and Schmidt, Maik},title={Summary Extraction on Data Streams in Embedded Systems},booktitle={Proceedings of the ECML Workshop on IoT Large Scale Learning From Data Streams},publisher={ceur-ws.org},year={2017},}
@mastersthesis{Buschjaeger/2016a,author={Buschj\"{a}ger, Sebastian},title={Online Gau{\ss}-Prozesse zur Regression auf FPGAs},school={TU Dortmund},year={2016},}
2015
Discovering Subtle Word Relation in Large German Corpora
Sebastian Buschjäger, Lukas Pfahler, and Katharina Morik
In Proceedings of the 3rd Workshop on the Challenges in the Management of Large Corpora, Dec 2015
@inproceedings{Buschjaeger/etal/2015a,author={Buschj\"{a}ger, Sebastian and Pfahler, Lukas and Morik, Katharina},title={Discovering Subtle Word Relation in Large German Corpora},booktitle={Proceedings of the 3rd Workshop on the Challenges in the Management of Large Corpora},year={2015},}
Untersuchungen zur Analyse von deutschsprachigen Textdaten
Katharina Morik, Alexander Jung, Jan Weckwerth, and 4 more authors
@techreport{Morik/etal/2015a,author={Morik, Katharina and Jung, Alexander and Weckwerth, Jan and R{\"o}tner, Stefan and Hess, Sibylle and Buschj\"{a}ger, Sebastian and Pfahler, Lukas},title={Untersuchungen zur Analyse von deutschsprachigen Textdaten},institution={Technische Universit\"{a}t Dortmund},month=dec,number={2},year={2015},}