% Journal article: LDA topic modeling of 2020 online news with a topic similarity metric.
% Cleaned from an auto-generated export: month uses the standard three-letter macro,
% the page range uses a double hyphen, and HTML markup is stripped from the abstract.
@article{Rocha_Duarte_Welter_2022,
  author   = {Rocha, Leonardo H. and Duarte, Denio and Welter, Daniel},
  title    = {What Happened in 2020: a Topic Modeling Approach based on a Topic Similarity Metric},
  journal  = {{iSys} - Brazilian Journal of Information Systems},
  volume   = {15},
  number   = {1},
  pages    = {19:1--19:17},
  year     = {2022},
  month    = oct,
  doi      = {10.5753/isys.2022.2307},
  url      = {https://sol.sbc.org.br/journals/index.php/isys/article/view/2307},
  abstract = {2020 was atypical mainly due to the Covid-19 pandemic's beginning which has become a vastly discussed subject worldwide. Unsurprisingly, online news websites have followed this trend, besides publishing traditional subjects (e.g., sports, business, and politics). Understanding how the subjects interact with each other over the year is a challenge. In this paper, we intend to build a 2020 timeline based on the subjects and their similarity using a topic modeling approach (LDA) and a novel topic similarity metric. To accomplish that, we scrap news articles websites to build a collection of 2020 news. After that, the collection is pre-processed and sliced monthly. We use an LDA approach to discover the latent topics from all temporal collections. Next, we calculate the similarity between the topics across 2020 using five semantic correlations: born, death, keep, merge, and split. The discovered topics and the drift semantic between them show that building a meaningful 2020 time line is possible.},
}